def anomalis(self):
    """Run heuristic anomaly checks on the loaded PE (self.pe) and log findings.

    Checks: section-entropy packing, SizeOfRawData/PointerToRawData
    consistency, non-ASCII/empty section names, optional-header size, zero
    checksum, entry point outside the first section, NumberOfRvaAndSizes,
    LoaderFlags, and TLS callbacks.
    """
    log("\n[+] Anomalies Check\n")

    # Entropy based check (from peutils): high section entropy suggests packing.
    if peutils.is_probably_packed(self.pe):
        log("\t[*] Based on the sections entropy check! file is possibly packed")

    # SizeOfRawData check: a bogus raw size is sometimes used to crash
    # debugging tools.  Each section's raw end (PointerToRawData +
    # SizeOfRawData) should equal the next section's PointerToRawData.
    # BUG FIX: the original `if i == nsec - 1: break` guard inside
    # range(0, nsec - 1) was dead code and has been removed.
    nsec = self.pe.FILE_HEADER.NumberOfSections
    for i in range(nsec - 1):
        raw_end = self.pe.sections[i].SizeOfRawData + self.pe.sections[i].PointerToRawData
        next_start = self.pe.sections[i + 1].PointerToRawData
        if raw_end != next_start:
            log("\t[*] The Size Of Raw data is valued illegal! Binary might crash your disassembler/debugger")
            break

    # Non-ASCII or empty section name check.  pefile returns bytes for
    # section names on Python 3, so normalize to str before matching.
    for sec in self.pe.sections:
        name = sec.Name.decode("ascii", "ignore") if isinstance(sec.Name, bytes) else sec.Name
        if not re.match("^[.A-Za-z][a-zA-Z]+", name):
            log("\t[*] Non-ascii or empty section names detected")
            break

    # A PE32 optional header is normally 224 (0xE0) bytes.
    if self.pe.FILE_HEADER.SizeOfOptionalHeader != 224:
        log("\t[*] Illegal size of optional Header")

    # A zero checksum is unusual for legitimately built binaries.
    if self.pe.OPTIONAL_HEADER.CheckSum == 0:
        log("\t[*] Header Checksum is zero!")

    # Entry point should fall inside the first (code) section.
    # BUG FIX: this log message was an unterminated string literal split
    # across two physical lines; reconstructed as one string.
    enaddr = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint
    first = self.pe.sections[0]
    if enaddr > first.VirtualAddress + first.Misc_VirtualSize:
        log("\t[*] Entry point is outside the 1st(.code) section! Binary is possibly packed")

    # Standard PE files carry exactly 16 data directories.
    if self.pe.OPTIONAL_HEADER.NumberOfRvaAndSizes != 16:
        log("\t[*] Optional Header NumberOfRvaAndSizes field is valued illegal")

    # LoaderFlags is reserved and must be zero.
    if self.pe.OPTIONAL_HEADER.LoaderFlags != 0:
        log("\t[*] Optional Header LoaderFlags field is valued illegal")

    # TLS callbacks run before the entry point -- a common anti-debug trick.
    if hasattr(self.pe, "DIRECTORY_ENTRY_TLS"):
        callbacks_va = self.pe.DIRECTORY_ENTRY_TLS.struct.AddressOfCallBacks
        log("\t[*] TLS callback functions array detected at 0x%x" % callbacks_va)
        log("\t[*] Callback Array RVA 0x%x" % (callbacks_va - self.pe.OPTIONAL_HEADER.ImageBase))
def metadata_subject_overview():
    """Print a high-level overview of SUBJECT (the globally loaded pefile.PE).

    Reports EXE/DLL type, packing/suspicion heuristics, entry point,
    checksum generation/verification, section layout and an import summary
    for the binary named by the global ORIG_FILE.
    """
    if SUBJECT.is_exe():
        print_('Binary "%s" is an EXE' % ORIG_FILE)
    if SUBJECT.is_dll():
        print_('Binary "%s" is a DLL' % ORIG_FILE)

    flagged = False
    if peutils.is_probably_packed(SUBJECT):
        print_('Binary is possibly packed!')
        flagged = True
    if peutils.is_suspicious(SUBJECT):
        print_('Binary is suspicious!')
        flagged = True
    if not flagged:
        print_('Binary appears to be normal')

    print_('Address of Entry Point: 0x%08x' % SUBJECT.OPTIONAL_HEADER.AddressOfEntryPoint)
    misc_generate_checksum()
    misc_verify_checksum()

    print_('Sections:')
    for section in SUBJECT.sections:
        # pefile may return bytes for section names (Python 3); normalize
        # for display before stripping the NUL padding.
        name = section.Name.decode('ascii', 'ignore') if isinstance(section.Name, bytes) else section.Name
        print_('\tRVA: 0x%08x - Name: %s - %i bytes'
               % (section.VirtualAddress, name.strip('\x00'), section.SizeOfRawData))

    print_('Imports from:')
    for entry in SUBJECT.DIRECTORY_ENTRY_IMPORT:
        # len() replaces the original manual counter loop.
        print_('\t%s -> %i functions' % (entry.dll, len(entry.imports)))
def _populate_metadata(self):
    """Collect high-level metadata about the loaded PE into a dict.

    Returns a dict keyed by human-readable field names (Filename, Size,
    Checksum, Compile Time, Entry Point, Linker Version, EP Bytes, ...).
    """
    pe = self.pe
    opt = pe.OPTIONAL_HEADER
    info = {
        'Filename': self.filename,
        'Magic Type': self.magic_type(self.filename),
        'Size': self.filesize,
        'First Bytes': self.getbytestring(0, 16),
        'Checksum': opt.CheckSum,
        # PE timestamps are seconds since the epoch, rendered here as UTC.
        'Compile Time': '%s UTC' % time.asctime(time.gmtime(pe.FILE_HEADER.TimeDateStamp)),
        'Signature': hex(pe.NT_HEADERS.Signature),
        'Packed': peutils.is_probably_packed(pe),
        'Image Base': hex(opt.ImageBase),
        'Sections': pe.FILE_HEADER.NumberOfSections,
        # Zero-padded 8-digit hex (10 chars including the '0x' prefix).
        'Entry Point': "{0:#0{1}x}".format(opt.AddressOfEntryPoint, 10),
        'Subsystem': pefile.subsystem_types[opt.Subsystem][0],
    }
    linker = '{}.{}'.format(opt.MajorLinkerVersion, opt.MinorLinkerVersion)
    # Append the friendly compiler/product name when this linker version
    # is in the known-versions table; otherwise report the raw version.
    try:
        info['Linker Version'] = '{} - ({})'.format(linker, self.linker_versions[linker])
    except KeyError:
        info['Linker Version'] = '{}'.format(linker)
    info['EP Bytes'] = self.getbytestring(opt.AddressOfEntryPoint, 16, True)
    return info
def anomalis(self):
    """Run heuristic anomaly checks on the loaded PE (self.pe) and log findings.

    Checks: section-entropy packing, SizeOfRawData/PointerToRawData
    consistency, non-ASCII/empty section names, optional-header size, zero
    checksum, entry point outside the first section, NumberOfRvaAndSizes,
    LoaderFlags, and TLS callbacks.
    """
    log("\n[+] Anomalies Check\n")

    # Entropy based check (from peutils): high section entropy suggests packing.
    if peutils.is_probably_packed(self.pe):
        log("\t[*] Based on the sections entropy check! file is possibly packed")

    # SizeOfRawData check: a bogus raw size is sometimes used to crash
    # debugging tools.  Each section's raw end (PointerToRawData +
    # SizeOfRawData) should equal the next section's PointerToRawData.
    # BUG FIX: the original `if i == nsec-1: break` guard inside
    # range(0, nsec-1) was dead code and has been removed.
    nsec = self.pe.FILE_HEADER.NumberOfSections
    for i in range(nsec - 1):
        raw_end = self.pe.sections[i].SizeOfRawData + self.pe.sections[i].PointerToRawData
        next_start = self.pe.sections[i + 1].PointerToRawData
        if raw_end != next_start:
            log("\t[*] The Size Of Raw data is valued illegal! Binary might crash your disassembler/debugger")
            break

    # Non-ASCII or empty section name check.  pefile returns bytes for
    # section names on Python 3, so normalize to str before matching.
    for sec in self.pe.sections:
        name = sec.Name.decode("ascii", "ignore") if isinstance(sec.Name, bytes) else sec.Name
        if not re.match("^[.A-Za-z][a-zA-Z]+", name):
            log("\t[*] Non-ascii or empty section names detected")
            break

    # A PE32 optional header is normally 224 (0xE0) bytes.
    if self.pe.FILE_HEADER.SizeOfOptionalHeader != 224:
        log("\t[*] Illegal size of optional Header")

    # A zero checksum is unusual for legitimately built binaries.
    if self.pe.OPTIONAL_HEADER.CheckSum == 0:
        log("\t[*] Header Checksum is zero!")

    # Entry point should fall inside the first (code) section.
    # BUG FIX: this log message was an unterminated string literal split
    # across two physical lines; reconstructed as one string.
    enaddr = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint
    first = self.pe.sections[0]
    if enaddr > first.VirtualAddress + first.Misc_VirtualSize:
        log("\t[*] Entry point is outside the 1st(.code) section! Binary is possibly packed")

    # Standard PE files carry exactly 16 data directories.
    if self.pe.OPTIONAL_HEADER.NumberOfRvaAndSizes != 16:
        log("\t[*] Optional Header NumberOfRvaAndSizes field is valued illegal")

    # LoaderFlags is reserved and must be zero.
    if self.pe.OPTIONAL_HEADER.LoaderFlags != 0:
        log("\t[*] Optional Header LoaderFlags field is valued illegal")

    # TLS callbacks run before the entry point -- a common anti-debug trick.
    if hasattr(self.pe, "DIRECTORY_ENTRY_TLS"):
        callbacks_va = self.pe.DIRECTORY_ENTRY_TLS.struct.AddressOfCallBacks
        log("\t[*] TLS callback functions array detected at 0x%x" % callbacks_va)
        log("\t[*] Callback Array RVA 0x%x" % (callbacks_va - self.pe.OPTIONAL_HEADER.ImageBase))
def is_packed(filename):
    """Return True if *filename* parses as a PE and peutils flags it as packed.

    Any error (non-PE file, parse failure) is logged and yields False.
    """
    pack_status = False
    try:
        pe_instance = pefile.PE(filename)
        if peutils.is_probably_packed(pe_instance):
            pack_status = True
    except Exception as e:
        # BUG FIX: `except Exception, e` is Python-2-only syntax, and
        # Exception.message is gone in Python 3 -- use str(e) instead.
        logger.error("Error while checking if the file {} is packed using well known"
                     " packer programs like UPX etc. - {}".format(filename, e),
                     exc_info=True)
    # BUG FIX: the original computed pack_status but never returned it.
    return pack_status
def is_packed(self):
    """Return peutils' packed heuristic for self.pe, or None on any error."""
    try:
        return peutils.is_probably_packed(self.pe)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; any analysis failure maps to None.
        return None
def _is_packed(self, pe) -> Union[bool, None]: """ Check if the payload is packed """ try: return peutils.is_probably_packed(pe) except: return None
def is_malicious(self, file_content):
    """Return True when *file_content* parses as a PE whose sections trip
    peutils' entropy heuristic (likely compressed/encrypted); else False."""
    try:
        pe = pefile.PE(data=file_content)
    except pefile.PEFormatError:
        # Not a PE at all -- nothing for this detector to flag.
        return False
    if peutils.is_probably_packed(pe):
        # BUG FIX: converted the Python-2 print statement to a function
        # call, valid on both Python 2 and 3.
        print("[Detector] File entropy too high - a potential compressed/encrypted sections found")
        return True
    return False
def peid(self):
    """Match self.pe against the bundled PEiD signature database and run
    the peutils heuristics.

    Returns a dict with keys: peid_signature_match (list of matched packer
    names), is_probably_packed, is_suspicious, is_valid.
    """
    userdb_file_dir_path = path.join(MALICE_ROOT, 'data', 'UserDB.TXT')
    signatures = peutils.SignatureDatabase(userdb_file_dir_path)
    matches = signatures.match_all(self.pe, ep_only=True)
    # BUG FIX: on Python 3 the original map(packer.append, ...) produced a
    # lazy iterator that was never consumed, so the list stayed empty.
    packer = [s[0] for s in matches] if matches else []
    return {
        "peid_signature_match": packer,
        "is_probably_packed": peutils.is_probably_packed(self.pe),
        "is_suspicious": peutils.is_suspicious(self.pe),
        "is_valid": peutils.is_valid(self.pe),
    }
def _build_peid_matches(self, scan_result):
    """Run PEiD signature matching plus peutils heuristics on *scan_result*
    (a parsed pefile.PE object) and return the results as a dict."""
    import peutils
    pe_matches = dict()
    UserDB_FILE_DIR_PATH = path.join(path.dirname(__file__), 'file', 'UserDB.TXT')
    signatures = peutils.SignatureDatabase(UserDB_FILE_DIR_PATH)
    matches = signatures.match_all(scan_result, ep_only=True)
    # BUG FIX: on Python 3 the original map(packer.append, ...) produced a
    # lazy iterator that was never consumed, so the list stayed empty.
    packer = [s[0] for s in matches] if matches else []
    pe_matches["peid_signature_match"] = packer
    pe_matches["is_probably_packed"] = peutils.is_probably_packed(scan_result)
    pe_matches["is_suspicious"] = peutils.is_suspicious(scan_result)
    pe_matches["is_valid"] = peutils.is_valid(scan_result)
    return pe_matches
def _build_peid_matches(self, scan_result):
    """Run PEiD signature matching plus peutils heuristics on *scan_result*
    (a parsed pefile.PE object) and return the results as a dict."""
    import peutils
    pe_matches = dict()
    UserDB_FILE_DIR_PATH = path.join(path.dirname(__file__), 'file',
                                     '../pe/data/UserDB.TXT')
    signatures = peutils.SignatureDatabase(UserDB_FILE_DIR_PATH)
    matches = signatures.match_all(scan_result, ep_only=True)
    # BUG FIX: on Python 3 the original map(packer.append, ...) produced a
    # lazy iterator that was never consumed, so the list stayed empty.
    packer = [s[0] for s in matches] if matches else []
    pe_matches["peid_signature_match"] = packer
    pe_matches["is_probably_packed"] = peutils.is_probably_packed(scan_result)
    pe_matches["is_suspicious"] = peutils.is_suspicious(scan_result)
    pe_matches["is_valid"] = peutils.is_valid(scan_result)
    return pe_matches
def scan(self):
    """Run the full static scan: hashes, PE metadata, optional Yara rules
    and an optional VirusTotal lookup.

    Populates and returns self.results (a dict of findings).
    """
    hashes = self.get_hashes()
    self.results = {
        'MD5': hashes['md5'],
        'SHA1': hashes['sha1'],
        'SHA256': hashes['sha256'],
        'PEHash': hashes['peHash'],
        'ImpHash': self.pe.get_imphash(),
        'SSDeep': self.get_ssdeep(),
        'Type': subprocess.getoutput('file -b %s' % self.file_name),
        'Size': (os.path.getsize(self.file_name) / 1000),
        'Packed': peutils.is_probably_packed(self.pe),
        'Arch': pefile.MACHINE_TYPE[self.pe.FILE_HEADER.Machine],
        'Entry Point': hex(self.pe.OPTIONAL_HEADER.AddressOfEntryPoint),
        'Compiled': self.get_timestamp(),
        'Start Address': self.get_start_address(),
        'Sections': self.get_sections(),
        'Security': self.get_security(),
        'Suspicious Imports': self.get_suspicious_imports(),
        'Interesting Strings': self.get_interesting_strings()
    }
    if self.yara_rules is not None:
        tags, hits = [], []
        # Read the sample once instead of re-opening it for every rule.
        with open(self.file_name, 'rb') as f:
            sample = f.read()
        for _file in os.listdir(self.yara_rules):
            # BUG FIX: the original concatenated the *list* of rule names
            # with '/' (TypeError); join against the rules directory.
            rule = yara.compile(os.path.join(self.yara_rules, _file))
            for m in rule.match(data=sample):
                if m.rule not in hits:
                    hits.append(m.rule)
                for tag in m.tags:
                    if tag not in tags:
                        tags.append(tag)
        # NOTE(review): 'Matces' looks like a typo for 'Matches' but is kept
        # for backward compatibility with existing consumers.
        self.results['Yara'] = {'Matces': hits, 'Tags': tags}
    if self.vt_key is not None:
        # BUG FIX: the results dict uses the 'MD5' key -- 'md5' raised KeyError.
        self.results['VirusTotal'] = get_vt_report(self.vt_key, self.results['MD5'])
    return self.results
def pefileScan(target: Path) -> Dict[str, str]:
    '''pefile scan target

    Args:
        target: A Path to target file

    Raise:
        ZhongkuiScanError

    Return:
        A dict result
    '''
    pe = pefile.PE(target)
    info = PEfileInfo()
    try:
        # Header fields.
        info.header.timestamp = str(
            datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp))
        info.header.sections = pe.FILE_HEADER.NumberOfSections
        info.header.entryPoint = str(pe.OPTIONAL_HEADER.AddressOfEntryPoint)

        # Per-section details (name stripped of NUL padding, entropy, md5).
        for sec in pe.sections:
            item = PESection()
            item.name = sec.Name.replace(b"\x00", b"").decode("utf-8")
            item.virtualAddress = str(sec.VirtualAddress)
            item.virtualSize = str(sec.Misc_VirtualSize)
            item.rawSize = str(sec.SizeOfRawData)
            item.entropy = round(sec.get_entropy(), 2)
            item.md5 = sec.get_hash_md5()
            info.sections.append(item)

        # Import table: one record per DLL with its imported symbol names.
        for dll_entry in pe.DIRECTORY_ENTRY_IMPORT:
            record = PEImport()
            record.dllName = dll_entry.dll.decode("utf-8")
            record.importFunctions = [
                func.name.decode("utf-8") for func in dll_entry.imports
            ]
            info.imports.append(record)

        # Entropy-based packing heuristic.
        info.isProbablyPacked = is_probably_packed(pe)
    except Exception as e:
        log.error("pefile parse error: {}".format(e))
        raise ZhongkuiScanError("pefile parse error: {}".format(e))
    return asdict(info)
def possible_packing(pe):
    """Collect simple packing indicators for a parsed pefile.PE object.

    Returns a tuple: (entropy_says_packed, has_bad_section_name,
    entry_point_rva, first_section_va, first_section_vsize,
    first_section_end).
    """
    # True/1 from peutils' entropy heuristic means "probably packed".
    pepack = bool(peutils.is_probably_packed(pe))
    emptysec = False
    # Non-ASCII or empty section name check.  pefile returns bytes for
    # section names on Python 3, so normalize to str before matching.
    for sec in pe.sections:
        name = sec.Name.decode("ascii", "ignore") if isinstance(sec.Name, bytes) else sec.Name
        if not re.match("^[.A-Za-z][a-zA-Z]+", name):
            emptysec = True
    # Entry point location relative to the first (code) section: an entry
    # point beyond entaddr suggests packing (caller decides).
    enaddr = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    vbsecaddr = pe.sections[0].VirtualAddress
    ensecaddr = pe.sections[0].Misc_VirtualSize
    entaddr = vbsecaddr + ensecaddr
    return pepack, emptysec, enaddr, vbsecaddr, ensecaddr, entaddr
def fingerprinting(self, _pe_file):
    """Build a basic fingerprint dict for the file at *_pe_file*.

    Includes size (as a (KB, "KB") tuple), libmagic type, SHA1/SHA256/MD5
    hashes, PE compile date and peutils' packing heuristic.
    """
    info = {}
    info['size'] = (os.path.getsize(_pe_file)) / 1000, "KB"
    info['type file'] = magic.from_file(_pe_file)
    with open(_pe_file, 'rb') as handle:
        blob = handle.read()
    info['hash sha1'] = hashlib.sha1(blob).hexdigest()
    info['hash sha256'] = hashlib.sha256(blob).hexdigest()
    # MD5 is fed in 8 KiB chunks, matching the original implementation.
    digest = hashlib.md5()
    offset = 0
    while offset < len(blob):
        digest.update(blob[offset:offset + 8192])
        offset += 8192
    info['hash md5'] = digest.hexdigest()
    parsed = pefile.PE(_pe_file)
    stamp = parsed.FILE_HEADER.TimeDateStamp
    # NOTE(review): localtime() renders the PE timestamp in the local zone;
    # confirm whether UTC (gmtime) was intended.
    info['compile date'] = str(
        time.strftime("%Y-%m%d %H:%M:%S", time.localtime(stamp)))
    info['is probably packed'] = peutils.is_probably_packed(parsed)
    return info
def anomalies(file):
    """
    source: http://securityxploded.com/exe-scan.php
    notes: using the peutils version from :
    http://malware-analysis.googlecode.com/svn-history/r74/trunk/MalwareAnalysis/malware_analysis/pe_struct/peutils.py

    Runs heuristic anomaly checks against the module-level `pe` object and
    returns the findings as a list of strings.
    """
    ret = []

    # Entropy based check (peutils): high section entropy suggests packing.
    if peutils.is_probably_packed(pe) == 1:
        ret.append("Based on the sections entropy check, the file is possibly packed")

    # SizeOfRawData check: each section's raw end should equal the next
    # section's PointerToRawData; a mismatch can crash debugging tools.
    # BUG FIX: removed the dead `if i == nsec-1: break` guard inside
    # range(0, nsec-1).
    nsec = pe.FILE_HEADER.NumberOfSections
    for i in range(nsec - 1):
        raw_end = pe.sections[i].SizeOfRawData + pe.sections[i].PointerToRawData
        next_start = pe.sections[i + 1].PointerToRawData
        if raw_end != next_start:
            ret.append("The Size Of Raw data is valued illegal... The binary might crash your disassembler/debugger")
            break

    # Non-ASCII or empty section name check.
    # NOTE(review): under Python 3 pefile, sec.Name is bytes and this match
    # would raise TypeError -- confirm the target runtime is Python 2.
    for sec in pe.sections:
        if not re.match("^[.A-Za-z][a-zA-Z]+", sec.Name):
            ret.append("Non-ASCII or empty section names detected")
            break

    # A PE32 optional header is normally 224 (0xE0) bytes.
    if pe.FILE_HEADER.SizeOfOptionalHeader != 224:
        ret.append("Illegal size of optional Header")

    # A zero checksum is unusual for legitimately built binaries.
    if pe.OPTIONAL_HEADER.CheckSum == 0:
        ret.append("Header Checksum is zero")

    # Entry point should fall inside the first (code) section.
    # BUG FIX: this message was an unterminated string literal split across
    # two physical lines; reconstructed as one string.
    enaddr = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    if enaddr > pe.sections[0].VirtualAddress + pe.sections[0].Misc_VirtualSize:
        ret.append("Entry point is outside the 1st(.code) section. Binary is possibly packed")

    # Standard PE files carry exactly 16 data directories.
    if pe.OPTIONAL_HEADER.NumberOfRvaAndSizes != 16:
        ret.append("Optional Header NumberOfRvaAndSizes field is valued illegal")

    # LoaderFlags is reserved and must be zero.
    if pe.OPTIONAL_HEADER.LoaderFlags != 0:
        ret.append("Optional Header LoaderFlags field is valued illegal")

    # TLS callbacks run before the entry point -- a common anti-debug trick.
    if hasattr(pe, "DIRECTORY_ENTRY_TLS"):
        ret.append("TLS callback functions array detected at 0x%x"
                   % pe.DIRECTORY_ENTRY_TLS.struct.AddressOfCallBacks)
        callback_rva = pe.DIRECTORY_ENTRY_TLS.struct.AddressOfCallBacks - pe.OPTIONAL_HEADER.ImageBase
        ret.append("Callback Array RVA 0x%x" % callback_rva)

    # Service DLL / export-table checks.
    if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
        exp_count = len(pe.DIRECTORY_ENTRY_EXPORT.symbols)
        for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
            if re.match('ServiceMain', exp.name):
                ret.append("ServiceMain exported, looks to be a service")
        # EXE file with exports check
        import magic  # ! this is a repetetive task from info within pescanner
        try:
            with open(file, "rb") as f:
                data = f.read()
            ms = magic.open(magic.MAGIC_NONE)
            ms.load()
            if not re.match(r'.*\(DLL\)\s\(GUI\).*', ms.buffer(data)) and exp_count > 1:
                ret.append("EXE file with exports")
            else:
                # DLL without an export for either of ServiceMain or DllMain.
                dll_ep = [e for e in pe.DIRECTORY_ENTRY_EXPORT.symbols
                          if re.match('ServiceMain|DllMain', e.name)]
                if not dll_ep:
                    ret.append("DLL doesn't contain either of ServiceMain or DllMain")
        except Exception as msg:
            # BUG FIX: Python-2-only `except Exception, msg` / `print msg`
            # replaced with syntax valid on both Python 2 and 3.
            print(msg)

    # BUG FIX: the original built `ret` but never returned it.
    return ret
def checkIfPacked2(pe):
    """Return 1 when peutils' entropy heuristic flags *pe* as packed, else 0."""
    # Conditional expression replaces the verbose if/else on a boolean.
    return 1 if peutils.is_probably_packed(pe) else 0
def generate_sample_packer_id(file_list):
    """Write a per-process CSV of PEiD packer features for each sample in
    *file_list*.

    Each row is: truncated_file_name, packer_type, packer_id, is_valid,
    is_packed.  Relies on module-level load_packer_id_map(), ext_drive and
    the signature DB at data/userdb-sans.txt.  Output goes to
    data/<pid>-sorted-packer-id-features.csv; rows are flushed every 1000
    samples.
    """
    # Generate scalar packer IDs for each sample.
    pid = os.getpid()
    file_name = "data/" + str(pid) + "-sorted-packer-id-features.csv"
    # NOTE(review): file handle is opened without a context manager and only
    # closed at the end; an exception mid-run would leak it.
    fop = open(file_name, 'w')
    #fop.write('file_name,packer_type,label,is_valid,is_packed\n') put column headers in during the combine stage.
    out_lines = []
    packer_id_map = load_packer_id_map()
    signatures = peutils.SignatureDatabase('data/userdb-sans.txt')
    non_pe_counter = 0
    pe_file_counter = 0
    # NOTE(review): exception_counter is never incremented or reported.
    exception_counter = 0
    signat = 'unknown'
    error_str = 'none'
    for idx, file_name in enumerate(file_list):
        tokens = file_name.split('_')
        truncated_file_name = tokens[1]  # remove the VirusShare_ prefix from the filename.
        matches = None
        packer_id = 0
        is_valid = 0
        is_packed = 0
        try:
            # fast_load skips directory parsing; enough for signature matching.
            pe = pefile.PE(ext_drive + file_name, fast_load=True)
            pe_file_counter += 1
            #matches = signatures.match_all(pe, ep_only = True)
            is_valid = 1
            try:
                if peutils.is_probably_packed(pe):  # NOTE: peutils.is_valid() has not been implemented yet.
                    #is_valid = 1
                    is_packed = 1
                    matches = signatures.match(pe, ep_only=True)
                    signat = matches[0]
                    if (signat in packer_id_map.keys()):
                        packer_id = packer_id_map[signat]
                    else:
                        packer_id = 0
                    #signat = signat.replace(',','') # remove commas or they will cause an error when loading dataframes.
                # NOTE: If the signature database has commas in the packer name then remove them or they will
                # cause problems later on when loading the dataframes.
                row = truncated_file_name + "," + signat + "," + str(packer_id) + "," + str(is_valid) + "," + str(is_packed) + "\n"
            except:
                # NOTE(review): bare except -- also swallows KeyboardInterrupt;
                # the row falls back to an "unknown" signature.
                signat = ",unknown,0," + str(is_valid) + "," + str(is_packed) + "\n"
                row = truncated_file_name + signat
            pe.close()
        except Exception as e:
            # Non-PE or unreadable file: record the error text as the signature.
            error_str = str(e)
            non_pe_counter += 1
            error_str = error_str.replace(',', '')  # remove commas or they will cause an error when loading dataframes.
            signat = "," + error_str + ",0,0,0\n"
            row = truncated_file_name + signat
        out_lines.append(row)
        if (idx % 1000) == 0:
            # print progress
            fop.writelines(out_lines)
            out_lines = []
            print('{:s} - {:s} - {:d} - {:s}'.format(str(pid), truncated_file_name, idx, signat))
    # Flush any remaining buffered rows.
    if len(out_lines) > 0:
        fop.writelines(out_lines)
        out_lines = []
    fop.close()
    print('{:s} - Completed {:d} non PE files and {:d} PE files.'.format(str(pid), non_pe_counter, pe_file_counter))
    return
def analyze(self):
    """
    Analyze the loaded file.

    :return: -- A List of results
    """
    results = []
    # Modules whose mere presence is suspicious (VB runtimes).
    modbl = ['MSVBVM60.DLL', 'MSVBVM50.DLL']
    # Modules whose imported symbols are scanned against the lists below.
    modlist = [
        'KERNEL32.DLL', 'USER32.DLL', 'WINMM.DLL', 'NTDLL.DLL', 'PSAPI.DLL'
    ]
    dbglist = [
        'isdebuggerpresent', 'checkremotedebuggerpresent', 'gettickcount',
        'outputdebugstring', 'ntqueryobject', 'findwindow', 'timegettime',
        'ntqueryinformationprocess', 'isprocessorfeaturepresent',
        'ntquerysysteminformation', 'createtoolhelp32snapshot', 'blockinput',
        'setunhandledexceptionfilter', 'queryperformancecounter',
        'ntsetdebugfilterstate', 'dbgbreakpoint',
        'rtlqueryprocessdebuginformation', 'blockinput'
    ]
    dsdcalls = ['createdesktop', 'switchdesktop']
    importbl = [
        'openprocess', 'virtualallocex', 'writeprocessmemory',
        'createremotethread', 'readprocessmemory', 'createprocess',
        'winexec', 'shellexecute', 'httpsendrequest', 'internetreadfile',
        'internetconnect', 'createservice', 'startservice'
    ]
    importwl = ['terminateprocess']
    # Standard section names.
    predef_sections = [
        '.text', '.bss', '.rdata', '.data', '.rsrc', '.edata', '.idata',
        '.pdata', '.debug', '.reloc', '.sxdata', '.tls'
    ]

    # Get filetype
    # results.append(AnalysisResult(2, 'File Type', self.magic_type(self.filename)))
    if not self.pe.verify_checksum():
        results.append(AnalysisResult(0, 'Checksum', "Invalid CheckSum"))
    if peutils.is_probably_packed(self.pe):
        results.append(
            AnalysisResult(1, 'Packed', "Sample is probably packed"))
    # Hoisted: detect_overlay() was called twice in the original.
    overlay = self.detect_overlay()
    if overlay > 0:
        results.append(
            AnalysisResult(1, 'Overlay',
                           "Detected Overlay [%s]" % hex(overlay)))

    modules = self.listimports()
    impcount = len(modules)
    dsd = 0
    dotnet = False
    for modulename in modules:
        if modulename == 'mscoree.dll':
            dotnet = True
            continue
        # BUG FIX: compare case-insensitively -- modbl holds upper-case
        # names, so the original `modulename in modbl` never matched.
        if modulename.upper() in modbl:
            results.append(
                AnalysisResult(0, 'Imports',
                               "Suspicious Import [%s]" % modulename))
        if modulename.upper() in modlist:
            for symbol in modules[modulename]:
                if symbol.name.lower() in dbglist:
                    # BUG FIX: this message was an unterminated string
                    # literal split across two physical lines.
                    results.append(
                        AnalysisResult(
                            0, 'AntiDebug',
                            'AntiDebug Function import [%s]' % symbol.name))
                if symbol.name.lower() in importbl:
                    results.append(
                        AnalysisResult(
                            0, 'Imports',
                            'Suspicious API Call [%s]' % symbol.name))
                if symbol.name.lower() in importwl:
                    results.append(
                        AnalysisResult(
                            1, 'Imports',
                            'Suspicious API Call [%s]' % symbol.name))
                if symbol.name.lower() in dsdcalls:
                    dsd += 1

    # If the sample is dotnet, don't warn on a low import count.
    if impcount < 3 and not dotnet:
        results.append(
            AnalysisResult(1, 'Imports', "Low import count %d " % impcount))
    # Both CreateDesktop and SwitchDesktop imported -> desktop-switch trick.
    if dsd == 2:
        results.append(
            AnalysisResult(
                0, 'AntiDebug',
                'AntiDebug Function import CreateDesktop/SwitchDestkop'))

    for section in self.pe.sections:
        name = section.Name.strip('\0')
        if name not in predef_sections:
            results.append(
                AnalysisResult(1, 'Sections',
                               'Uncommon Section Name [%s]' % name))
        if section.SizeOfRawData == 0:
            results.append(
                AnalysisResult(1, 'Sections',
                               'Raw Section Size is 0 [%s]' % name))
    return results
def main():
    """Entry point: verify the global `source` file is a PE, parse it with
    pefile, and write a full PETri_<timestamp>_<name>.txt report (metadata,
    packing check, headers, hashes, sections, data directories, imports,
    exports)."""
    print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("PETri.py " + __version__ + " Author: " + __author__ + " " + __email__)
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("\nScript initialised...")
    # Binary
    # NOTE(review): `source` is a module-level global supplied elsewhere.
    binary = source
    name = os.path.basename(binary)
    # File Signature Check
    result = check_signature(binary)
    if result == True:
        print(
            "\nFile signature match for PE binary, proceeding with analysis..."
        )
    else:
        sys.exit(
            "\nFile signature did NOT match for a PE binary, terminating analysis."
        )
    # Read Data
    # NOTE(review): f and fh are never closed (and the file is opened twice);
    # consider `with open(...)` blocks.
    f = open(binary, "rb")
    data = f.read()
    size = len(data)
    # Header (first 256 bytes, hexdumped into the report below)
    fh = open(binary, "rb")
    header = fh.read(256)
    # For PE
    pe = pefile.PE(binary)
    # Report
    out = os.path.join("PETri_" + time.strftime("%Y%m%d_%H%M%S") + "_" + name +
                       ".txt")
    report = open(out, "w")
    report.write("~" * 53)
    report.write("\nPETri.py " + __version__ + " Author: " + __author__ + " " +
                 __email__ + "\n")
    report.write("~" * 53)
    # Metadata
    report.write("\n\n########## Metadata ##########\n\n")
    report.write("File Name: " + name + "\n")
    report.write("File Size: " + str(size) + " bytes" + "\n")
    # Packed (peutils entropy heuristic)
    report.write("\n########## Packed ##########\n\n")
    packed = peutils.is_probably_packed(pe)
    if packed == 1:
        report.write("Is Packed?: PACKED." + "\n")
    else:
        report.write("Is Packed?: Not Packed." + "\n")
    # PE Header
    report.write("\n########## PE Header ##########\n\n")
    hd = pe.DOS_HEADER.e_magic
    # NOTE(review): hex(hd == '0x5a4d') is always truthy (hex of a bool is a
    # non-empty string), so h is always "0x5a4d (MZ)"; the intended test was
    # probably hex(hd) == '0x5a4d'.  Left as-is.
    if hex(hd == '0x5a4d'):
        h = "0x5a4d (MZ)"
    else:
        h = "Unknown"
    report.write("DOS Header: " + h + "\n")
    sig = pe.NT_HEADERS.Signature
    # NOTE(review): same always-truthy pattern as the DOS header check above.
    if hex(sig == '0x4550'):
        s = "0x4550 (PE)"
    else:
        s = "Unknown"
    report.write("NT Header: " + s + "\n")
    hdr = (hexdump.hexdump(header, result='return'))
    report.write("\n" + hdr + "\n")
    # File Properties (VS_VERSIONINFO string table, if present)
    report.write("\n########## File Properties ##########\n\n")
    if hasattr(pe, 'VS_VERSIONINFO'):
        if hasattr(pe, 'FileInfo'):
            for fi in pe.FileInfo:
                for entry in fi:
                    if hasattr(entry, 'StringTable'):
                        for st_entry in entry.StringTable:
                            # NOTE(review): inner loop rebinds `entry`,
                            # shadowing the outer loop variable.
                            for entry in st_entry.entries.items():
                                report.write(
                                    str(entry[0].decode('utf-8')) + " -> " +
                                    (str(entry[1].decode('utf-8')) + "\n"))
    else:
        report.write("No file properties identified.\n")
    # Hashing (whole-file digests plus pefile's import hash)
    h1 = hashlib.md5()
    h2 = hashlib.sha1()
    h3 = hashlib.sha256()
    h4 = hashlib.sha512()
    h1.update(data)
    h2.update(data)
    h3.update(data)
    h4.update(data)
    md5 = h1.hexdigest()
    sha1 = h2.hexdigest()
    sha256 = h3.hexdigest()
    sha512 = h4.hexdigest()
    imphash = pe.get_imphash()
    report.write("\n########## Hash Values ##########\n\n")
    report.write("MD5: " + md5 + "\n")
    report.write("SHA1: " + sha1 + "\n")
    report.write("SHA256: " + sha256 + "\n")
    report.write("SHA512: " + sha512 + "\n")
    report.write("Imphash: " + imphash + "\n")
    # COFF File Header (Object and Image)
    report.write("\n########## COFF File Header ##########\n\n")
    dll = pe.FILE_HEADER.IMAGE_FILE_DLL
    if dll == 1:
        d = "Yes"
    else:
        d = "No"
    report.write("Is DLL?: " + d + "\n")
    machine = pe.FILE_HEADER.Machine
    # NOTE(review): cpu/magic_no/subsystem/dllchar are module-level lookup
    # dicts; .get() may return None and break the concatenations below.
    report.write("Target Machine: " + cpu.get(machine) + "\n")
    sections = pe.FILE_HEADER.NumberOfSections
    report.write("No of Sections: " + (str(sections)) + "\n")
    compiled = pe.FILE_HEADER.TimeDateStamp
    report.write("Compiled Date: " + UNIX10(compiled) + "\n")
    symbol_table = pe.FILE_HEADER.PointerToSymbolTable
    report.write("Symbol Table Pointer: " + (hex(symbol_table)) + "\n")
    no_symbols = pe.FILE_HEADER.NumberOfSymbols
    report.write("No of Symbols: " + (str(no_symbols)) + "\n")
    op_header_size = pe.FILE_HEADER.SizeOfOptionalHeader
    report.write("Optional Header Size: " + (str(op_header_size)) + " bytes" +
                 "\n")
    characteristics = pe.FILE_HEADER.Characteristics
    report.write("Characteristics: " + (hex(characteristics)) + "\n")
    # Optional Header Standard Fields (Image Only)
    report.write("\n########## Optional File Header ##########\n\n")
    mag = pe.OPTIONAL_HEADER.Magic
    report.write("Magic: " + magic_no.get(mag) + "\n")
    malv = pe.OPTIONAL_HEADER.MajorLinkerVersion
    report.write("Major Link Version: " + str(malv) + "\n")
    milv = pe.OPTIONAL_HEADER.MinorLinkerVersion
    report.write("Minor Link Version: " + str(milv) + "\n")
    text_section_size = pe.OPTIONAL_HEADER.SizeOfCode
    report.write("Code Size: " + str(text_section_size) + " bytes" + "\n")
    soid = pe.OPTIONAL_HEADER.SizeOfInitializedData
    report.write("Initialised Size: " + str(soid) + " bytes" + "\n")
    soud = pe.OPTIONAL_HEADER.SizeOfUninitializedData
    report.write("Uninitialised Size: " + str(soud) + " bytes" + "\n")
    entry = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    report.write("Entry Pointer: " + hex(entry) + "\n")
    # Optional Header - Extension Fields (Image Only)
    report.write("-" * 40 + "\n")
    boc = pe.OPTIONAL_HEADER.BaseOfCode
    report.write("Base of Code: " + hex(boc) + "\n")
    # PE32+ images have no BaseOfData field -- handled via the except below.
    try:
        bod = pe.OPTIONAL_HEADER.BaseOfData
        report.write("Base of Data: " + hex(bod) + "\n")
    except:
        report.write("Base of Data: " + "No BaseOfData field.\n")
    ib = pe.OPTIONAL_HEADER.ImageBase
    report.write("Image Base: " + hex(ib) + "\n")
    sa = pe.OPTIONAL_HEADER.SectionAlignment
    report.write("Section Alignment: " + hex(sa) + "\n")
    fa = pe.OPTIONAL_HEADER.FileAlignment
    report.write("File Alignment: " + hex(fa) + "\n")
    maosv = pe.OPTIONAL_HEADER.MajorOperatingSystemVersion
    report.write("Major OS Version: " + str(maosv) + "\n")
    miosv = pe.OPTIONAL_HEADER.MinorOperatingSystemVersion
    report.write("Minor OS Version: " + str(miosv) + "\n")
    maiv = pe.OPTIONAL_HEADER.MajorImageVersion
    report.write("Major Image Version: " + str(maiv) + "\n")
    miiv = pe.OPTIONAL_HEADER.MinorImageVersion
    # NOTE(review): label says "Minor OS Version" but the value is the minor
    # *image* version -- likely a copy/paste slip.
    report.write("Minor OS Version: " + str(miiv) + "\n")
    massv = pe.OPTIONAL_HEADER.MajorSubsystemVersion
    report.write("Major Sub Version: " + str(massv) + "\n")
    missv = pe.OPTIONAL_HEADER.MinorSubsystemVersion
    report.write("Minor Sub Version: " + str(missv) + "\n")
    w32v = pe.OPTIONAL_HEADER.Reserved1
    report.write("Win32 Version: " + str(w32v) + "\n")
    soi = pe.OPTIONAL_HEADER.SizeOfImage
    report.write("Size of Image: " + str(soi) + " bytes" + "\n")
    soh = pe.OPTIONAL_HEADER.SizeOfHeaders
    report.write("Size of Headers: " + str(soh) + " bytes" + "\n")
    csum = pe.OPTIONAL_HEADER.CheckSum
    report.write("CheckSum: " + hex(csum) + "\n")
    ss = pe.OPTIONAL_HEADER.Subsystem
    report.write("Subsystem: " + subsystem.get(ss) + "\n")
    dllc = pe.OPTIONAL_HEADER.DllCharacteristics
    if dllchar.get(dllc) is not None:
        report.write("DllCharacteristics: " + dllchar.get(dllc) + "\n")
    else:
        report.write("DllCharacteristics: " + str(dllc) +
                     " - Unknown DLL Characteristic.\n")
    sosr = pe.OPTIONAL_HEADER.SizeOfStackReserve
    report.write("Size of Stack Res: " + str(sosr) + " bytes" + "\n")
    sosc = pe.OPTIONAL_HEADER.SizeOfStackCommit
    report.write("Size of Stack Com: " + str(sosc) + " bytes" + "\n")
    sohr = pe.OPTIONAL_HEADER.SizeOfHeapReserve
    report.write("Size of Heap Reserve: " + str(sohr) + " bytes" + "\n")
    sohc = pe.OPTIONAL_HEADER.SizeOfHeapCommit
    report.write("Size of Heap Committ: " + str(sohc) + " bytes" + "\n")
    lf = pe.OPTIONAL_HEADER.LoaderFlags
    report.write("Loader Flags: " + str(lf) + "\n")
    nors = pe.OPTIONAL_HEADER.NumberOfRvaAndSizes
    report.write("No of Rva & Sizes: " + str(nors) + "\n")
    # Sections (per-section addresses, sizes, entropy and digests)
    report.write("\n########## Sections ##########\n")
    for section in pe.sections:
        report.write("\nSection Name: " + section.Name.decode('utf-8'))
        report.write("\nVirtual Address: " + hex(section.VirtualAddress))
        report.write("\nVirtual Size: " + str(section.Misc_VirtualSize) +
                     " bytes")
        report.write("\nPointer to Raw: " + hex(section.PointerToRawData))
        report.write("\nRaw Size: " + str(section.SizeOfRawData) + " bytes")
        report.write("\nEntropy: " + str(section.get_entropy()))
        report.write("\nMD5: " + str(section.get_hash_md5()))
        report.write("\nSHA1: " + str(section.get_hash_sha1()))
        report.write("\nSHA256: " + str(section.get_hash_sha256()))
        report.write("\nSHA512: " + str(section.get_hash_sha512()) + "\n")
    # Data Directories
    report.write("\n########## Data Directories ##########\n")
    # NOTE(review): `dir` shadows the builtin of the same name.
    for dir in pe.OPTIONAL_HEADER.DATA_DIRECTORY:
        report.write("\nDirectory Name: " + str(dir.name))
        report.write("\nVirtual Address: " + hex(dir.VirtualAddress))
        report.write("\nSize: " + str(dir.Size) + " bytes")
        report.write("\n")
    # Imports (absent import table lands in the except branch)
    report.write("\n########## Imports ##########\n")
    try:
        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            report.write("\n" + entry.dll.decode('utf-8') + "\n")
            for im in entry.imports:
                report.write("------------ Offset: " + hex(im.address) +
                             " | Import Function: " + im.name.decode('utf-8') +
                             "\n")
    except:
        report.write("\nNo import symbols identified.\n")
    # Exports (absent export table lands in the except branch)
    report.write("\n########## Exports ##########\n")
    try:
        for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
            report.write("\n------------ Offset: " +
                         hex(pe.OPTIONAL_HEADER.ImageBase + exp.address) +
                         " | Export Function: " + str(exp.name.decode('utf-8')))
    except:
        report.write("\nNo export symbols identified.\n")
    # NOTE(review): `report` is never explicitly closed/flushed here.
    print("\nAnalysis completed, see report for further details!")
def renderPackerStatus(pe):
    """Pretty-print whether *pe* looks packed per peutils' entropy heuristic."""
    # Truthiness test instead of the explicit '== True' comparison.
    if peutils.is_probably_packed(pe):
        PrettyPrint('EXE has been packed!')
    else:
        PrettyPrint('EXE is not packed.')
def analyze(self):
    """Analyze the loaded PE file for suspicious static indicators.

    Checks file type, checksum validity, probable packing, overlay data,
    imported modules/APIs (anti-debug, injection, networking), import count,
    and section names (packer signatures, non-standard names, zero raw size).

    :return: A list of AnalysisResult findings.
    """
    results = []

    modbl = {'MSVBVM60.DLL', 'MSVBVM50.DLL'}
    modlist = {'KERNEL32.DLL', 'USER32.DLL', 'WINMM.DLL', 'NTDLL.DLL', 'PSAPI.DLL'}
    # Sets give O(1) membership tests; duplicate 'blockinput' entry removed.
    dbglist = {'isdebuggerpresent', 'checkremotedebuggerpresent', 'gettickcount',
               'outputdebugstring', 'ntqueryobject', 'findwindow', 'timegettime',
               'ntqueryinformationprocess', 'isprocessorfeaturepresent',
               'ntquerysysteminformation', 'createtoolhelp32snapshot', 'blockinput',
               'setunhandledexceptionfilter', 'queryperformancecounter',
               'ntsetdebugfilterstate', 'dbgbreakpoint',
               'rtlqueryprocessdebuginformation'}
    dsdcalls = {'createdesktop', 'switchdesktop'}
    importbl = {'openprocess', 'virtualallocex', 'writeprocessmemory',
                'createremotethread', 'readprocessmemory', 'createprocess',
                'winexec', 'shellexecute', 'httpsendrequest', 'internetreadfile',
                'internetconnect', 'createservice', 'startservice'}
    importwl = {'terminateprocess'}
    # Standard section names.
    predef_sections = {'.text', '.bss', '.rdata', '.data', '.rsrc', '.edata',
                       '.idata', '.pdata', '.debug', '.reloc', '.sxdata', '.tls'}
    # Default section names for common free/commercial packers.
    # From http://www.hexacorn.com/blog/2012/10/14/random-stats-from-1-2m-samples-pe-section-names/
    common_packer_names = {
        "aspack": "Aspack Packer", ".adata": "Aspack Packer/Armadillo packer",
        "ASPack": "Aspack packer", ".ASPack": "ASPAck Protector",
        ".MPRESS1": "MPRESS Packer", ".MPRESS2": "MPRESS Packer",
        "pebundle": "PEBundle Packer", "PEBundle": "PEBundle Packer",
        "PEC2TO": "PECompact Packer", "PEC2": "PECompact Packer",
        "pec1": "PECompact Packer", "pec2": "PECompact Packer",
        "PEC2MO": "PECompact Packer", "PELOCKnt": "PELock Protector",
        "PESHiELD": "PEShield Packer", "Themida": "Themida Packer",
        ".Themida": "Themida Packer", "UPX0": "UPX packer", "UPX1": "UPX packer",
        "UPX2": "UPX packer", "UPX!": "UPX packer", ".UPX0": "UPX Packer",
        ".UPX1": "UPX Packer", ".UPX2": "UPX Packer", ".vmp0": "VMProtect packer",
        ".vmp1": "VMProtect packer", ".vmp2": "VMProtect packer",
        "VProtect": "Vprotect Packer"
    }

    # Get filetype
    results.append(AnalysisResult(2, 'File Type', self.magic_type(self.filename)))

    if not self.pe.verify_checksum():
        results.append(AnalysisResult(0, 'Checksum', "The checksum %x does not match %x " %
                                      (self.pe.OPTIONAL_HEADER.CheckSum, self.pe.generate_checksum())))
    if peutils.is_probably_packed(self.pe):
        results.append(AnalysisResult(1, 'Packed', "Sample is probably packed"))

    # FIX: cache the overlay offset instead of scanning the file twice.
    overlay_offset = self.detect_overlay()
    if overlay_offset > 0:
        results.append(AnalysisResult(1, 'Overlay', "Detected Overlay [%s]" % hex(overlay_offset)))

    modules = self.listimports()
    impcount = len(modules)
    dsd = 0
    dotnet = False
    for modulename in modules:
        # pefile returns bytes for DLL names on Python 3 -- normalize to str.
        if isinstance(modulename, bytes):
            modulename = modulename.decode('latin-1', 'replace')
        if modulename.lower() == 'mscoree.dll':
            dotnet = True
            continue
        if modulename.upper() in modbl:
            results.append(AnalysisResult(0, 'Imports', "Suspicious Import [%s]" % modulename))
        if modulename.upper() in modlist:
            for symbol in modules[modulename]:
                symname = symbol.name
                if symname is None:
                    # FIX: imports by ordinal have no name; .lower() would crash.
                    continue
                if isinstance(symname, bytes):
                    symname = symname.decode('latin-1', 'replace')
                lowname = symname.lower()
                if lowname in dbglist:
                    results.append(AnalysisResult(0, 'AntiDebug', 'Anti-Debug Function import [%s]' % symbol.name))
                if lowname in importbl:
                    results.append(AnalysisResult(0, 'Imports', 'Suspicious API Call [%s]' % symbol.name))
                if lowname in importwl:
                    results.append(AnalysisResult(1, 'Imports', 'Suspicious API Call [%s]' % symbol.name))
                if lowname in dsdcalls:
                    dsd += 1

    # If the sample is dotnet, don't warn on a low import count.
    if impcount < 3 and not dotnet:
        results.append(AnalysisResult(1, 'Imports', "Low import count %d " % impcount))
    if dsd == 2:
        # FIX: corrected "SwitchDestkop" typo in the report message.
        results.append(AnalysisResult(0, 'AntiDebug', 'AntiDebug Function import CreateDesktop/SwitchDesktop'))

    for section in self.pe.sections:
        raw_name = section.Name
        # FIX: section.Name is bytes on Python 3; bytes.strip('\0') raises TypeError.
        if isinstance(raw_name, bytes):
            raw_name = raw_name.decode('latin-1', 'replace')
        name = raw_name.strip('\0')
        if name not in predef_sections:
            if name in common_packer_names:
                results.append(AnalysisResult(0, 'Sections', 'The section name [%s] is a common name for the %s' %
                                              (name, common_packer_names[name])))
            else:
                results.append(AnalysisResult(1, 'Sections', 'Uncommon Section Name [%s]' % name))
        if section.SizeOfRawData == 0:
            results.append(AnalysisResult(1, 'Sections', 'Raw Section Size is 0 [%s]' % name))
    return results
for fun in t[1]: try: out.write(" "+fun+" : "+config.alerts[fun]) except: pass out.write("--------- Entropy ----------") #Entropy with open(path, 'rb') as pe_file: pe_entropy = data_entropy(pe_file.read()) low_high_entropy = pe_entropy < 1 or pe_entropy > 7 if low_high_entropy: out.write("Possibly Packed") p = peutils.is_probably_packed(pe) if p is True: out.write("is packed : True") out.write("--------- Section wise analysis ----------") #section wise anlysis section_data=section_analysis(path) for i in section_data.keys(): out.write(section_data[i]) out.write("--------- get packer details from section names ----------") #get packer details from section names section_names = []
def generate_sample_packer_id(file_list):
    """Generate scalar packer IDs for each sample and write them to a CSV.

    For every file in *file_list*, attempt to parse it as a PE; if it looks
    packed, match it against the signature database and map the signature name
    to a numeric packer ID. One CSV row per file is written to
    data/<pid>-sorted-packer-id-features.csv (column headers are added later,
    during the combine stage).

    :param file_list: iterable of "VirusShare_<hash>"-style file names,
        resolved relative to the ``ext_drive`` prefix.
    """
    pid = os.getpid()
    out_path = "data/" + str(pid) + "-sorted-packer-id-features.csv"
    packer_id_map = load_packer_id_map()
    signatures = peutils.SignatureDatabase('data/userdb-sans.txt')
    non_pe_counter = 0
    pe_file_counter = 0
    out_lines = []
    signat = 'unknown'
    # FIX: 'with' guarantees the output file is closed even on an exception.
    with open(out_path, 'w') as fop:
        for idx, file_name in enumerate(file_list):
            # Remove the VirusShare_ prefix from the filename.
            truncated_file_name = file_name.split('_')[1]
            packer_id = 0
            is_valid = 0
            is_packed = 0
            try:
                pe = pefile.PE(ext_drive + file_name, fast_load=True)
                pe_file_counter += 1
                is_valid = 1
                try:
                    if peutils.is_probably_packed(pe):
                        is_packed = 1
                        matches = signatures.match(pe, ep_only=True)
                        signat = matches[0]
                        packer_id = packer_id_map.get(signat, 0)
                        # NOTE: if the signature database has commas in a packer
                        # name they must be removed, or they will corrupt the CSV
                        # when loading dataframes later on.
                        row = (truncated_file_name + "," + signat + "," + str(packer_id) +
                               "," + str(is_valid) + "," + str(is_packed) + "\n")
                    else:
                        # FIX: previously no row was assigned for an unpacked PE,
                        # so the previous iteration's row was appended again.
                        signat = 'unknown'
                        row = (truncated_file_name + ",unknown,0," + str(is_valid) +
                               "," + str(is_packed) + "\n")
                except Exception:
                    # Signature matching failed (e.g. no match found).
                    signat = ",unknown,0," + str(is_valid) + "," + str(is_packed) + "\n"
                    row = truncated_file_name + signat
                pe.close()
            except Exception as e:
                # Not a parsable PE file; record the error in the CSV row.
                non_pe_counter += 1
                # Remove commas or they will cause an error when loading dataframes.
                error_str = str(e).replace(',', '')
                signat = "," + error_str + ",0,0,0\n"
                row = truncated_file_name + signat
            out_lines.append(row)
            if (idx % 1000) == 0:  # flush and print progress periodically
                fop.writelines(out_lines)
                out_lines = []
                print('{:s} - {:s} - {:d} - {:s}'.format(str(pid), truncated_file_name, idx, signat))
        if out_lines:
            fop.writelines(out_lines)
    print('{:s} - Completed {:d} non PE files and {:d} PE files.'.format(str(pid), non_pe_counter, pe_file_counter))
    return
def analyze(self, afile):
    '''Analyze the Windows portable executable format.

    ref: https://en.wikipedia.org/wiki/Portable_Executable

    Args:
        afile (FileAnalysis): Mandrake file analysis object.

    Returns:
        None
    '''
    if afile.mime_type != 'application/x-dosexec':
        return  # guard clause: only PE (x-dosexec) files are handled

    try:
        pe = pefile.PE(afile.path)
    except Exception:
        # FIX: was a bare 'except:', which also swallowed SystemExit and
        # KeyboardInterrupt; Exception keeps the best-effort behavior.
        afile.errors = afile.errors + ['pe plugin: unsupported filetype']
        output = 'None'
        afile.plugin_output[self.__NAME__] = output
        return

    # FIX: close the PE mapping even if an analysis step raises (handle leak).
    try:
        header = pe.OPTIONAL_HEADER
        # Collect interesting DllCharacteristics flags from the binary.
        afile.high_entropy_aslr = header.IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA  # >32-bit ASLR
        afile.uses_aslr = header.IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE
        afile.force_integrity = header.IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY
        afile.uses_dep = header.IMAGE_DLLCHARACTERISTICS_NX_COMPAT  # DEP
        afile.force_no_isolation = header.IMAGE_DLLCHARACTERISTICS_NO_ISOLATION
        # Flag is "NO_SEH", so invert it to express "uses SEH".
        afile.uses_seh = not header.IMAGE_DLLCHARACTERISTICS_NO_SEH
        afile.no_bind = header.IMAGE_DLLCHARACTERISTICS_NO_BIND
        afile.app_container = header.IMAGE_DLLCHARACTERISTICS_APPCONTAINER
        afile.wdm_driver = header.IMAGE_DLLCHARACTERISTICS_WDM_DRIVER
        afile.uses_cfg = header.IMAGE_DLLCHARACTERISTICS_GUARD_CF
        afile.terminal_server_aware = header.IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE

        # Determine whether the pe file is likely to be packed.
        afile.is_probably_packed = peutils.is_probably_packed(pe)

        # Attach pe parser warnings.
        afile.warnings = pe.get_warnings()

        # Binary kind: dll / exe / driver.
        afile.is_dll = pe.is_dll()
        afile.is_exe = pe.is_exe()
        afile.is_driver = pe.is_driver()

        # Does the checksum check out?
        afile.verify_checksum = pe.verify_checksum()

        # Determine the compile date of the binary (local time from the
        # PE header timestamp).
        afile.compile_date = datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp)

        # Compute / retrieve the imphash.
        afile.imphash = pe.get_imphash()

        # Parse out the import table from within the pe file.
        imports = {}
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            afile.has_import_table = True
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                imports[entry.dll] = [imp.name for imp in entry.imports]
        afile.imports = imports

        # Parse out the export table listed within the pe file.
        exports = []
        if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            afile.has_export_table = True
            for entry in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                exports.append(entry.name)
        afile.exports = exports
    finally:
        pe.close()
def analyze(self, afile):
    '''Analyze the Windows portable executable format.

    ref: https://en.wikipedia.org/wiki/Portable_Executable

    Args:
        afile (FileAnalysis): Mandrake file analysis object.

    Returns:
        None
    '''
    if afile.mime_type == 'application/x-dosexec':
        try:
            pe = pefile.PE(afile.path)
        except:
            afile.errors = afile.errors + ['pe plugin: unsupported filetype']
            output = 'None'
            afile.plugin_output[self.__NAME__] = output
            return

        header = pe.OPTIONAL_HEADER
        # DllCharacteristics flags of interest, copied onto the analysis
        # object. The third element marks flags whose meaning is inverted
        # (NO_SEH flag set means SEH is NOT used).
        flag_table = (
            ('high_entropy_aslr', 'IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA', False),  # >32-bit ASLR
            ('uses_aslr', 'IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE', False),
            ('force_integrity', 'IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY', False),
            ('uses_dep', 'IMAGE_DLLCHARACTERISTICS_NX_COMPAT', False),  # DEP
            ('force_no_isolation', 'IMAGE_DLLCHARACTERISTICS_NO_ISOLATION', False),
            ('uses_seh', 'IMAGE_DLLCHARACTERISTICS_NO_SEH', True),
            ('no_bind', 'IMAGE_DLLCHARACTERISTICS_NO_BIND', False),
            ('app_container', 'IMAGE_DLLCHARACTERISTICS_APPCONTAINER', False),
            ('wdm_driver', 'IMAGE_DLLCHARACTERISTICS_WDM_DRIVER', False),
            ('uses_cfg', 'IMAGE_DLLCHARACTERISTICS_GUARD_CF', False),
            ('terminal_server_aware', 'IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE', False),
        )
        for attr, flag_name, inverted in flag_table:
            value = getattr(header, flag_name)
            setattr(afile, attr, (not value) if inverted else value)

        # Packing heuristic, parser warnings, and binary kind.
        afile.is_probably_packed = peutils.is_probably_packed(pe)
        afile.warnings = pe.get_warnings()
        afile.is_dll = pe.is_dll()
        afile.is_exe = pe.is_exe()
        afile.is_driver = pe.is_driver()

        # Checksum validity, compile timestamp, and imphash.
        afile.verify_checksum = pe.verify_checksum()
        afile.compile_date = datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp)
        afile.imphash = pe.get_imphash()

        # Import table: dll name -> list of imported symbol names.
        table = {}
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            afile.has_import_table = True
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                table[entry.dll] = [imp.name for imp in entry.imports]
        afile.imports = table

        # Export table: flat list of exported symbol names.
        symbols = []
        if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
            afile.has_export_table = True
            symbols = [entry.name for entry in pe.DIRECTORY_ENTRY_EXPORT.symbols]
        afile.exports = symbols

        pe.close()