def disinfect(self, filename, malware_id):
    """Disinfect a file that was reported as malware.

    Two treatments are supported, selected by ``malware_id``:

    * ``0`` -- the file itself is deleted.
    * high bit (``0x80000000``) set -- the low 31 bits are a PE section
      index; the raw data of that section is overwritten with zero bytes.

    Args:
        filename: Path of the file to disinfect.
        malware_id: ID value received from the malware scan result.

    Returns:
        True if the file was deleted or patched; False otherwise (unknown
        ID, PE parsing failed, section index out of range, or an I/O error).
    """
    try:
        # An ID of 0 means the whole file is removed.
        if malware_id == 0:
            os.remove(filename)
            return True  # disinfection succeeded

        if malware_id & 0x80000000 == 0x80000000:
            idx = malware_id & 0x7fffffff

            import pe

            # Read the whole file; the context manager guarantees the
            # handle is closed before the file is reopened for writing.
            with open(filename, 'rb') as fp:
                buf = fp.read()

            parser = pe.PE(buf, False, filename)
            try:
                pe_format = parser.parse()  # analyse the PE file
            except MemoryError:
                pe_format = None

            if pe_format is None:
                return False

            sections = pe_format['Sections']
            if len(sections) > idx:
                section = sections[idx]
                fsize = section['SizeRawData']
                foff = section['PointerRawData']

                # Splice a zero-filled block over the section's raw data.
                # A bytes literal keeps this valid for binary buffers.
                data = buf[:foff] + (b'\x00' * fsize) + buf[foff + fsize:]
                with open(filename, 'wb') as fp:
                    fp.write(data)
                return True
    except (IOError, OSError):
        # os.remove raises OSError, open/read/write raise IOError; both
        # mean the disinfection could not be carried out.
        pass

    return False  # disinfection failed
def main():
    """Parse the command line and run ``parse_pe`` on the requested file.

    Only the ``-f`` option (path of a PE file) is enabled; the DLL-path and
    export options are currently disabled.
    """
    arg_parser = argparse.ArgumentParser(
        prog="Malware Classification",
        description='Searching an export in a DLL',
    )
    arg_parser.add_argument(
        '-f',
        dest='file',
        help='Specify a disired pe file',
    )

    # Parsed values are exposed as attributes of ``options``.
    options = arg_parser.parse_args()

    # Test: parse the requested file into a freshly created PE object.
    pe_object = pe.PE()
    parse_pe(options.file, pe_object)
def pe_feature_sub(mm, fname):
    """Build the feature string for a single PE file.

    The result is the concatenation, in this order, of:

    1. ``text_entropy`` of the raw data of the section that contains the
       entry point,
    2. 256 bytes of the file starting 6 bytes after the PE header position,
    3. ``data_2gram`` of the raw data of the first section whose
       characteristics have both bits of ``0x40000040`` set,
    4. ``import_api`` of the parsed PE information.

    Args:
        mm: Sliceable buffer holding the file contents.
        fname: File name, passed to the parser and used in error output.

    Returns:
        The feature string, or None if the file could not be parsed or any
        error occurred while extracting features.
    """
    feature = ''

    try:
        parser = pefile.PE(mm, False, fname)
        pe_format = parser.parse()
        if not pe_format:
            return None

        pe_off = pe_format['PE_Position']
        ep = pe_format['EntryPoint']

        # Entropy of the section holding the entry point (the code section).
        # If no section matches, an empty slice is passed on.
        text_off = 0
        text_size = 0
        for sec in pe_format['Sections']:
            rva = sec['RVA']
            vsize = sec['VirtualSize']
            # NOTE(review): the upper bound is inclusive here; confirm
            # whether an entry point exactly at rva + vsize should match.
            if rva <= ep <= rva + vsize:
                text_off = sec['PointerRawData']
                text_size = sec['SizeRawData']
                break
        feature += text_entropy(mm[text_off:text_off + text_size])

        # PE header information.
        feature += mm[pe_off + 6:pe_off + 6 + 256]

        # 2-gram representation of the data section (data + readable).
        data_off = 0
        data_size = 0
        for sec in pe_format['Sections']:
            if sec['Characteristics'] & 0x40000040 == 0x40000040:
                data_off = sec['PointerRawData']
                data_size = sec['SizeRawData']
                break
        feature += data_2gram(mm[data_off:data_off + data_size])

        # Append the import API hashes.
        feature += import_api(pe_format)

        return feature
    except Exception:
        # Best effort: report the failing file and carry on, but let
        # KeyboardInterrupt / SystemExit propagate to the caller.
        import traceback
        # Single-argument print emits the same text on Python 2 and 3.
        print('[-] %s' % fname)
        print(traceback.format_exc())

    return None
def unarc(self, arc_engine_id, arc_name, fname_in_arc):
    """Extract the unpacked image from a UPX-packed PE file.

    Args:
        arc_engine_id: Archive engine identifier. Only identifiers
            containing ``'arc_upx'`` are handled; the text after the first
            8 characters selects the algorithm (only ``'nrv2b'`` is
            implemented here).
        arc_name: Path of the packed file.
        fname_in_arc: Name of the member inside the archive (not used by
            this handler).

    Returns:
        The unpacked image on success; None if the engine id is not handled
        or if opening, parsing or unpacking fails.
    """
    if arc_engine_id.find('arc_upx') == -1:
        return None

    fp = None
    mm = None
    data = None

    try:
        # Open the UPX-packed file and map it read-only.
        fp = open(arc_name, 'rb')
        mm = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)

        pe_format = pe.PE(mm, False, arc_name).parse()  # analyse the PE file
        if pe_format is None:
            # Raise (not return) so the caller gets None and the cleanup
            # below still runs.
            raise ValueError

        pe_img = pe_format['ImageBase']
        pe_ep = pe_format['EntryPoint']
        sections = pe_format['Sections']
        ep_raw = pe_format['EntryPointRaw']  # raw file offset of the EP
        ep_nsec = pe_format['EntryPoint_in_Section']  # index of the EP section

        foff = 0
        ssize = 0
        dsize = 0
        upx0 = 0
        upx1 = 0

        # Locate the section containing the entry point; the section just
        # before it is needed as well, so the first section cannot qualify.
        for i, section in enumerate(sections):
            ssize = section['VirtualSize']
            rva = section['RVA']
            if rva <= pe_ep < rva + ssize:
                foff = section['PointerRawData']
                if i != 0:
                    upx0 = sections[i - 1]['RVA']
                    upx1 = section['RVA']
                    dsize = sections[i - 1]['VirtualSize'] + ssize
                break

        if ssize == 0 or dsize == 0:
            raise ValueError

        upx_data_rva = kavutil.get_uint32(mm, ep_raw + 2)
        sec_rva = sections[ep_nsec]['RVA']
        skew = upx_data_rva - sec_rva - pe_img

        # A one-byte slice compares as bytes on both Python 2 and 3.
        if mm[ep_raw + 1:ep_raw + 2] != b'\xBE' or skew <= 0 or skew > 0xFFF:
            skew = 0
        elif skew > ssize:
            skew = 0
        else:
            # A plausible non-zero skew is rejected rather than applied.
            raise ValueError

        packed = mm[foff + skew:foff + ssize - skew]

        unpack_data = ''  # image after UPX decompression
        if arc_engine_id[8:] == 'nrv2b':  # is it the nrv2b algorithm?
            _, unpack_data = upx_inflate2b(
                packed, dsize, pe_ep, upx0, upx1, pe_img)

        if not unpack_data:  # decompression failed or unsupported algorithm
            raise ValueError

        # Only publish a result once unpacking has really succeeded, so a
        # failure never hands the still-packed bytes back to the caller.
        data = unpack_data
    except (IOError, ValueError):
        pass
    finally:
        # Release the mapping and the file on every path, including
        # unexpected exceptions that propagate to the caller.
        if mm is not None:
            mm.close()
        if fp is not None:
            fp.close()

    return data
type=str) parser.add_argument('-o', dest='output', help='desired output report format (json is default)', choices=['json', 'html'], default='json') args = parser.parse_args() # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # parse each file specified # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pedata = [] if args.file and os.path.isfile(args.file): print('[-] parsing file {0} ...'.format(args.file)) pedata.append({ 'filename': os.path.basename(args.file), 'data': pe.PE(args.file, True), 'sha256': sha256_checksum(args.file), }) elif args.directory: for f in os.listdir(args.directory): path = os.path.join(args.directory, f) if os.path.isfile(path): print('[-] parsing file {0} ...'.format(path)) pedata.append({ 'filename': f, 'data': pe.PE(path, True), 'sha256': sha256_checksum(path), }) else: print('[-] ERROR: no files specified for parsing.') quit()
def __init__(self, options):
    """Keep the given options and parse the binary they point to.

    Args:
        options: Object whose ``binary`` attribute is handed to ``pe.PE``.
    """
    self.__options = options

    # Parse the target once, up front, and keep the result on the instance.
    parsed_binary = pe.PE(options.binary)
    self.__binary = parsed_binary

    print('[+] Parsing PE file completed.')
def main():
    """Parse a PE file, then load a directory of DLLs into an emulator.

    Command-line options:
        -d  directory that is walked for DLL files
        -e  export name (parsed, but not used in this function)
        -f  PE file handed to ``parse_pe``

    After parsing the file, a 32-bit x86 emulator is created, the FS area,
    PEB and TEB are written into its memory, the stack registers are set,
    and every file found under the DLL directory is loaded at increasing
    base addresses. Emulator errors (``UcError``) are reported, not raised.
    """
    parser = argparse.ArgumentParser(
        prog="Malware Classification",
        description='Searching an export in a DLL')
    # Path of the directory holding the DLLs.
    parser.add_argument('-d', dest='dll_path', help='Specify a dll path')
    # Desired export.
    parser.add_argument('-e', dest='export', help='Specify a disired export')
    # Desired PE file.
    parser.add_argument('-f', dest='file', help='Specify a disired pe file')
    # Parsed values are accessed through ``args``.
    args = parser.parse_args()

    # Test
    peobj = pe.PE()
    parse_pe(args.file, peobj)

    # Create a LDR instance
    x86os = datastructure.X86_OS()
    Dlls = []

    # Bound before the try so the error handler can tell whether the
    # emulator was ever created.
    mu = None
    try:
        # Initialize an emulator in X86-32bit mode
        mu = Uc(UC_ARCH_X86, UC_MODE_32)

        # Map 100 MB of memory for the emulation
        mu.mem_map(constants.FS_0, 100 * 1024 * 1024)

        # Initialize the FS area and point the FS register at it
        fss = x86os.init_FS()
        mu.mem_write(constants.FS_0, fss)
        mu.reg_write(UC_X86_REG_FS, constants.FS_0)

        # Initialize PEB
        print("Initializing PEB")
        mu.mem_write(constants.PEB_ADD, x86os.init_peb())

        # Initialize TEB
        print("Initializing TEB")
        mu.mem_write(constants.TEB_ADD, x86os.init_teb())

        # Initialize the emulator stack
        mu.reg_write(UC_X86_REG_EBP, constants.STACK_BASE)
        mu.reg_write(UC_X86_REG_ESP, constants.STACK_BASE)

        # Testing: dump what was just written
        print("PEB", mu.mem_read(constants.PEB_ADD, len(x86os.init_peb())))
        print("TEB", mu.mem_read(constants.TEB_ADD, len(x86os.init_teb())))
        print("FS", mu.mem_read(constants.FS_0, len(x86os.init_FS())))
        print("FS register: 0x%08x " % (mu.reg_read(UC_X86_REG_FS)))
        print("ESP register: 0x%08x " % (mu.reg_read(UC_X86_REG_ESP)))
        print("EBP register: 0x%08x " % (mu.reg_read(UC_X86_REG_EBP)))

        # Loading dlls
        print("\n>>> Loading DLLs...")

        # Iterate over the DLL directory
        for subdir, dirs, files in os.walk(args.dll_path):
            # NOTE(review): the bases below restart for every directory
            # visited by os.walk -- confirm that nested directories are
            # not expected here, otherwise their DLLs would overlap.
            # Base address for the DLL images
            dllBase = 0x550000
            # Base address of the DLL names
            dllNameBase = 0x2500
            # Base address of the LDR modules
            ldrBase = 0x9000
            # Number of DLLs in this directory
            NoOfDlls = 0
            # LDR addresses, kept to track the order of the list
            ldrList = []
            # List of in-memory DLLs
            dllList = datastructure.DoubleList()

            for fname in files:
                NoOfDlls = NoOfDlls + 1

                # Full path of the DLL
                fullpath = os.path.join(subdir, fname)

                # Create a DLL instance and let the loader fill it in
                dllobj = pe.Dll()
                dllobj = dll_loader(fullpath, dllobj, dllBase, dllNameBase,
                                    ldrBase)

                # Write the DLL components into emulator memory
                loadDlls(mu, dllobj)
                print("Reading %d bytes from [0x%08x]: %s" %
                      (dllobj.sizeOfDllName(),
                       dllobj.getDllNameBase(),
                       mu.mem_read(dllobj.getDllNameBase(),
                                   dllobj.sizeOfDllName())))
                print("\n")

                # Size of the DLL just loaded
                dllSize = dllobj.sizeOfData()

                # Remember the instance in both bookkeeping lists
                Dlls.append(dllobj)
                dllList.append(dllobj)
                ldrList.append(ldrBase)

                # Place the next DLL right after this one
                dllBase = dllBase + dllSize
                # Advance the name slot
                dllNameBase = dllNameBase + 200
                # Each LDR module takes 8192 bytes, so advance by 0x2000
                ldrBase = ldrBase + 8192

            # Print the in-memory list
            dllList.show()

    except UcError as e:
        print("ERROR: %s" % e)
        # Uc() itself may have raised, in which case there is no emulator
        # to stop.
        if mu is not None:
            mu.emu_stop()
for b in iter(lambda: f.read(block_size), b''): sha256.update(b) sha1.update(b) md5.update(b) return { 'md5': md5.hexdigest(), 'sha1': sha1.hexdigest(), 'sha256': sha256.hexdigest() } pedata = [] if args.file and os.path.isfile(args.file): logging.info('parsing file {0} ...'.format(args.file)) pedata.append({ 'file': os.path.basename(args.file), 'data': pe.PE(args.file), 'hashes': _hash(args.file), }) elif args.directory: for f in os.listdir(args.directory): path = os.path.join(args.directory, f) if os.path.isfile(path): logging.info('parsing file {0}'.format(path)) pedata.append({ 'file': f, 'data': pe.PE(path), 'hashes': _hash(path), }) else: logging.error('no files specified for parsing.') quit()