def getIats(binary, IAT_BASE):
    """Collect the rebased address of every import found in ``binary``.

    The file is parsed with ``pepy``; each import object's ``addr`` is
    offset by ``IAT_BASE`` and one debug log record is emitted per import.

    Args:
        binary: Value handed unchanged to ``pepy.parse``.
        IAT_BASE: Offset added to each import object's ``addr``.

    Returns:
        A set holding ``addr + IAT_BASE`` for every import object.
    """
    parsed = pepy.parse(binary)
    rebased_addrs = set()
    for entry in parsed.get_imports():
        target = entry.addr + IAT_BASE
        rebased_addrs.add(target)
        logging.debug("iat obj %s at 0x%x" % (entry.sym, target))
    return rebased_addrs
def getNonRetFuncsFromImportObjs(binary, known_non_ret):
    """Add the address of each known non-returning import to ``known_non_ret``.

    An import qualifies when its ``sym`` is a member of the module-level
    ``KNOWN_NON_RETS`` collection (defined outside this function).

    Args:
        binary: Value handed unchanged to ``pepy.parse``.
        known_non_ret: Collection with an ``add`` method; it is mutated in
            place with the raw ``addr`` of every matching import.

    Returns:
        None. The result is delivered through ``known_non_ret``.
    """
    for entry in pepy.parse(binary).get_imports():
        if entry.sym not in KNOWN_NON_RETS:
            continue
        logging.debug(
            "Adding known non-ret %s at 0x%x" % (entry.sym, entry.addr)
        )
        known_non_ret.add(entry.addr)
def getAngrBlackAddrs(binary, iat_base):
    """Record the rebased address of every blacklisted import.

    An import is blacklisted when its ``sym`` is a member of the
    module-level ``angr_black_plt`` collection. Matching addresses
    (``addr + iat_base``) are added to the module-level ``BLACK_ADDRS``.

    Args:
        binary: Value handed unchanged to ``pepy.parse``.
        iat_base: Offset added to each matching import's ``addr``.

    Returns:
        None. ``BLACK_ADDRS`` is mutated in place.
    """
    # BLACK_ADDRS is only mutated, never rebound, so no ``global``
    # declaration is required to reach the module-level object.
    for entry in pepy.parse(binary).get_imports():
        if entry.sym not in angr_black_plt:
            continue
        BLACK_ADDRS.add(entry.addr + iat_base)
# Constructor: stores ``path`` on the instance, parses that file with
# ``pepy.parse`` and fills ``self.dllcharacteristics`` with ``c[0]`` for every
# entry ``c`` of ``dll_characteristics`` whose ``c[1]`` shares at least one set
# bit with the parsed ``dllcharacteristics`` value. It also prints that value
# in hexadecimal.
# NOTE(review): ``dll_characteristics`` is defined outside this view; presumably
#   a sequence of (name, bitmask) pairs -- confirm against its definition.
# NOTE(review): indentation was lost in this paste, so it cannot be determined
#   here whether the print runs once or on every loop iteration -- confirm
#   against the original file before reformatting.
def __init__(self, path): self.path = path p = pepy.parse(path) self.dllcharacteristics = [] for c in dll_characteristics: if p.dllcharacteristics & c[1]: self.dllcharacteristics.append(c[0]) print('0x%x' % p.dllcharacteristics)
def getNonRetFuncsFromImportObjs(binary, known_non_ret):
    """Collect rebased import addresses and flag known non-returning imports.

    Every import object's ``addr`` is offset by ``IAT_BASE`` (a name resolved
    from the enclosing module, not a parameter). Imports whose ``sym`` is a
    member of the module-level ``KNOWN_NON_RETS`` collection additionally have
    their rebased address added to ``known_non_ret``.

    Args:
        binary: Value handed unchanged to ``pepy.parse``.
        known_non_ret: Collection with an ``add`` method; mutated in place
            with the rebased address of every known non-returning import.

    Returns:
        A set holding the rebased address of every import object.
    """
    p = pepy.parse(binary)
    all_imports = set()
    for iobj in p.get_imports():
        # Compute the rebased address once so that the returned set,
        # ``known_non_ret`` and both log records all refer to the same value.
        addr = iobj.addr + IAT_BASE
        all_imports.add(addr)
        logging.debug("iat obj %s at 0x%x", iobj.sym, addr)
        if iobj.sym in KNOWN_NON_RETS:
            # Log the rebased address -- the one actually stored -- rather
            # than the raw import address, so the log matches the data.
            logging.debug("Adding known non-ret %s at 0x%x", iobj.sym, addr)
            known_non_ret.add(addr)
    return all_imports
def load(self):
    """Parse the PE file at ``self.path`` and build its summary dictionary.

    Sets ``self.pe`` (the pepy parse result), ``self.data`` (the file data
    as ``bytes``), ``self.sections`` and ``self.dict_pe``. The ``hashes``
    entry of ``self.dict_pe`` is filled from the four values returned by
    ``Utils.get_hashes(self.data)``, stored in order under ``md5``,
    ``sha1``, ``sha256`` and ``ssdeep``.
    """
    self.pe = pepy.parse(self.path)
    self.data = bytes(self.pe.get_data())
    self.sections = self.pe.get_sections()

    kind_flags = Utils.get_characteristics(self.pe.characteristics)
    is_exe, is_dll, is_driver = kind_flags

    # Computed in the same order the values appear in the dictionary below.
    entry_point = hex(self.pe.get_entry_point())
    machine = self.pe.machine
    file_size = os.stat(self.path).st_size
    section_count = len(self.sections)
    compiled_at = datetime.datetime.fromtimestamp(
        int(self.pe.timedatestamp)
    ).strftime('%Y-%m-%d %H:%M:%S')

    self.dict_pe = {
        'type': 'PE',
        'sections': [],
        'address_entrypoint': entry_point,
        'section_entrypoint': '',
        'path': self.path,
        'hashes': {},
        'assembly': '',
        'exports': [],
        'imports': {},
        'tls': [],
        'strings': [],
        'is_dll': is_dll,
        'is_driver': is_driver,
        'is_exe': is_exe,
        'x86': machine == 0x14c,
        'x86_64': machine in (0x8664, 0x0200),
        'size': file_size,
        'number_sections': section_count,
        'resources': [],
        'Date Compilation': compiled_at,
    }

    # Unpack first so a wrong-length result raises before anything is stored.
    md5_value, sha1_value, sha256_value, ssdeep_value = Utils.get_hashes(
        self.data
    )
    hashes = self.dict_pe['hashes']
    hashes['md5'] = md5_value
    hashes['sha1'] = sha1_value
    hashes['sha256'] = sha256_value
    hashes['ssdeep'] = ssdeep_value
# Script fragment (uses print() calls): parses the PE file named by
# sys.argv[1] with pepy.parse; if that raises pepy.error the error is printed
# and the process exits with status 1. It then prints header fields of the
# parsed object: magic, signature, machine (hex plus get_machine_as_str()),
# number of sections and symbols, characteristics, the timestamp formatted in
# local time, linker versions, code / initialized / uninitialized data sizes,
# entry point address and base of code.
# NOTE(review): the fragment stops at a bare `try:`; the remainder of the
#   script is not visible here, so it is left untouched.
# NOTE(review): as pasted, all statements share one physical line with the
#   leading `#!` shebang, so Python reads the entire line as a comment --
#   the original line breaks need to be restored for this to execute.
#!/usr/bin/env python import sys import time import pepy import binascii from hashlib import md5 try: p = pepy.parse(sys.argv[1]) except pepy.error as e: print(e) sys.exit(1) print("Magic: %s" % hex(p.magic)) print("Signature: %s" % hex(p.signature)) print("Machine: %s (%s)" % (hex(p.machine), p.get_machine_as_str())) print("Number of sections: %s" % p.numberofsections) print("Number of symbols: %s" % p.numberofsymbols) print("Characteristics: %s" % hex(p.characteristics)) print("Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp))) print("Major linker version: %s" % hex(p.majorlinkerver)) print("Minor linker version: %s" % hex(p.minorlinkerver)) print("Size of code: %s" % hex(p.codesize)) print("Size of initialized data: %s" % hex(p.initdatasize)) print("Size of uninitialized data: %s" % hex(p.uninitdatasize)) print("Address of entry point: %s" % hex(p.entrypointaddr)) print("Base address of code: %s" % hex(p.baseofcode)) try:
# Script fragment (uses Python 2 print statements, which are a syntax error
# under Python 3): parses the PE file named by sys.argv[1] with pepy.parse; if
# that raises pepy.error the error is printed and the process exits with
# status 1. It then prints header fields of the parsed object: magic,
# signature, machine, number of sections and symbols, characteristics, the
# timestamp formatted in local time, linker versions, code / initialized /
# uninitialized data sizes, entry point address and base of code, and starts
# a `try:` that prints the base of data.
# NOTE(review): the handler belonging to that final `try:` is not visible
#   here, so the fragment is left untouched.
# NOTE(review): as pasted, all statements share one physical line with the
#   leading `#!` shebang, so Python reads the entire line as a comment --
#   the original line breaks need to be restored for this to execute.
#!/usr/bin/env python import sys import time import pepy import binascii from hashlib import md5 try: p = pepy.parse(sys.argv[1]) except pepy.error as e: print e sys.exit(1) print "Magic: %s" % hex(p.magic) print "Signature: %s" % hex(p.signature) print "Machine: %s" % hex(p.machine) print "Number of sections: %s" % p.numberofsections print "Number of symbols: %s" % p.numberofsymbols print "Characteristics: %s" % hex(p.characteristics) print "Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp)) print "Major linker version: %s" % hex(p.majorlinkerver) print "Minor linker version: %s" % hex(p.minorlinkerver) print "Size of code: %s" % hex(p.codesize) print "Size of initialized data: %s" % hex(p.initdatasize) print "Size of uninitialized data: %s" % hex(p.uninitdatasize) print "Address of entry point: %s" % hex(p.entrypointaddr) print "Base address of code: %s" % hex(p.baseofcode) try: print "Base address of data: %s" % hex(p.baseofdata)
# Builds a feature row for the PE file at self.filename, scores it with
# `model`, and shows a recommendation in the UI labels.
# Steps visible in the code:
#   1. x = test(pepy.parse(self.filename)); for each row of x['symbol'],
#      row[0] is collected as an imported symbol and row[1] as an imported DLL.
#   2. self.x_list holds 24 entries: parsed header fields from x mixed with
#      the self.mean_* attributes. Entries 7 and 8 (the DLL list and the symbol
#      list) are read out of the array and then deleted, leaving 22 values
#      reshaped to a single row.
#   3. n_x_test has 132 columns. For each DLL kept (the loop breaks when its
#      counter reaches 20), columns 3*j .. 3*j+2 receive the two values from
#      scrape_dict[name] (or the defaults 1 and 4 when the name is absent)
#      followed by that DLL's TF-IDF weight; pairs are ordered by weight,
#      highest first. Columns 60 onward receive the 22 remaining features
#      followed by the 50 largest symbol TF-IDF values (22 + 50 = 132 - 60).
#   4. The row is scaled as (n_x_test - self.mean) / (self.var ** 0.5 + 0.069)
#      and passed to model.predict. The result maps to "Low" for 0..0.3,
#      "Medium" for above 0.3 up to 0.6, "High" for above 0.6 up to 1, and
#      "Out of range" otherwise, each with its own advice string.
#   5. self.label is updated with "Recommendation : <level>", then replaced
#      by a new ttk.Label placed at grid column 1, row 3 showing the advice.
# NOTE(review): test, df, dll_vec, sym_vec, scrape_dict, model, np and ttk are
#   defined outside this view.
# NOTE(review): `row = df.iloc[i + 40001, :]` is assigned but never read again
#   in the visible code -- confirm whether it is still needed.
# NOTE(review): dll_vec / sym_vec are presumably scikit-learn vectorizers; if
#   so, get_feature_names() is unavailable in recent releases in favour of
#   get_feature_names_out() -- confirm the pinned version.
# NOTE(review): indentation was lost in this paste and the line break falls in
#   the middle of the `sym_tfidf = ...` statement, so loop nesting cannot be
#   confirmed here; the code is left untouched.
def fileDialog(self): x = test(pepy.parse(self.filename)) importedDLL = set() importedSymbols = set() for row in x['symbol']: importedSymbols.add(row[0]) importedDLL.add(row[1]) self.x_list = [x['Baseofcode'], x['baseaddr'], x['characteristics'], x['dllchar'], self.mean_entropy, x['filealign'], x['imagebase'], list(importedDLL), list(importedSymbols), x['Machine'][0], x['Magic'], x['rva'], x['Number of Sections'], x['Number of symbols'], self.mean_petype, self.mean_pointer, self.mean_size, x['CodeSize'], x['headersize'], x['imagesize'], x['SizeofInitial'], self.mean_optionalHeader, x['UninitSize'], self.mean_timestamp] y = "" z = "" m = np.array(self.x_list) imported_dlls = m[7] imported_syms = m[8] m = np.delete(m, 7) m = np.delete(m, 7) m = np.reshape(m, (1, m.shape[0])) print("m:", m) x_test = m n_x_test = np.zeros(shape=(x_test.shape[0], 132)) for i in range(0, x_test.shape[0]): if i % 1000 == 0: print(i) row = df.iloc[i + 40001, :] row_dlls = imported_dlls row_syms = imported_syms row_dlss_str="" row_syms_str="" for ele in row_dlls: row_dlss_str += ele.lower() +" " for ele in row_syms: row_syms_str += ele.lower() +" " print(row_dlss_str) print(row_syms_str) dll_tfidfs = dll_vec.transform([row_dlss_str, ]).toarray()[0] dll_tfidf_pairs = [] for num, dll in enumerate(row_dlss_str.split()): if num == 20: break dll_tfidf = dll_tfidfs[list(dll_vec.get_feature_names()).index(dll)] dll_tfidf_pairs.append([dll_tfidf, list(dll_vec.get_feature_names()).index(dll)]) dll_tfidf_pairs = np.array(dll_tfidf_pairs) # print(dll_tfidf_pairs) dll_tfidf_pairs = dll_tfidf_pairs[dll_tfidf_pairs[:, 0].argsort()[::-1]] for j, pair in enumerate(dll_tfidf_pairs): name = dll_vec.get_feature_names()[int(pair[1])] if name in scrape_dict: n_x_test[i, 3 * j] = scrape_dict[name][0] n_x_test[i, 3 * j + 1] = scrape_dict[name][1] n_x_test[i, 3 * j + 2] = pair[0] else: n_x_test[i, 3 * j] = 1 n_x_test[i, 3 * j + 1] = 4 n_x_test[i, 3 * j + 2] = pair[0] # print(ip1_train) sym_tfidf = 
sym_vec.transform([row_syms_str, ]).toarray()[0] sym_tfidf = sorted(sym_tfidf, reverse=True)[:50] ip2_train = np.append(x_test[i], sym_tfidf) n_x_test[i, 60:] = ip2_train num = model.predict((n_x_test - self.mean) / (self.var ** 0.5 + 0.069)) print("NUM" + str(num)) if num >= 0 and num <= 0.3: y = "Low" z = "Good to use" elif num > 0.3 and num <= 0.6: y = "Medium" z = "Can be used" elif num > 0.6 and num <= 1: y = "High" z = "Avoid Using" else: y = "Out of range" z = "Cant determine" self.label.config(text="Recommendation : " + y) self.label = ttk.Label(self.labelFrame, text="") self.label.grid(column=1, row=3) self.label.config(text=z)