Ejemplo n.º 1
0
def getIats(binary, IAT_BASE):
    """Collect the rebased address of every import in a PE file.

    Args:
        binary: Value handed to ``pepy.parse`` to load the PE.
        IAT_BASE: Offset added to each import object's ``addr``.

    Returns:
        A set containing ``addr + IAT_BASE`` for each import object.
    """
    rebased_addrs = set()
    for imp in pepy.parse(binary).get_imports():
        rebased = imp.addr + IAT_BASE
        rebased_addrs.add(rebased)
        logging.debug("iat obj %s at 0x%x" % (imp.sym, rebased))
    return rebased_addrs
Ejemplo n.º 2
0
def getNonRetFuncsFromImportObjs(binary, known_non_ret):
    """Record the addresses of imports whose symbol is in KNOWN_NON_RETS.

    Args:
        binary: Value handed to ``pepy.parse`` to load the PE.
        known_non_ret: Collection updated in place through ``add()`` with
            the ``addr`` of every matching import object.
    """
    for imp in pepy.parse(binary).get_imports():
        # Skip every import that is not listed as non-returning.
        if imp.sym not in KNOWN_NON_RETS:
            continue
        logging.debug("Adding known non-ret %s at 0x%x" %
                      (imp.sym, imp.addr))
        known_non_ret.add(imp.addr)
Ejemplo n.º 3
0
def getAngrBlackAddrs(binary, iat_base):
    """Add the rebased addresses of blacklisted imports to BLACK_ADDRS.

    Args:
        binary: Value handed to ``pepy.parse`` to load the PE.
        iat_base: Offset added to each matching import's ``addr``.
    """
    # BLACK_ADDRS is only mutated through add(), never rebound, so the
    # module-level object is reached without a ``global`` declaration.
    for imp in pepy.parse(binary).get_imports():
        if imp.sym not in angr_black_plt:
            continue
        BLACK_ADDRS.add(imp.addr + iat_base)
Ejemplo n.º 4
0
    def __init__(self, path):
        """Parse the PE at *path* and record which DLL characteristics are set.

        Each entry of ``dll_characteristics`` is indexed as ``[0]`` (the value
        stored in ``self.dllcharacteristics``) and ``[1]`` (the bit mask tested
        against the parsed file's ``dllcharacteristics`` field).
        """
        self.path = path
        pe = pepy.parse(path)
        self.dllcharacteristics = [
            entry[0] for entry in dll_characteristics
            if pe.dllcharacteristics & entry[1]
        ]

        # Echo the raw bit field in hex.
        print('0x%x' % pe.dllcharacteristics)
Ejemplo n.º 5
0
def getNonRetFuncsFromImportObjs(binary, known_non_ret):
    """Collect rebased import addresses and record the known non-returning ones.

    Every import object's ``addr`` is offset by the module-level ``IAT_BASE``.
    Imports whose symbol is in ``KNOWN_NON_RETS`` additionally have their
    rebased address added to *known_non_ret*.

    Args:
        binary: Value handed to ``pepy.parse`` to load the PE.
        known_non_ret: Collection updated in place through ``add()``.

    Returns:
        A set with the rebased address of every import object.
    """
    p = pepy.parse(binary)
    all_imports = set()
    for iobj in p.get_imports():
        rebased_addr = iobj.addr + IAT_BASE
        all_imports.add(rebased_addr)
        logging.debug("iat obj %s at 0x%x", iobj.sym, rebased_addr)
        if iobj.sym in KNOWN_NON_RETS:
            # Log the rebased address, i.e. the exact value that is stored
            # below, so the debug output matches the recorded set contents.
            logging.debug("Adding known non-ret %s at 0x%x",
                          iobj.sym, rebased_addr)
            known_non_ret.add(rebased_addr)
    return all_imports
Ejemplo n.º 6
0
    def load(self):
        """Parse the PE at ``self.path`` and build its summary dictionary.

        Sets ``self.pe``, ``self.data`` and ``self.sections`` from the parsed
        file, then fills ``self.dict_pe`` with header-derived fields, empty
        placeholders, and the hashes returned by ``Utils.get_hashes``.
        """
        self.pe = pepy.parse(self.path)
        self.data = bytes(self.pe.get_data())
        self.sections = self.pe.get_sections()

        is_exe, is_dll, is_driver = Utils.get_characteristics(
            self.pe.characteristics)

        entry_point = hex(self.pe.get_entry_point())
        machine = self.pe.machine
        file_size = os.stat(self.path).st_size
        section_count = len(self.sections)
        # fromtimestamp() without a tz argument yields local time.
        compiled_at = datetime.datetime.fromtimestamp(
            int(self.pe.timedatestamp))

        self.dict_pe = {
            'type': 'PE',
            'sections': [],
            'address_entrypoint': entry_point,
            'section_entrypoint': '',
            'path': self.path,
            'hashes': {},
            'assembly': '',
            'exports': [],
            'imports': {},
            'tls': [],
            'strings': [],
            'is_dll': is_dll,
            'is_driver': is_driver,
            'is_exe': is_exe,
            'x86': machine == 0x14c,
            # NOTE(review): 0x0200 is IMAGE_FILE_MACHINE_IA64 (Itanium) in the
            # PE specification -- confirm it is meant to count as x86_64.
            'x86_64': machine in (0x8664, 0x0200),
            'size': file_size,
            'number_sections': section_count,
            'resources': [],
            'Date Compilation': compiled_at.strftime('%Y-%m-%d %H:%M:%S'),
        }

        md5, sha1, sha256, ssdeep = Utils.get_hashes(self.data)
        self.dict_pe['hashes'].update(
            md5=md5, sha1=sha1, sha256=sha256, ssdeep=ssdeep)
Ejemplo n.º 7
0
#!/usr/bin/env python

import sys
import time
import pepy
import binascii

from hashlib import md5

# The PE file to inspect is taken from the first command-line argument.
# Without this guard a missing argument surfaces as an uncaught IndexError.
if len(sys.argv) < 2:
    print("Usage: %s <pe-file>" % sys.argv[0])
    sys.exit(1)

try:
    p = pepy.parse(sys.argv[1])
except pepy.error as e:
    # pepy could not parse the file: report the error and stop.
    print(e)
    sys.exit(1)

# Dump header fields of the parsed file; numeric values go through hex().
print("Magic: %s" % hex(p.magic))
print("Signature: %s" % hex(p.signature))
print("Machine: %s (%s)" % (hex(p.machine), p.get_machine_as_str()))
print("Number of sections: %s" % p.numberofsections)
print("Number of symbols: %s" % p.numberofsymbols)
print("Characteristics: %s" % hex(p.characteristics))
# The timestamp is rendered in the local timezone (time.localtime).
print("Timedatestamp: %s" %
      time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp)))
print("Major linker version: %s" % hex(p.majorlinkerver))
print("Minor linker version: %s" % hex(p.minorlinkerver))
print("Size of code: %s" % hex(p.codesize))
print("Size of initialized data: %s" % hex(p.initdatasize))
print("Size of uninitialized data: %s" % hex(p.uninitdatasize))
print("Address of entry point: %s" % hex(p.entrypointaddr))
print("Base address of code: %s" % hex(p.baseofcode))
try:
Ejemplo n.º 8
0
#!/usr/bin/env python

import sys
import time
import pepy
import binascii

from hashlib import md5

# NOTE: this snippet uses Python 2 print statements.
# Parse the PE file named by the first command-line argument; on a pepy
# parse error, print the error and exit with status 1.
try:
    p = pepy.parse(sys.argv[1])
except pepy.error as e:
    print e
    sys.exit(1)

# Dump header fields of the parsed file; numeric values go through hex().
print "Magic: %s" % hex(p.magic)
print "Signature: %s" % hex(p.signature)
print "Machine: %s" % hex(p.machine)
print "Number of sections: %s" % p.numberofsections
print "Number of symbols: %s" % p.numberofsymbols
print "Characteristics: %s" % hex(p.characteristics)
# The timestamp is rendered in the local timezone (time.localtime).
print "Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp))
print "Major linker version: %s" % hex(p.majorlinkerver)
print "Minor linker version: %s" % hex(p.minorlinkerver)
print "Size of code: %s" % hex(p.codesize)
print "Size of initialized data: %s" % hex(p.initdatasize)
print "Size of uninitialized data: %s" % hex(p.uninitdatasize)
print "Address of entry point: %s" % hex(p.entrypointaddr)
print "Base address of code: %s" % hex(p.baseofcode)
try:
    print "Base address of data: %s" % hex(p.baseofdata)
	def fileDialog(self):
		"""Score the PE at ``self.filename`` with ``model`` and display the result.

		Builds a 132-column feature row from the fields returned by
		``test(pepy.parse(self.filename))``, several ``self.mean_*`` values and
		the TF-IDF weights of the imported DLL and symbol names, normalises it
		with ``self.mean`` / ``self.var``, calls ``model.predict`` and writes a
		risk level plus advice text into label widgets.
		"""
		x = test(pepy.parse(self.filename))

		# Each row of x['symbol'] is read as (symbol, dll): index 0 goes to the
		# symbol set and index 1 to the DLL set.
		importedDLL = set()
		importedSymbols = set()
		for row in x['symbol']:
			importedSymbols.add(row[0])
			importedDLL.add(row[1])
		# 24 entries; positions 7 and 8 hold the DLL and symbol name lists.
		self.x_list = [x['Baseofcode'], x['baseaddr'], x['characteristics'], x['dllchar'], self.mean_entropy,
		               x['filealign'], x['imagebase'], list(importedDLL), list(importedSymbols), x['Machine'][0],
		               x['Magic'], x['rva'], x['Number of Sections'], x['Number of symbols'], self.mean_petype,
		               self.mean_pointer, self.mean_size, x['CodeSize'], x['headersize'], x['imagesize'],
		               x['SizeofInitial'], self.mean_optionalHeader, x['UninitSize'], self.mean_timestamp]
		y = ""
		z = ""
		m = np.array(self.x_list)
		imported_dlls = m[7]
		imported_syms = m[8]
		# Drop the two name lists: after the first delete the symbol list has
		# shifted to index 7, hence the same index twice. 22 entries remain.
		m = np.delete(m, 7)
		m = np.delete(m, 7)
		m = np.reshape(m, (1, m.shape[0]))
		print("m:", m)

		x_test = m
		# Layout: columns 0-59 hold up to 20 DLL triples, columns 60-131 hold
		# the 22 remaining features followed by 50 symbol TF-IDF weights.
		n_x_test = np.zeros(shape=(x_test.shape[0], 132))
		# x_test has exactly one row (see the reshape above), so this loop
		# runs once with i == 0.
		for i in range(0, x_test.shape[0]):
			if i % 1000 == 0:
				print(i)
			# NOTE(review): `row` is not used anywhere in the visible code, but
			# the lookup itself still executes -- confirm it is needed.
			row = df.iloc[i + 40001, :]
			row_dlls = imported_dlls

			row_syms = imported_syms
			# Lower-case the names and join them into space-separated strings
			# (each name is followed by one space).
			row_dlss_str=""
			row_syms_str=""
			for ele in row_dlls:
				row_dlss_str += ele.lower() +" "
			for ele in row_syms:
				row_syms_str += ele.lower() +" "

			print(row_dlss_str)
			print(row_syms_str)


			dll_tfidfs = dll_vec.transform([row_dlss_str, ]).toarray()[0]
			# Pair each of the first 20 DLL names with its TF-IDF weight and its
			# index in the vectorizer's feature names.
			# NOTE(review): list.index raises ValueError for a name that is not
			# among the feature names -- confirm every DLL name is present.
			dll_tfidf_pairs = []
			for num, dll in enumerate(row_dlss_str.split()):
				if num == 20:
					break
				dll_tfidf = dll_tfidfs[list(dll_vec.get_feature_names()).index(dll)]
				dll_tfidf_pairs.append([dll_tfidf, list(dll_vec.get_feature_names()).index(dll)])

			dll_tfidf_pairs = np.array(dll_tfidf_pairs)
			# print(dll_tfidf_pairs)
			# Order the pairs by TF-IDF weight, largest first.
			# NOTE(review): with no DLL names the array is 1-D and empty, so the
			# [:, 0] index below would raise IndexError.
			dll_tfidf_pairs = dll_tfidf_pairs[dll_tfidf_pairs[:, 0].argsort()[::-1]]

			# Write one (value, value, weight) triple per DLL; names missing from
			# scrape_dict get the fixed values 1 and 4.
			for j, pair in enumerate(dll_tfidf_pairs):
				name = dll_vec.get_feature_names()[int(pair[1])]
				if name in scrape_dict:
					n_x_test[i, 3 * j] = scrape_dict[name][0]
					n_x_test[i, 3 * j + 1] = scrape_dict[name][1]
					n_x_test[i, 3 * j + 2] = pair[0]
				else:
					n_x_test[i, 3 * j] = 1
					n_x_test[i, 3 * j + 1] = 4
					n_x_test[i, 3 * j + 2] = pair[0]
			#     print(ip1_train)

			# Keep the 50 largest symbol TF-IDF weights and append them to the
			# 22 remaining features to fill columns 60 onward.
			# NOTE(review): assumes sym_vec has at least 50 features so the
			# slice assignment gets exactly 72 values -- TODO confirm.
			sym_tfidf = sym_vec.transform([row_syms_str, ]).toarray()[0]
			sym_tfidf = sorted(sym_tfidf, reverse=True)[:50]
			ip2_train = np.append(x_test[i], sym_tfidf)
			n_x_test[i, 60:] = ip2_train

		# Normalise with the stored mean/variance before predicting; 0.069 is
		# added to the standard deviation in the denominator.
		num = model.predict((n_x_test - self.mean) / (self.var ** 0.5 + 0.069))
		print("NUM" + str(num))
		# Map the prediction onto a risk level (y) and an advice text (z).
		if num >= 0 and num <= 0.3:
			y = "Low"
			z = "Good to use"
		elif num > 0.3 and num <= 0.6:
			y = "Medium"
			z = "Can be used"
		elif num > 0.6 and num <= 1:
			y = "High"
			z = "Avoid Using"
		else:
			y = "Out of range"
			z = "Cant determine"
		self.label.config(text="Recommendation : " + y)
		# NOTE(review): self.label is rebound to a newly created label here, so
		# unless it is reset elsewhere a later call would write the
		# "Recommendation" text into this widget and grid another label at
		# the same cell -- confirm this is intended.
		self.label = ttk.Label(self.labelFrame, text="")
		self.label.grid(column=1, row=3)
		self.label.config(text=z)