def systemFunction(self, cpu):
    """Run the system function selected by ``cpu.C`` and return to the caller.

    ``cpu.C`` values 2 and 9 are routed to ``systemFunction2`` and
    ``systemFunction9`` respectively; any other value runs no handler.
    In every case ``Opcodes.ret`` is executed afterwards.

    Returns:
        Always ``True``, signalling that the call was handled.
    """
    selector = cpu.C
    if selector == 2:
        handler = self.systemFunction2
    elif selector == 9:
        handler = self.systemFunction9
    else:
        handler = None

    if handler is not None:
        handler(cpu)

    # NOTE(review): 0xc9 is presumably the opcode byte of the RET being
    # emulated -- confirm against Opcodes.ret.
    Opcodes.ret(cpu, 0xc9, cpu.logger)
    return True  # handled
def generate_quasiquote_list(self, values):
    """Compile a quasiquoted list into opcodes that build it at runtime.

    Plain elements are quoted, ``Unquote`` elements are evaluated, nested
    lists are compiled recursively, and ``UnquoteList`` elements are spliced
    in: the elements collected so far are packed with ``BUILD_LIST`` and
    concatenated with the spliced value through ``BINARY_ADD``.

    An empty ``values`` now compiles to ``BUILD_LIST 0``.  Previously no
    opcode at all was emitted for it, so nothing was left on the stack for
    the consumer of the expression.

    Args:
        values: the elements of the quasiquoted list.

    Returns:
        An ``Opcodes`` sequence leaving exactly one list on the stack.
    """
    ops = []
    pending = 0      # elements pushed since the last BUILD_LIST
    spliced = False  # True once a partial list is already on the stack
    for value in values:
        if isinstance(value, bach_ast.Unquote):
            ops.append(self.generate_unquote(value))
            pending += 1
        elif isinstance(value, bach_ast.UnquoteList):
            # Pack what has been collected so far (possibly nothing) ...
            ops.append((BUILD_LIST, pending))
            if spliced:
                # ... and append it to the list built by earlier splices.
                ops.append((BINARY_ADD, None))
            spliced = True
            # Then concatenate the spliced value itself.
            ops.append(self.generate_unquote(value))
            ops.append((BINARY_ADD, None))
            pending = 0
        elif isinstance(value, list):
            ops.append(self.generate_quasiquote_list(value))
            pending += 1
        else:
            ops.append(self.generate_quote(value))
            pending += 1

    # Pack the trailing elements.  When no splice happened this is the only
    # BUILD_LIST, so it must be emitted even for zero elements.
    if pending > 0 or not spliced:
        ops.append((BUILD_LIST, pending))
        if spliced:
            ops.append((BINARY_ADD, None))
    return Opcodes(ops)
def generate_label(self, python_label):
    """Emit the load opcode for a label, depending on the current scope.

    * Outside of any lambda (``self.outers`` is empty): ``LOAD_GLOBAL``.
    * Label belongs to the innermost closure's names: ``LOAD_FAST``.
    * Otherwise: the opcode recorded for the label in the innermost
      ``self.outers`` mapping (a missing label raises ``KeyError``).
    """
    if len(self.outers) == 0:
        return Opcodes((LOAD_GLOBAL, python_label))
    if python_label in self.closures[-1]:
        return Opcodes((LOAD_FAST, python_label))
    recorded_load = self.outers[-1][python_label]
    return Opcodes((recorded_load, python_label))
def generate_if(self, test, if_true, if_false):
    """Compile an ``if`` expression that yields a value.

    A branch cannot leave values on the stack across the jump, so (HACK)
    each branch that has a positive stack effect stores its top value in the
    reserved local ``_bach_reserved_if`` and pops whatever else it pushed.
    The value is loaded back onto the stack after the join point.

    Args:
        test: condition expression.
        if_true: expression evaluated when the condition holds.
        if_false: expression evaluated otherwise; when falsy the branch
            evaluates to ``None``.
    """
    def keep_only_result(branch, depth):
        # Store the top value in the reserved slot and discard the
        # remaining ``depth - 1`` values; a branch that pushes nothing is
        # returned unchanged.
        if depth > 0:
            return Opcodes(branch,
                           (STORE_FAST, '_bach_reserved_if'),
                           [(POP_TOP, None)] * (depth - 1))
        return branch

    compiled_test = self.generate(test)
    compiled_true = self.generate(if_true)
    else_label = self.generate_bytecode_label()
    end_label = self.generate_bytecode_label()

    if if_false:
        compiled_false = self.generate(if_false)
    else:
        compiled_false = Opcodes((LOAD_CONST, None))

    # "effect" is the number of values a branch leaves on the stack.
    true_depth = self.stack_effect(compiled_true.to_list())
    false_depth = self.stack_effect(compiled_false.to_list())
    compiled_true = keep_only_result(compiled_true, true_depth)
    compiled_false = keep_only_result(compiled_false, false_depth)

    return Opcodes(compiled_test,
                   (POP_JUMP_IF_FALSE, else_label),
                   compiled_true,
                   (JUMP_FORWARD, end_label),
                   (else_label, None),
                   compiled_false,
                   (end_label, None),
                   (LOAD_FAST, '_bach_reserved_if'))
def generate_unquotelist(self, expr):
    """Compile an unquote-list form.

    Inside a quasiquote the expression is compiled normally.  Inside a plain
    quote it is kept as data: a two-element list holding the constant
    ``'unquote'`` and the quoted expression.

    Raises:
        UnquoteError: when used outside of both quasiquote and quote.
    """
    if self.in_quasiquote():
        return self.generate(expr)
    if not self.in_quote():
        raise UnquoteError(
            "Attempting to call unquote list outside of quasiquote/quote")

    quoted_expr = self.generate_quote(expr)
    return Opcodes((LOAD_CONST, 'unquote'), quoted_expr, (BUILD_LIST, 2))
def generate_lambda(self, body, args, let_aliases=None):
    """Compile a lambda (optionally with ``let`` bindings) to opcodes.

    A new scope is pushed on ``self.closures`` / ``self.outers`` for the
    duration of the compilation.  Every label the body uses from outside is
    classified: if any enclosing closure defines it, it is loaded with
    ``LOAD_DEREF`` and captured; otherwise it is loaded with
    ``LOAD_GLOBAL``.

    Args:
        body: sequence of expressions forming the function body.
        args: argument nodes; their ``label`` attributes name the parameters.
        let_aliases: optional mapping of name -> expression, stored into
            fast locals before the body runs.

    Returns:
        Opcodes building the function: ``MAKE_CLOSURE`` when at least one
        label is captured, ``MAKE_FUNCTION`` otherwise.
    """
    let_names = set(let_aliases.keys()) if let_aliases else set()
    # Kept as a list: parameter order matters for the code object and sets
    # are unordered.
    arg_labels = [arg.label for arg in args]

    local_names = set(arg_labels) | let_names
    scope_loads = {}
    self.closures.append(local_names)
    self.outers.append(scope_loads)

    outer_labels = bach_ast.find_outer_labels(body, local_names)
    enclosing_scopes = self.closures[:-1]
    captured = set()
    for label in outer_labels:
        if any(label in scope for scope in enclosing_scopes):
            scope_loads[label] = LOAD_DEREF
            captured.add(label)
        else:
            scope_loads[label] = LOAD_GLOBAL

    let_bytecode = []
    if let_aliases:
        for alias, value in let_aliases.items():
            let_bytecode.append(
                Opcodes(self.generate(value), (STORE_FAST, alias)))

    body_opcodes = Opcodes(let_bytecode,
                           [self.generate(form) for form in body])
    compiled_body = self.compile_function(arg_labels, scope_loads,
                                          body_opcodes)

    self.closures.pop()
    self.outers.pop()

    if not captured:
        return Opcodes((LOAD_CONST, compiled_body), (MAKE_FUNCTION, 0))

    closure_loads = [(LOAD_CLOSURE, name) for name in captured]
    return Opcodes(closure_loads,
                   (BUILD_TUPLE, len(closure_loads)),
                   (LOAD_CONST, compiled_body),
                   (MAKE_CLOSURE, 0))
def generate_module(self, sexp, stl=None, return_value=False):
    """Compile a whole module to a Python code object.

    Args:
        sexp: parsed program; every element of ``sexp.code`` is compiled in
            order.
        stl: optional object whose ``code`` list is used as a prelude that
            precedes the compiled module.  It is copied, never modified.
        return_value: when false the module returns ``None``; when true the
            value left on top of the stack is returned.

    Returns:
        The code object produced by ``data.to_code()``.
    """
    # NOTE(review): ``z`` is not defined in this function; presumably a
    # module-level function whose code object is used as a template.
    data = Code.from_code(z.func_code)

    # Copy the prelude: ``data.code += ...`` below extends the list in
    # place, which used to grow the caller's ``stl.code`` on every call.
    data.code = list(stl.code) if stl else []

    # Reset the per-module compiler state.
    self.quote_depth = 0
    self.quasiquote_depth = 0
    self.closures = []
    self.outers = []

    opcodes = Opcodes(map(self.generate, sexp.code)).to_list()
    data.code += opcodes
    if not return_value:
        data.code.append((LOAD_CONST, None))
    data.code.append((RETURN_VALUE, None))
    return data.to_code()
def generate_define(self, label, value):
    """Compile a definition: evaluate ``value`` and store it as a global
    named ``label.label``."""
    value_opcodes = self.generate(value)
    store_global = (STORE_GLOBAL, label.label)
    return Opcodes(value_opcodes, store_global)
def generate_call(self, handler, args):
    """Compile a function call: callee first on the stack, then each
    argument, then ``CALL_FUNCTION`` with the argument count.

    The arguments are compiled before the handler, although the handler's
    opcodes are emitted first.
    """
    compiled_args = [self.generate(arg) for arg in args]
    compiled_handler = self.generate(handler)
    call = (CALL_FUNCTION, len(compiled_args))
    return Opcodes(compiled_handler, compiled_args, call)
def main():
    """Command-line entry point: describe a binary and disassemble ``.text``.

    Expects the file name in ``argv[1]``.  Prints the file format; for an
    archive it lists the inner files and stops.  Otherwise it prints
    architecture, target, entry point and section count, then disassembles
    the whole ``.text`` section.

    The BFD handle is released on every exit path (early returns and
    errors included).
    """
    print("%s %s - %s (C) %s\n" % (__title__,
                                   __version__,
                                   __company__,
                                   __year__))

    # Import the disassembly library (libopcodes)
    from opcodes import Opcodes, OpcodesException

    if len(argv) == 1:
        print("Usage : %s <filename>" % argv[0])
        return

    bfd = None

    try:
        #
        # Initialize BFD instance.
        # We can either pass a filename or a file descriptor and they will
        # be used in the same way.
        #
        print("[+] Creating BFD instance...")
        bfd = Bfd(argv[1])

        # Print the file format and in case that it is an archive then just
        # show its files and leave.
        print("[+] File format     : %s" % bfd.file_format_name)

        if bfd.is_archive:
            print("[-] List of internal files:")

            for inner_bfd in bfd.archive_files:
                print("\t%-40s - sections : %d - symbols : %s" %
                      (inner_bfd.filename,
                       len(inner_bfd.sections),
                       len(inner_bfd.symbols)))

            # bfd.close() is executed below in the finally clause.
            return

        #
        # Display some information about the currently open file.
        #
        print("[+] Architecture    : %s (%d)" %
              (bfd.architecture_name, bfd.architecture))
        print("[+] BFD target name : %s" % bfd.target)
        print("[+] Entry point     : 0x%X" % bfd.start_address)
        print("[+] Sections        : %d" % len(bfd.sections))

        #
        # Get the .text section for further usage.
        #
        section_name = ".text"
        section = bfd.sections.get(section_name)
        if not section:
            print("[-] No section \'%s\' available." % section_name)
            return

        #
        # Display its name (we get it from the section instance) and its
        # index inside the binary file.
        #
        print("[+] Selected section information:")
        print("\tName   : %s" % section.name)
        print("\tIndex  : %d" % section.index)

        # Dump the section content to a buffer
        content = section.content

        # Display approximate section length: whole Kbytes, or the byte
        # count when the section is smaller than one Kbyte.
        length = len(content) // 1024
        if length == 0:
            length = len(content) % 1024
            length_unit = "Bytes"
        else:
            length_unit = "Kbytes"

        print("\tLength : %(length)d %(length_unit)s" %
              {"length": length, "length_unit": length_unit})

        #
        # At this point we'll disassemble the entire section content. We'll
        # obtain instruction address, size, type and disassembly (in text
        # format).
        #
        try:
            # Create opcodes instance to start code section disassembly.
            opcodes = Opcodes(bfd)

            # Set the code area we'll move through.
            opcodes.initialize_smart_disassemble(content, section.vma)

            # Set an internal -ready to use- callback function to print
            # disassembly information from the current section content.
            opcodes.start_smart_disassemble(
                0, opcodes.print_single_instruction_callback)

        except OpcodesException as err:
            print("[-] Opcodes exception : %s" % err)

    except BfdException as err:
        print("Error : %s" % err)

    finally:
        # ``bfd`` stays None when the constructor itself failed.
        if bfd is not None:
            bfd.close()
class Memory(object):
    """Base class for kernel-memory access back ends.

    Subclasses provide the raw primitives (``open``, ``close``, ``read``,
    ``write``, ``dataFind``, ``dataSeek``) and are expected to set
    ``self.typeaccess``; this class builds symbol and table discovery on
    top of them.  ``typeaccess != 0`` selects the search-based code paths
    that rely on ``dataFind``.

    All addresses are treated as 32-bit values; 0xC0000000 is added to the
    offsets returned by ``dataFind`` to turn them into addresses.
    """

    # Shared helper used to search and format opcode bytes.
    op = Opcodes()

    def _getBase(self):
        """Return the value reported by ``ulibzeppoo.idtr()`` (a hexadecimal
        string) as an integer."""
        base = ulibzeppoo.idtr()
        return int(base, 16)

    def _getSystemCall(self, base):
        """Return the handler address stored in entry 0x80 of the table at
        ``base`` (8-byte entries: low 16 bits at offset 0, high 16 bits at
        offset 6)."""
        temp = self.read(base + 8 * 0x80, 2)
        off1 = unpack("<H", temp)[0]

        temp = self.read(base + 8 * 0x80 + 6, 2)
        off2 = unpack("<H", temp)[0]

        return (off2 << 16) | off1

    def _getSysCallTableFd(self):
        """Locate the syscall table by scanning the first 255 bytes of the
        system-call handler for the byte sequence ``ff 14 85``."""
        base = self._getBase()
        system_call = self._getSystemCall(base)
        handler_code = self.read(system_call, 255)
        temp = self.op.find_opcodes(handler_code, "\xff\x14\x85", 4)
        return unpack("<L", temp)[0]

    def _getSysCallTableMmap(self):
        """Locate the syscall table by searching the data for the byte
        sequence ``ff 14 85`` and reading the 4 bytes that follow it."""
        offset = self.dataFind("\xff\x14\x85")
        offset = 0xC0000000 + offset
        addr = unpack("<L", self.read(offset + 3, 4))[0]
        return addr

    def getSysCallTable(self):
        """Return the address of the syscall table using the strategy that
        matches ``self.typeaccess``."""
        if (self.typeaccess != 0):
            return self._getSysCallTableMmap()
        else:
            return self._getSysCallTableFd()

    def dataFind(self, data):
        raise NotImplementedError

    def dataSeek(self, pos, offset):
        raise NotImplementedError

    # Based on p61_BONUS_BONUS by c0de @ UNF <*****@*****.**>
    def _find_proc_root(self):
        """Scan 0xC0000000-0xC1000000 page by page for a fixed byte pattern
        and return the address of the first match (``None`` when nothing
        matches)."""
        for t in range(0xC0000000, 0xC1000000, 4096):
            data = self.read(t, 4096)
            for i in range(0, 4096, 1):
                try:
                    if (data[i] == '\x01' and data[i + 2] == '\x00'
                            and data[i + 4] == '\x05'
                            and data[i + 12] == '\x6d'):
                        if (data[i + 20] == '\x00'
                                and data[i + 24] == '\x00'):
                            return (t + i)
                except IndexError:
                    # The pattern runs past the end of this page, so no
                    # later offset can match either.  (The former
                    # ``i = 4096`` did not stop the ``for`` loop.)
                    break

    def _find_proc_root_operations(self, proc_root):
        """Read the 32-bit pointer stored at ``proc_root + 32``."""
        return unpack("<L", self.read(proc_root + 32, 4))[0]

    def _find_proc_root_readdir(self, proc_root_operations):
        """Read the 32-bit pointer stored at ``proc_root_operations + 24``."""
        return unpack("<L", self.read(proc_root_operations + 24, 4))[0]

    def _find_proc_pid_readdir(self, proc_root_readdir):
        """Follow the first ``0xe9`` byte found in the 256 bytes at
        ``proc_root_readdir`` and return the resulting 32-bit address."""
        data = self.read(proc_root_readdir, 256)
        offset = data.find("\xe9")
        tmp = unpack("<L", self.read(proc_root_readdir + offset + 1, 4))[0]
        offset = offset + 5
        # The displacement is unsigned here, so wrap the sum to 32 bits.
        return (proc_root_readdir + tmp + offset + 5) & 0xffffffff

    def _find_get_tgid_list(self, proc_pid_readdir):
        """Follow the first ``0xe8`` byte found in the 256 bytes at
        ``proc_pid_readdir`` and return the resulting 32-bit address."""
        data = self.read(proc_pid_readdir, 256)
        offset = data.find("\xe8")
        tmp = unpack("<L", self.read(proc_pid_readdir + offset + 1, 4))[0]
        offset = offset + 5
        # Wrap to 32 bits; the former ``> 4294967296`` test missed a sum of
        # exactly 2**32.
        return (proc_pid_readdir + tmp + offset + 5) & 0xffffffff

    def _find_init_task(self):
        """Chain the ``_find_*`` helpers and return the 32-bit value found
        two bytes after the first ``0x81`` byte of the last routine."""
        proc_root = self._find_proc_root()
        proc_root_operations = self._find_proc_root_operations(proc_root)
        proc_root_readdir = self._find_proc_root_readdir(proc_root_operations)
        proc_pid_readdir = self._find_proc_pid_readdir(proc_root_readdir)
        get_tgid_list = self._find_get_tgid_list(proc_pid_readdir)

        data = self.read(get_tgid_list, 256)
        offset = data.find("\x81")
        init_task = unpack("<L", self.read(get_tgid_list + offset + 2, 4))[0]
        return init_task

    def find_symbol(self, name):
        """Return the address of the symbol ``name``.

        With ``typeaccess == 0`` only ``"init_task"`` is supported; any
        other name yields ``None``.  Otherwise the name is searched in the
        data, then the location holding the address of that string, and the
        symbol address is read from the 4 bytes preceding it.
        """
        if (self.typeaccess == 0):
            if (name == "init_task"):
                return self._find_init_task()
        else:
            self.dataSeek(0, 0)
            kstrtab_symbol = self.dataFind(name)
            kstrtab_addr = 0xC0000000 + kstrtab_symbol
            kstrtab_ascii = pack("<L", kstrtab_addr)
            offset = self.dataFind(kstrtab_ascii)
            offset = 0xC0000000 + offset
            symbol_addr = unpack("<L", self.read(offset - 4, 4))[0]
            return symbol_addr

    def open(self, mode, typeaccess=0):
        raise NotImplementedError

    def close(self):
        raise NotImplementedError

    def read(self, pos, len):
        raise NotImplementedError

    def write(self, pos, buf):
        raise NotImplementedError

    def dump(self, pos, len, type):
        """Print ``len`` bytes of memory starting at ``pos``, 4 bytes at a
        time.

        ``type == 'h'`` prints the formatted words back to back;
        ``type == 'v'`` prints one line per word followed by a tab and the
        raw bytes.  Any other ``type`` prints an empty line.
        """
        chunks = []
        print("Dump Memory @ 0x%x to @ 0x%x" % (pos, pos + len))
        for i in range(0, len, 4):
            dump_memory = self.read(pos + i, 4)
            temp = '%8x' % unpack("<L", dump_memory)[0]
            if (type == 'h'):
                chunks.append(self.op.reverseOpcodes(temp))
            elif (type == 'v'):
                chunks.append(self.op.reverseOpcodes(temp)
                              + '\t' + dump_memory + '\n')

        print("".join(chunks))
def generate_python_module(self, module_name):
    """Compile the import of one Python module and bind it as a global of
    the same name (``module_name.label``)."""
    name = module_name.label
    import_sequence = [
        (LOAD_CONST, -1),
        (LOAD_CONST, None),
        (IMPORT_NAME, name),
        (STORE_GLOBAL, name),
    ]
    return Opcodes(*import_sequence)
def generate_import(self, modules):
    """Compile an import form: one import sequence per listed module."""
    return Opcodes(
        [self.generate_python_module(module) for module in modules])
def generate_set(self, values):
    """Compile a set literal: every element, then ``BUILD_SET`` with the
    element count."""
    compiled_elements = [self.generate(element) for element in values]
    build = (BUILD_SET, len(values))
    return Opcodes(compiled_elements, build)
def generate_dict(self, keys, values):
    """Compile a dict literal.

    ``BUILD_MAP`` creates the dict; each entry then compiles its value, its
    key (in that order) and a ``STORE_MAP``.
    """
    entries = []
    for value, key in zip(values, keys):
        compiled_pair = [self.generate(value), self.generate(key)]
        entries.append(Opcodes(compiled_pair, (STORE_MAP, None)))
    return Opcodes((BUILD_MAP, len(entries)), entries)
def generate_vector(self, values):
    """Compile a vector literal: every element, then ``BUILD_LIST`` with
    the element count."""
    element_opcodes = [self.generate(element) for element in values]
    build = (BUILD_LIST, len(values))
    return Opcodes(element_opcodes, build)
def generate_do(self, body):
    """Compile a ``do`` form: the body expressions, in order."""
    return Opcodes([self.generate(expression) for expression in body])
def generate_value(self, value):
    """Compile a literal: load ``value`` as a constant."""
    load_constant = (LOAD_CONST, value)
    return Opcodes(load_constant)
def generate_let(self, aliases, body):
    """Compile a ``let`` form as a zero-argument lambda that binds
    ``aliases`` and is called immediately."""
    no_args = []
    binding_lambda = self.generate_lambda(body, no_args, aliases)
    call_without_args = (CALL_FUNCTION, 0)
    return Opcodes(binding_lambda, call_without_args)
class GVSyscalls:
    """Inspect the syscall table through a ``Memory`` back end.

    The syscall numbers and names are parsed from
    ``/usr/include/asm/unistd.h``.  For each one the handler address is read
    from the syscall table together with the first 8 bytes of the handler;
    these can be printed, written out as fingerprints, or compared against
    previously saved fingerprints.
    """

    op = Opcodes()
    # NOTE(review): these two stores are class attributes and therefore
    # shared by every instance.
    syscalls_mem = Syscalls()
    syscalls_fingerprints = Syscalls()
    # Kept for backward compatibility; each instance gets its own list in
    # __init__.
    lists_syscalls = []

    def __init__(self, mmemory, typeaccess=0):
        """Open ``mmemory`` for reading and parse the syscall list.

        Raises:
            TypeError: if ``mmemory`` is not a ``Memory`` instance.

        Exits the process with status -1 when the header file cannot be
        opened.
        """
        if not isinstance(mmemory, Memory):
            raise TypeError("ERREUR")

        self.mmemory = mmemory
        self.mmemory.open("r", typeaccess)

        # Per-instance list: appending to the class-level list duplicated
        # every entry each time another instance was created.
        self.lists_syscalls = []

        try:
            fichier = open("/usr/include/asm/unistd.h", "r")
        except IOError:
            print("No such file /usr/include/asm/unistd.h")
            sys.exit(-1)

        try:
            liste = fichier.readlines()
        finally:
            # Close the header even when reading it fails.
            fichier.close()

        count = 0
        for line in liste:
            if not re.match("#define __NR_", line):
                continue
            fields = line.split()
            if fields[2][0].isdigit():
                count = int(fields[2], 10)
            else:
                # Value is not a plain number: assume it is the next one in
                # sequence.
                count = count + 1
            # fields[1][5:] strips the leading "__NR_" from the macro name.
            self.lists_syscalls.append([count, fields[1][5:]])

    def __del__(self):
        # ``mmemory`` is missing when __init__ raised before assigning it.
        mmemory = getattr(self, "mmemory", None)
        if mmemory is not None:
            mmemory.close()

    def getSyscalls(self):
        """Read the handler address of every known syscall from the syscall
        table into ``syscalls_mem.map_syscalls``."""
        sys_call_table = self.mmemory.getSysCallTable()
        for number, _name in self.lists_syscalls:
            temp = self.mmemory.read(sys_call_table + 4 * number, 4)
            self.syscalls_mem.map_syscalls[number] = [unpack("<L", temp)[0],
                                                      ()]

    def getOpcodes(self):
        """Read the first 8 bytes of every handler (two 4-byte reads) and
        store them, formatted, next to the handler address."""
        for number, _name in self.lists_syscalls:
            address = self.syscalls_mem.map_syscalls[number][0]
            temp = "%8x" % unpack("<L", self.mmemory.read(address, 4))[0]
            opcodes = self.op.reverseOpcodes(temp)
            temp2 = "%8x" % unpack("<L",
                                   self.mmemory.read(address + 4, 4))[0]
            opcodes = opcodes + " " + self.op.reverseOpcodes(temp2)
            self.syscalls_mem.map_syscalls[number][1] = opcodes

    def _simpleViewSyscalls(self, syscalls):
        """Print one line per syscall: number, address, name, opcodes."""
        print('POS\t MEM\t\t NAME\t\t\t\t OPCODES')
        for number, name in self.lists_syscalls:
            print('%d\t 0x%x\t %-15s\t\t %s' %
                  (number, syscalls.map_syscalls[number][0], name,
                   syscalls.map_syscalls[number][1]))

    def viewSyscalls(self):
        """Collect the syscall information from memory and print it."""
        self.getSyscalls()
        self.getOpcodes()
        self._simpleViewSyscalls(self.syscalls_mem)

    def doFingerprints(self, fd):
        """Write the current syscall fingerprints to the file object ``fd``,
        one ``number address name opcodes`` line per syscall, between BEGIN
        and END marker comments."""
        self.getSyscalls()
        self.getOpcodes()

        print("++ Generating Syscalls Fingerprints")
        fd.write("#\n# BEGIN SYSCALLS FINGERPRINTS\n")

        for number, name in self.lists_syscalls:
            data = "%d %x %s %s\n" % (number,
                                      self.syscalls_mem.map_syscalls[number][0],
                                      name,
                                      self.syscalls_mem.map_syscalls[number][1])
            fd.write(data)

        fd.write("#\n# END SYSCALLS FINGERPRINTS\n")

    def checkFingerprints(self, fd):
        """Compare the syscalls in memory with the fingerprints read from
        ``fd`` and print every syscall whose address or opcodes differ.

        Reading stops at an empty line, at end of file, or after the
        ``# END`` marker (the line following the marker is consumed too).
        """
        syscalls_hijack = []
        end = 0
        self.getSyscalls()
        self.getOpcodes()

        liste = fd.readline().split()
        while (liste != [] and end == 0):
            if (liste[0] != '#'):
                # Fields: number, address (hex), name, two opcode words.
                self.syscalls_fingerprints.map_syscalls[int(liste[0])] = [
                    int(liste[1], 16), liste[3] + " " + liste[4]]
            elif (len(liste) > 1 and liste[1] == "END"):
                end = -1

            liste = fd.readline().split()

        print("++ Checking Syscalls Fingerprints !!!")
        for number, name in self.lists_syscalls:
            saved = self.syscalls_fingerprints.map_syscalls[number]
            current = self.syscalls_mem.map_syscalls[number]
            if ((saved[0] != current[0]) or (saved[1] != current[1])):
                syscalls_hijack.append([number, name])

        if (syscalls_hijack != []):
            print("\t** LISTS OF SYSCALLS HIJACK !!")
            for number, name in syscalls_hijack:
                print("\t\t** %d\t %-15s" % (number, name))
            print("\n\t** PLEASE REINSTALL YOUR SYSTEM NOW !!!")
        else:
            print("\t** NO SYSCALLS HIJACK")
parser.add_argument( "--edit_ids", type=str, default=None, help="path to edit ids for each sequence in incorr_token_ids") parser = argparse.ArgumentParser() add_arguments(parser) FLAGS, unparsed = parser.parse_known_args() wordpiece_tokenizer = tokenization.FullTokenizer(FLAGS.vocab_path, do_lower_case=False) opcodes = Opcodes( path_common_inserts=FLAGS.common_inserts_dir + "/" + "common_inserts.p", path_common_multitoken_inserts=FLAGS.common_inserts_dir + "/" + "common_multitoken_inserts.p", use_transforms=True) def seq2edits(incorr_line, correct_line): # Seq2Edits function (Described in Section 2.2 of the paper) # obtains edit ids from incorrect and correct tokens # input: incorrect line and correct line # output: incorr_tokens, correct_tokens, incorr token ids, edit ids #tokenize incorr_line and correct_line incorr_tokens = custom_tokenize(incorr_line, wordpiece_tokenizer, mode="train") correct_tokens = custom_tokenize(correct_line,
def generate_quoted_list(self, values):
    """Compile a quoted list: every element is quoted, then ``BUILD_LIST``
    packs them."""
    quoted_elements = [self.generate_quote(element) for element in values]
    build = (BUILD_LIST, len(values))
    return Opcodes(quoted_elements, build)
def generate_attribute(self, elements):
    """Compile an attribute chain: the first element is compiled as an
    expression and every following element becomes a ``LOAD_ATTR`` of its
    ``label``."""
    attribute_loads = []
    for element in elements[1:]:
        attribute_loads.append((LOAD_ATTR, element.label))
    base_object = self.generate(elements[0])
    return Opcodes(base_object, attribute_loads)
parser.add_argument( "--edit_ids", type=str, default=None, help="path to edit ids for each sequence in incorr_token_ids") parser = argparse.ArgumentParser() add_arguments(parser) FLAGS, unparsed = parser.parse_known_args() wordpiece_tokenizer = tokenization.FullTokenizer(FLAGS.vocab_path, do_lower_case=False) opcodes = Opcodes(path_common_inserts=os.path.join(FLAGS.common_inserts_dir, "common_inserts.p"), path_common_deletes=None, path_common_multitoken_inserts=os.path.join( FLAGS.common_inserts_dir, "common_multitoken_inserts.p"), use_transforms=True) def seq2edits(incorr_line, correct_line): # Seq2Edits function (Described in Section 2.2 of the paper) # obtains edit ids from incorrect and correct tokens # input: incorrect line and correct line # output: incorr_tokens, correct_tokens, incorr token ids, edit ids # tokenize incorr_line and correct_line incorr_tokens = custom_tokenize(incorr_line, wordpiece_tokenizer, mode="train") correct_tokens = custom_tokenize(correct_line,
def generate_symbol(self, value):
    """Compile a symbol: call the global named by ``self.BACH_SYMBOL`` with
    ``value`` as its only argument."""
    call_sequence = [
        (LOAD_GLOBAL, self.BACH_SYMBOL),
        (LOAD_CONST, value),
        (CALL_FUNCTION, 1),
    ]
    return Opcodes(call_sequence)