def get_callback_code(self, address, mapping, cbargs):
     '''Remaps each callback argument on the stack based on index.  cbargs is an array of argument indices
    that let us know where on the stack we must rewrite.  We insert code for each we must rewrite.'''
     callback_template_before = '''
 mov eax, [esp+(%s*4)]
 '''
     callback_template_after = '''
 call $+%s
 mov [esp+(%s*4)], eax
 '''
     code = asm('push eax')  #Save eax, use to hold callback address
     for ind in cbargs:
         #Add 2 because we must skip over the saved value of eax and the return value already pushed
         #ASSUMPTION: before this instruction OR this instruction if it IS a call, a return address was
         #pushed.  Since this *probably* is taking place inside the PLT, in all probability this is a
         #jmp instruction, and the call that got us *into* the PLT pushed a return address, so we can't rely
         #on the current instruction to tell us this either way.  Therefore, we are *assuming* that the PLT
         #is always entered via a call instruction, or that somebody is calling an address in the GOT directly.
         #If code ever jmps based on an address in the got, we will probably corrupt the stack.
         cb_before = callback_template_before % (ind + 2)
         code += asm(
             cb_before
         )  #Assemble this part first so we will know the offset to the lookup function
         size = len(code)
         lookup_target = self.remap_target(
             address, mapping, self.context.lookup_function_offset, size)
         cb_after = callback_template_after % (lookup_target, ind + 2)
         code += asm(cb_after)  #Save the new address over the original
     code += asm('pop eax')  #Restore eax
     return code
 def translate_ret(self, ins, mapping):
     '''
 mov [esp-28], eax	;save old eax value
 pop eax		;pop address from stack from which we will get destination
 call $+%s		;call lookup function
 mov [esp-4], eax	;save new eax value (destination mapping)
 mov eax, [esp-32]	;restore old eax value (the pop has shifted our stack so we must look at 28+4=32)
 jmp [esp-4]		;jmp/call to new address
 '''
     template_before = '''
 mov [esp-28], eax
 pop eax
 '''
     template_after = '''
 call $+%s
 %s
 mov [esp-4], eax
 mov eax, [esp-%d]
 jmp [esp-4]
 '''
     self.context.stat['ret'] += 1
     code = b''
     inserted = self.before_inst_callback(ins)
     if inserted is not None:
         code += inserted
     if self.context.no_pic and ins.address != self.context.get_pc_thunk + 3:
         #Perform a normal return UNLESS this is the ret for the thunk.
         #Currently its position is hardcoded as three bytes after the thunk entry.
         code = asm('ret %s' % ins.op_str)
     else:
         code = asm(template_before)
         size = len(code)
         lookup_target = b''
         if self.context.exec_only:
             #Special lookup for not rewriting arguments when going outside new main text address space
             lookup_target = self.remap_target(
                 ins.address, mapping,
                 self.context.secondary_lookup_function_offset, size)
         else:
             lookup_target = self.remap_target(
                 ins.address, mapping, self.context.lookup_function_offset,
                 size)
         if ins.op_str == '':
             code += asm(template_after %
                         (lookup_target, '',
                          32))  #32 because of the value we popped
         else:  #For ret instructions that pop imm16 bytes from the stack, add that many bytes to esp
             pop_amt = int(
                 ins.op_str, 16
             )  #We need to retrieve the right eax value from where we saved it
             code += asm(
                 template_after %
                 (lookup_target, 'add esp,%d' % pop_amt, 32 + pop_amt))
     return code
    def translate_cond(self, ins, mapping):
        self.context.stat['jcc'] += 1
        patched = b''
        inserted = self.before_inst_callback(ins)
        if inserted is not None:
            patched += inserted
        if ins.mnemonic in ['jcxz', 'jecxz'
                            ]:  #These instructions have no long encoding
            jcxz_template = '''
      test cx,cx
      '''
            jecxz_template = '''
      test ecx,ecx
      '''
            target = ins.operands[
                0].imm  # int(ins.op_str,16) The destination of this instruction
            #newtarget = remap_target(ins.address,mapping,target,0)
            if ins.mnemonic == 'jcxz':
                patched += asm(jcxz_template)
            else:
                patched += asm(jecxz_template)
            newtarget = self.remap_target(ins.address, mapping, target,
                                          len(patched))
            #print 'want %s, but have %s instead'%(remap_target(ins.address,mapping,target,len(patched)), newtarget)
            #Apparently the offset for jcxz and jecxz instructions may have been wrong?  How did it work before?
            patched += asm('jz $+%s' % newtarget)
            #print 'code length: %d'%len(patched)

            #TODO: some instructions encode to 6 bytes, some to 5, some to 2.  How do we know which?
            #For example, for CALL, it seems to only be 5 or 2 depending on offset.
            #But for jg, it can be 2 or 6 depending on offset, I think because it has a 2-byte opcode.
            #while len(patched) < 6: #Short encoding, which we do not want
            #  patched+='\x90' #Add padding of NOPs
            #The previous commented out code wouldn't even WORK now, since we insert another instruction
            #at the MINIMUM.  I'm amazed the jcxz/jecxz code even worked at all before
        else:
            target = ins.operands[
                0].imm  # int(ins.op_str,16) The destination of this instruction
            newtarget = self.remap_target(ins.address, mapping, target,
                                          len(patched))
            patched += asm(ins.mnemonic + ' $+' + newtarget)
            #TODO: some instructions encode to 6 bytes, some to 5, some to 2.  How do we know which?
            #For example, for CALL, it seems to only be 5 or 2 depending on offset.
            #But for jg, it can be 2 or 6 depending on offset, I think because it has a 2-byte opcode.
            #while len(patched) < 6: #Short encoding, which we do not want
            #  patched+='\x90' #Add padding of NOPs
        return patched
 def gen_newcode(self, mapping):
     print 'Generating new code...'
     newbytes = ''
     bytemap = {}
     maplist = []
     last = None  #Last instruction disassembled
     for ins in self.disassembler.disasm(self.bytes, self.base):
         if ins is None and last is not None:  # Encountered a previously disassembled instruction and have not redirected
             target = last.address + len(
                 last.bytes
             )  #address of where in the original code we would want to jmp to
             next_target = self.translator.remap_target(
                 last.address, mapping, target, len(bytemap[last.address]))
             reroute = assembler.asm('jmp $+%s' % (next_target))
             #Maximum relative displacement is 32 for x86 and x64, so this works for both platforms
             if len(reroute) == 2:  #Short encoding, which we do not want
                 reroute += '\x90\x90\x90'  #Add padding of 3 NOPs
             bytemap[last.address] += reroute
             last = None
             maplist.append(bytemap)
             bytemap = {}
         elif ins is not None:
             last = ins
             newins = self.translator.translate_one(
                 ins, mapping)  #In this pass, the mapping is incomplete
             if newins is not None:
                 bytemap[
                     ins.
                     address] = newins  #Old address maps to these new instructions
             else:
                 bytemap[ins.address] = str(
                     ins.bytes
                 )  #This instruction is unchanged, and its old address maps to it
     #Add the lookup function as the first thing in the new text section
     newbytes += self.runtime.get_lookup_code(
         self.base, len(self.bytes), self.context.lookup_function_offset,
         mapping[self.context.mapping_offset])
     if self.context.exec_only:
         newbytes += self.runtime.get_secondary_lookup_code(
             self.base, len(self.bytes),
             self.context.secondary_lookup_function_offset,
             mapping[self.context.mapping_offset])
     count = 0
     for m in maplist:
         for k in sorted(
                 m.keys()
         ):  #For each original address to code, in order of original address
             newbytes += m[k]
     if not self.context.write_so:
         newbytes += self.runtime.get_auxvec_code(mapping[self.entry])
     print 'mapping is being placed at offset: 0x%x' % len(newbytes)
     #Append mapping to end of bytes
     newbytes += self.write_mapping(mapping, self.base, len(self.bytes))
     return newbytes
Example #5
0
 def get_popgm_code(self):
     call_popgm = '''
 pushad
 push %s
 call $+0xa
 add esp,4
 popad
 ret
 '''
     popgmbytes = asm(call_popgm % (self.context.global_sysinfo + 4))
     with open('x86_%s' % self.context.popgm) as f:
         popgmbytes += f.read()
     return popgmbytes
 def gen_mapping(self):
     print 'Generating mapping...'
     mapping = {}
     maplist = []
     currmap = {}
     last = None  #Last instruction disassembled
     reroute = assembler.asm(
         'jmp $+0x8f'
     )  #Dummy jmp to imitate connecting jmp; we may not know dest yet
     for ins in self.disassembler.disasm(self.bytes, self.base):
         if ins is None and last is not None:  # Encountered a previously disassembled instruction and have not redirected
             currmap[last.address] += len(reroute)
             last = None  #If we have not found any more new instructions since our last redirect, don't redirect again
             maplist.append(currmap)
             currmap = {}
         elif ins is not None:
             last = ins  #Remember the last disassembled instruction
             newins = self.translator.translate_one(
                 ins, None)  #In this pass, the mapping is incomplete
             if newins is not None:
                 currmap[ins.address] = len(newins)
             else:
                 currmap[ins.address] = len(ins.bytes)
     self.context.lookup_function_offset = 0  #Place lookup function at start of new text section
     lookup_size = len(
         self.runtime.get_lookup_code(self.base, len(
             self.bytes), 0, 0x8f))  #TODO: Issue with mapping offset & size
     offset = lookup_size
     if self.context.exec_only:
         self.context.secondary_lookup_function_offset = offset
         secondary_lookup_size = len(
             self.runtime.get_secondary_lookup_code(self.base,
                                                    len(self.bytes), offset,
                                                    0x8f))
         offset += secondary_lookup_size
     for m in maplist:
         for k in sorted(m.keys()):
             size = m[k]
             mapping[k] = offset
             offset += size  #Add the size of this instruction to the total offset
     #Now that the mapping is complete, we know the length of it
     self.context.mapping_offset = len(
         self.bytes
     ) + self.base  #Where we pretend the mapping was in the old code
     if not self.context.write_so:
         self.context.new_entry_off = offset  #Set entry point to start of auxvec
         offset += len(
             self.runtime.get_auxvec_code(0x8f)
         )  #Unknown entry addr here, but not needed b/c we just need len
     mapping[self.context.
             lookup_function_offset] = self.context.lookup_function_offset
     if self.context.exec_only:
         #This is a very low number and therefore will not be written out into the final mapping.
         #It is used to convey this offset for the second phase when generating code, specifically
         #for the use of remap_target.  Without setting this it always sets the target to 0x8f. Sigh.
         mapping[
             self.context.
             secondary_lookup_function_offset] = self.context.secondary_lookup_function_offset
     #Don't yet know mapping offset; we must compute it
     mapping[len(self.bytes) + self.base] = offset
     print 'final offset for mapping is: 0x%x' % offset
     if not self.context.write_so:
         #For NOW, place the global data/function at the end of this because we can't necessarily fit
         #another section.  TODO: put this somewhere else
         #The first time, sysinfo's and flag's location is unknown,
         #so they are wrong in the first call to get_global_lookup_code
         #However, the global_flag is moving to a TLS section, so it takes
         #up no space in the global lookup
         #global_flag = global_lookup + len(get_global_lookup_code())
         #popgm goes directly after the global lookup, and global_sysinfo directly after that.
         self.context.popgm_offset = len(
             self.runtime.get_global_lookup_code())
         self.context.global_sysinfo = self.context.global_lookup + self.context.popgm_offset + len(
             self.runtime.get_popgm_code())
         #Now that this is set, the auxvec code should work
     return mapping
 def get_indirect_uncond_code(self, ins, mapping, target):
     #Commented assembly
     '''
 mov [esp-28], eax	;save old eax value (very far above the stack because of future push/call)
 mov eax, %s		;read location in memory from which we will get destination
 %s			;if a call, we push return address here
 call $+%s		;call lookup function
 mov [esp-4], eax	;save new eax value (destination mapping)
 mov eax, [esp-%s]	;restore old eax value (offset depends on whether return address pushed)
 jmp [esp-4]		;jmp to new address
 '''
     template_before = '''
 mov [esp-32], eax
 mov eax, %s
 %s
 '''
     exec_call = '''
 push %s
 '''
     so_call_before = '''
 push ebx
 call $+5
 '''
     so_call_after = '''
 pop ebx
 sub ebx,%s
 xchg ebx,[esp]
 '''
     template_after = '''
 call $+%s
 mov [esp-4], eax
 mov eax, [esp-%s]
 jmp [esp-4]
 '''
     template_nopic = '''
 call $+%s
 mov [esp-4], eax
 mov eax, [esp-%s]
 %s [esp-4]
 '''
     #TODO: This is somehow still the bottleneck, so this needs to be optimized
     code = b''
     if self.context.exec_only:
         code += self.get_remap_callbacks_code(ins.address, mapping, target)
     #NOTE: user instrumentation code comes after callbacks code.  No particular reason to put it either way,
     #other than perhaps consistency, but for now this is easier.
     inserted = self.before_inst_callback(ins)
     if inserted is not None:
         code += inserted
     if self.context.no_pic:
         if ins.mnemonic == 'call':
             self.context.stat['indcall'] += 1
         else:
             self.context.stat['indjmp'] += 1
         code += asm(template_before % (target, ''))
     elif ins.mnemonic == 'call':
         self.context.stat['indcall'] += 1
         if self.context.write_so:
             code += asm(template_before % (target, so_call_before))
             if mapping is not None:
                 code += asm(so_call_after %
                             ((mapping[ins.address] + len(code) +
                               self.context.newbase) -
                              (ins.address + len(ins.bytes))))
                 #print 'CODE LEN/1: %d\n%s'%(len(code),code.encode('hex'))
             else:
                 code += asm(so_call_after %
                             ((0x8f + self.context.newbase) -
                              (ins.address + len(ins.bytes))))
                 #print 'CODE LEN/0: %d\n%s'%(len(code),code.encode('hex'))
         else:
             code += asm(template_before % (target, exec_call %
                                            (ins.address + len(ins.bytes))))
     else:
         self.context.stat['indjmp'] += 1
         code += asm(template_before % (target, ''))
     size = len(code)
     lookup_target = self.remap_target(ins.address, mapping,
                                       self.context.lookup_function_offset,
                                       size)
     #Always transform an unconditional control transfer to a jmp, but
     #for a call, insert a push instruction to push the original return address on the stack.
     #At runtime, our rewritten ret will look up the right address to return to and jmp there.
     #If we push a value on the stack, we have to store even FURTHER away from the stack.
     #Note that calling the lookup function can move the stack pointer temporarily up to
     #20 bytes, which will obliterate anything stored too close to the stack pointer.  That, plus
     #the return value we push on the stack, means we need to put it at least 28 bytes away.
     if self.context.no_pic:
         #Change target to secondary lookup function instead
         lookup_target = self.remap_target(
             ins.address, mapping,
             self.context.secondary_lookup_function_offset, size)
         code += asm(template_nopic % (lookup_target, 32, ins.mnemonic))
     elif ins.mnemonic == 'call':
         code += asm(template_after % (lookup_target, 28))
     else:
         code += asm(template_after % (lookup_target, 32))
     return code
 def translate_uncond(self, ins, mapping):
     op = ins.operands[0]  #Get operand
     if op.type == X86_OP_REG:  # e.g. call eax or jmp ebx
         target = ins.reg_name(op.reg)
         return self.get_indirect_uncond_code(ins, mapping, target)
     elif op.type == X86_OP_MEM:  # e.g. call [eax + ecx*4 + 0xcafebabe] or jmp [ebx+ecx]
         target = ins.op_str
         return self.get_indirect_uncond_code(ins, mapping, target)
     elif op.type == X86_OP_IMM:  # e.g. call 0xdeadbeef or jmp 0xcafebada
         target = op.imm
         code = b''
         inserted = self.before_inst_callback(ins)
         if inserted is not None:
             code += inserted
         if self.context.no_pic and target != self.context.get_pc_thunk:
             #push nothing if no_pic UNLESS it's the thunk
             #We only support DIRECT calls to the thunk
             if ins.mnemonic == 'call':
                 self.context.stat['dircall'] += 1
             else:
                 self.context.stat['dirjmp'] += 1
         elif ins.mnemonic == 'call':  #If it's a call, push the original address of the next instruction
             self.context.stat['dircall'] += 1
             exec_call = '''
     push %s
     '''
             so_call_before = '''
     push ebx
     call $+5
     '''
             so_call_after = '''
     pop ebx
     sub ebx,%s
     xchg ebx,[esp]
     '''
             if self.context.write_so:
                 code += asm(so_call_before)
                 if mapping is not None:
                     # Note that if somehow newbase is a very small value we could have problems with the small
                     # encoding of sub.  This could result in different lengths between the mapping and code gen phases
                     code += asm(so_call_after %
                                 ((self.context.newbase +
                                   (mapping[ins.address] + len(code))) -
                                  (ins.address + len(ins.bytes))))
                 else:
                     code += asm(so_call_after %
                                 ((self.context.newbase) -
                                  (ins.address + len(ins.bytes))))
             else:
                 code += asm(exec_call % (ins.address + len(ins.bytes)))
         else:
             self.context.stat['dirjmp'] += 1
         newtarget = self.remap_target(ins.address, mapping, target,
                                       len(code))
         #print "(pre)new length: %s"%len(callback_code)
         #print "target: %s"%hex(target)
         #print "newtarget: %s"%newtarget
         if self.context.no_pic and target != self.context.get_pc_thunk:
             code += asm('%s $+%s' % (ins.mnemonic, newtarget))
         else:
             patched = asm('jmp $+%s' % newtarget)
             if len(patched) == 2:  #Short encoding, which we do not want
                 patched += '\x90\x90\x90'  #Add padding of 3 NOPs
             code += patched
         #print "new length: %s"%len(callback_code+patched)
         return code
     return None