Example #1
0
class Aqualung(object):
    """Experimental ARM decompiler that annotates an IDA disassembly.

    Starting from the first address of the current function, the class walks
    instructions through the ``IDA`` helper, keeps a symbolic C-like
    expression per register in ``self.regs`` and asks one handler per
    mnemonic to render the instruction as pseudo-C text.

    Handlers are looked up by name (see ``findHandler``), so every
    upper-case method on this class (``MOV``, ``LDR``, ``BL``...) is the
    handler for the mnemonic of the same name.

    ``IDA``, ``idaapi`` and ``idc`` are expected to be provided by the
    enclosing module; they are not defined in this class.
    """

    # Barrel-shifter mnemonic -> C operator.
    shiftMap = dict(
        ASL='<<',
        ASR='>>',
        LSL='<<',
        LSR='>>',
    )

    # Data-processing mnemonic -> C operator (BIC additionally complements
    # its right operand, see ``arithmetic``).
    arithMap = dict(
        ADD='+',
        AND='&',
        BIC='&',
        LSL='<<',
        LSR='>>',
        MUL='*',
        ORR='|',
        SUB='-',
        SUBS='-',
        EOR='^',
    )

    # ARM condition suffix -> C relational operator.  CS/HS is "unsigned
    # higher or same" and LS is "unsigned lower or same".  The flag-only
    # conditions (PL, MI, VC, VS) have no exact relational equivalent and
    # are mapped to the nearest operator as an approximation.
    suffixMap = dict(
        NE='!=',
        EQ='==',
        GE='>=',
        PL='>=',
        CS='>=',
        HS='>=',
        GT='>',
        VC='>',
        HI='>',
        CC='<',
        LO='<',
        LT='<',
        MI='<',
        VS='<',
        LE='<=',
        LS='<=',
    )

    # Operators for which a zero right operand leaves the left operand
    # unchanged ('&' and '*' are deliberately absent: x & 0 == x * 0 == 0).
    _zeroIdentityOps = frozenset(['+', '-', '|', '^', '<<', '>>'])

    def __init__(self):
        """Print the banner and decompile the current function.

        The address under the cursor is decompiled as well when the walk
        from the function start did not reach it.
        """
        print('Aqualung -- An experimental ARM decompiler')
        print('Copyright 2007 Cody Brocious (cody DOT brocious AT gmail DOT com)')

        self.ida = IDA()

        # Addresses already visited, in visiting order.
        self.decompiled = []

        self.func = self.ida.getFunc()
        # Pending comparison: (left, right, [earlier chained conditions]).
        self.cmp = None
        # presumably func[0] is the function's start address -- TODO confirm
        self.decompile(self.func[0])

        if idaapi.get_screen_ea() not in self.decompiled:
            self.decompile(idaapi.get_screen_ea())

    def _freshRegs(self):
        """Return the initial register state: r0-r2 hold arg0-arg2."""
        regs = dict.fromkeys(['r%d' % n for n in range(16)] + ['trash'])
        regs.update(r0='arg0', r1='arg1', r2='arg2')
        return regs

    def decompile(self, ea, regs=None):
        """Walk instructions from ``ea`` until a visited address or a dead end.

        ea   -- address of the first instruction to process.
        regs -- register-state dict to continue from; ``None`` starts from
                the default state (``r0``-``r2`` named ``arg0``-``arg2``).

        ``self.regs`` is replaced even when ``ea`` was already visited.
        """
        if regs is None:
            self.regs = self._freshRegs()
        else:
            self.regs = regs

        while ea not in self.decompiled:
            self.decompiled.append(ea)
            self.ea = ea

            # Dump the symbolic state when the walk reaches the cursor.
            if ea == idaapi.get_screen_ea():
                for name in self.regs:
                    print('%s == %s' % (name, self.regs[name]))

            mnem, ops, branches = self.ida.getInsn(ea)
            self.dispatch(ea, mnem, ops)

            if not branches:
                break
            if len(branches) == 1:
                ea = branches[0]
                continue

            curfunc = self.ida.getFunc(ea)
            # Recursive calls rebind and mutate self.regs, so take a snapshot
            # here and hand every same-function branch its own copy;
            # otherwise later branches would start from the state left
            # behind by the paths explored before them.
            snapshot = dict(self.regs)
            for branch in branches:
                if self.ida.getFunc(branch) != curfunc:
                    self.decompile(branch)
                else:
                    self.decompile(branch, regs=dict(snapshot))
            break

    def dispatch(self, ea, mnem, ops):
        """Strip size/condition suffixes from ``mnem`` and run its handler.

        A six-letter LDR/STR mnemonic is split into base + condition + size
        letter; the size letter is re-attached after the condition has been
        removed so that e.g. ``LDRNEB`` is handled by ``LDRB``.
        """
        cond = ''

        if mnem.startswith(('LDR', 'STR')) and len(mnem) == 6:
            mnem, size = mnem[:-1], mnem[-1]
        else:
            size = None

        for suffix in self.suffixMap:
            if not mnem.endswith(suffix):
                continue
            tempmnem = mnem[:-len(suffix)]
            # Only strip the suffix when what remains is a real handler;
            # compare mnemonics keep theirs (nonCmp) because ``compare``
            # consumes the suffix itself.
            if not self.findHandler(tempmnem, nonCmp=True):
                continue
            mnem = tempmnem
            if self.cmp:
                cond = '%s %s %s' % (self.cmp[0], self.suffixMap[suffix],
                                     self.cmp[1])
                cond = 'if(%s) ' % ' && '.join(self.cmp[2] + [cond])
            break

        if size:
            mnem += size

        handler = self.findHandler(mnem)
        if handler is None:
            return  # unhandled instruction

        out = handler(mnem, ops)
        if out is not None and out is not False:
            # NOTE(review): the generated text (cond + out) is currently
            # discarded and the comment at ``ea`` is cleared instead --
            # confirm whether this is intentional.
            idc.MakeComm(ea, '')

    def findHandler(self, mnem, nonCmp=False):
        """Return the handler for ``mnem``, or ``None`` if there is none.

        nonCmp -- when true, CMP/CMN/TST mnemonics are not routed to
                  ``compare`` (used while probing for condition suffixes).
        """
        if mnem in self.arithMap:
            return self.arithmetic
        if not nonCmp and mnem.startswith(('CMP', 'CMN', 'TST')):
            return self.compare
        return getattr(self, mnem, None)

    def allnum(self, inp):
        """Return True when ``inp`` is non-empty and only ASCII digits."""
        if not inp:
            return False
        return all(c in '0123456789' for c in inp)

    def isConstant(self, value):
        """Return True for a quoted string, a decimal or a 0x literal."""
        if not value:
            return False
        if value[0] == '"' and value[-1] == '"':
            return True
        return self.allnum(value) or value[:2] == '0x'

    def _isWrapped(self, value):
        """Return True when one matching pair of parens encloses ``value``."""
        if len(value) < 2 or value[0] != '(' or value[-1] != ')':
            return False
        depth = 0
        for index, char in enumerate(value):
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    # The opening paren must close on the very last char.
                    return index == len(value) - 1
        return False

    def _stripParens(self, value):
        """Remove parentheses that enclose the whole of ``value``."""
        while self._isWrapped(value):
            value = value[1:-1]
        return value

    def addParens(self, value):
        """Lower-case ``value`` and parenthesise it unless it is atomic.

        Short register-like names (``r0``..``r15``) and expressions already
        enclosed by a matching pair of parentheses are returned as they are.
        """
        value = value.lower()
        if value.startswith('r') and len(value) <= 3:
            return value
        if self._isWrapped(value):
            return value
        return '(%s)' % value

    def _splitAddress(self, inner):
        """Split the inside of ``[...]`` on commas, keeping shifts attached.

        ``R0,R1,LSL#2`` becomes ``['R0', 'R1,LSL#2']`` so the shift is
        applied to the index register only.
        """
        parts = []
        for piece in inner.split(','):
            if parts and piece[:3] in self.shiftMap:
                parts[-1] += ',' + piece
            else:
                parts.append(piece)
        return parts

    def processOp(self, op, noRef=False, out=False):
        """Translate one operand into a C-like expression.

        op    -- operand text (register, ``#imm``, ``=name``, ``[...]`` or
                 ``reg,SHIFT#n``).
        noRef -- for ``=name`` operands return the 1-tuple ``(name,)``
                 instead of ``'&name'``.
        out   -- the operand is a destination: return the register name
                 itself rather than its tracked value.
        """
        if op[:1] == '[' and op[-1:] == ']':
            inner = op[1:-1]
            if ',' not in inner:
                # NOTE(review): a lone base register is returned without
                # substituting its tracked value -- confirm this is wanted.
                return inner
            return self.addParens(' + '.join(
                self.processOp(part) for part in self._splitAddress(inner)))

        temp = op.rsplit(',', 1)
        if len(temp) > 1 and temp[1][:3] in self.shiftMap:
            base = self.processOp(temp[0])
            amount = self.processOp(temp[1][3:])
            return '(%s %s %s)' % (base, self.shiftMap[temp[1][:3]], amount)

        if op[:1] == '#':
            literal = op[1:]
            if self.allnum(literal):
                # int() instead of eval(): no code execution and a leading
                # zero is not read as octal.
                return '0x%x' % int(literal)
            return literal

        if op[:1] == '=':
            if noRef:
                return (op[1:], )
            return '&' + op[1:]

        oplower = op.lower()
        if out or oplower not in self.regs or self.regs[oplower] is None:
            return op
        return self.regs[oplower]

    def _regKey(self, name):
        """Return the ``self.regs`` key for ``name`` ('trash' if unknown)."""
        key = name.lower()
        if key not in self.regs:
            key = 'trash'
        return key

    def arithmetic(self, mnem, ops):
        """Render a two- or three-operand arithmetic/logic instruction.

        Updates the tracked value of the destination register and returns
        the statement text.
        """
        out = self.processOp(ops[0], out=True)
        outlower = self._regKey(out)
        operator = self.arithMap[mnem]

        if len(ops) == 2:
            left = out
            right = self.processOp(ops[1])
        else:
            left = self.processOp(ops[1])
            right = self.processOp(ops[2])

        if mnem == 'BIC':
            right = '~%s' % right

        # "x op 0" collapses to "x" only for operators where zero is an
        # identity; AND/MUL by zero must keep the full expression.
        if right == '0x0' and operator in self._zeroIdentityOps:
            self.regs[outlower] = left
            return '%s = %s;' % (out, left)

        if out != left:
            arith = '%s %s %s' % (left, operator, right)
            self.regs[outlower] = self.addParens(arith)
            return '%s = %s;' % (out, arith)

        # NOTE(review): unlike the branch above, the tracked value is stored
        # without parentheses here -- confirm whether that is intended.
        self.regs[outlower] = '%s %s %s' % (self.processOp(out), operator,
                                            right)
        return '%s %s= %s;' % (out, operator, self._stripParens(right))

    def compare(self, mnem, ops):
        """Record the operands of CMP/CMN/TST in ``self.cmp``.

        A conditional compare (``CMPxx``) following an earlier compare
        appends the earlier comparison, evaluated with condition ``xx``, to
        the list of chained conditions.  Returns ``None``.
        """
        op1, op2 = self.processOp(ops[0]), self.processOp(ops[1])

        if mnem.startswith('CMN'):
            op2 = '-' + op2
        elif mnem.startswith('TST'):
            # TST sets the flags from (op1 & op2), so the conditions that
            # follow compare that result against zero.
            op1 = '(%s & %s)' % (op1, op2)
            op2 = '0x0'

        suffix = mnem[3:]
        if self.cmp and suffix in self.suffixMap:
            self.cmp[2].append(
                '%s %s %s' %
                (self.cmp[0], self.suffixMap[suffix], self.cmp[1]))
            self.cmp = op1, op2, self.cmp[2]
        else:
            self.cmp = op1, op2, []

    def _cString(self, data):
        """Return ``data`` as a double-quoted literal with repr() escapes.

        Prefixing a single quote makes repr() choose double-quote
        delimiters; the slice then drops the delimiter and the prefix.
        NOTE(review): text that itself contains a double quote makes repr()
        pick single quotes, so the result is off in that case.
        """
        return '"%s"' % repr("'" + data)[2:-1]

    def ADR(self, mnem, ops):
        """Render ADR: assign a named location or its string contents."""
        out = self.processOp(ops[0], out=True)
        addr = idc.LocByName(ops[1])
        if idc.GetStringType(addr) == 0:
            data = self._cString(self.ida.getString(addr))
        else:
            data = ops[1]

        return '%s = %s;' % (out, data)

    def B(self, mnem, ops):
        """Branches produce no statement text."""
        return ''

    def BX(self, mnem, ops):
        """Branch-and-exchange produces no statement text."""
        return ''

    def BL(self, mnem, ops):
        """Render a call using r0-r2 as arguments; the call becomes r0."""
        args = []
        for reg in ('r0', 'r1', 'r2'):
            value = self.regs[reg]
            args.append(reg.upper() if value is None else value)

        call = '%s(%s)' % (ops[0], ', '.join(args))
        self.regs['r0'] = call

        return call

    BLX = BL

    def LDR(self, mnem, ops):
        """Render a word load and track the loaded expression.

        ``=name`` operands and string data are assigned directly; any other
        source is dereferenced with ``*``.
        """
        out = self.processOp(ops[0], out=True)
        outlower = self._regKey(out)

        right = self.processOp(ops[1], noRef=True)

        addr = idc.LocByName(ops[1][1:])
        if idc.GetStringType(addr) == 0:
            # Wrapped in a tuple so it is not dereferenced below.
            right = (self._cString(self.ida.getString(addr)), )

        if isinstance(right, tuple):
            right = right[0]
        else:
            right = '*' + right

        self.regs[outlower] = right

        return '%s = %s;' % (out, right)

    def _loadSized(self, ops, ctype):
        """Render a narrow load through a ``(ctype *)`` cast and track it."""
        out = self.processOp(ops[0], out=True)
        outlower = self._regKey(out)

        addr = '*(%s *) %s' % (ctype, self.processOp(ops[1]))
        self.regs[outlower] = addr

        return '%s = %s;' % (out, addr)

    def LDRB(self, mnem, ops):
        """Render a byte load."""
        return self._loadSized(ops, 'uchar')

    def LDRH(self, mnem, ops):
        """Render a halfword load."""
        return self._loadSized(ops, 'ushort')

    def MOV(self, mnem, ops):
        """Render a move; ``Rd = Rd op x`` is shortened to ``Rd op= x``.

        A constant source clears the tracked value of the destination;
        anything else is stored parenthesised.
        """
        out = self.processOp(ops[0], out=True)
        outlower = self._regKey(out)

        value = self.processOp(ops[1])

        if self.isConstant(value):
            self.regs[outlower] = None
        else:
            self.regs[outlower] = self.addParens(value)

        value = self._stripParens(value)

        prefix = out + ' '
        if value.startswith(prefix):
            op, sep, rest = value[len(prefix):].partition(' ')
            if sep:
                return '%s %s= %s;' % (out, op, rest)
        return '%s = %s;' % (out, value)

    MOVL = MOV
    MOVS = MOV

    def _storeSized(self, ops, cast):
        """Render a store; ``cast`` is the pointer-cast prefix (may be '')."""
        value = self.processOp(ops[0])
        out = self.processOp(ops[1], out=True)
        return '*%s%s = %s;' % (cast, out, value)

    def STR(self, mnem, ops):
        """Render a word store."""
        return self._storeSized(ops, '')

    def STRB(self, mnem, ops):
        """Render a byte store."""
        return self._storeSized(ops, '(uchar *) ')

    def STRH(self, mnem, ops):
        """Render a halfword store."""
        return self._storeSized(ops, '(ushort *) ')
Example #2
0
class Aqualung(object):
    """Experimental ARM decompiler that annotates an IDA disassembly.

    Starting from the first address of the current function, the class walks
    instructions through the ``IDA`` helper, keeps a symbolic C-like
    expression per register in ``self.regs`` and asks one handler per
    mnemonic to render the instruction as pseudo-C text.

    Handlers are looked up by name (see ``findHandler``), so every
    upper-case method on this class (``MOV``, ``LDR``, ``BL``...) is the
    handler for the mnemonic of the same name.

    ``IDA``, ``idaapi`` and ``idc`` are expected to be provided by the
    enclosing module; they are not defined in this class.
    """

    # Barrel-shifter mnemonic -> C operator.
    shiftMap = dict(
        ASL='<<',
        ASR='>>',
        LSL='<<',
        LSR='>>',
    )

    # Data-processing mnemonic -> C operator (BIC additionally complements
    # its right operand, see ``arithmetic``).
    arithMap = dict(
        ADD='+',
        AND='&',
        BIC='&',
        LSL='<<',
        LSR='>>',
        MUL='*',
        ORR='|',
        SUB='-',
        SUBS='-',
        EOR='^',
    )

    # ARM condition suffix -> C relational operator.  CS/HS is "unsigned
    # higher or same" and LS is "unsigned lower or same".  The flag-only
    # conditions (PL, MI, VC, VS) have no exact relational equivalent and
    # are mapped to the nearest operator as an approximation.
    suffixMap = dict(
        NE='!=',
        EQ='==',
        GE='>=',
        PL='>=',
        CS='>=',
        HS='>=',
        GT='>',
        VC='>',
        HI='>',
        CC='<',
        LO='<',
        LT='<',
        MI='<',
        VS='<',
        LE='<=',
        LS='<=',
    )

    # Operators for which a zero right operand leaves the left operand
    # unchanged ('&' and '*' are deliberately absent: x & 0 == x * 0 == 0).
    _zeroIdentityOps = frozenset(['+', '-', '|', '^', '<<', '>>'])

    def __init__(self):
        """Print the banner and decompile the current function.

        The address under the cursor is decompiled as well when the walk
        from the function start did not reach it.
        """
        print('Aqualung -- An experimental ARM decompiler')
        print('Copyright 2007 Cody Brocious (cody DOT brocious AT gmail DOT com)')

        self.ida = IDA()

        # Addresses already visited, in visiting order.
        self.decompiled = []

        self.func = self.ida.getFunc()
        # Pending comparison: (left, right, [earlier chained conditions]).
        self.cmp = None
        # presumably func[0] is the function's start address -- TODO confirm
        self.decompile(self.func[0])

        if idaapi.get_screen_ea() not in self.decompiled:
            self.decompile(idaapi.get_screen_ea())

    def _freshRegs(self):
        """Return the initial register state: r0-r2 hold arg0-arg2."""
        regs = dict.fromkeys(['r%d' % n for n in range(16)] + ['trash'])
        regs.update(r0='arg0', r1='arg1', r2='arg2')
        return regs

    def decompile(self, ea, regs=None):
        """Walk instructions from ``ea`` until a visited address or a dead end.

        ea   -- address of the first instruction to process.
        regs -- register-state dict to continue from; ``None`` starts from
                the default state (``r0``-``r2`` named ``arg0``-``arg2``).

        ``self.regs`` is replaced even when ``ea`` was already visited.
        """
        if regs is None:
            self.regs = self._freshRegs()
        else:
            self.regs = regs

        while ea not in self.decompiled:
            self.decompiled.append(ea)
            self.ea = ea

            # Dump the symbolic state when the walk reaches the cursor.
            if ea == idaapi.get_screen_ea():
                for name in self.regs:
                    print('%s == %s' % (name, self.regs[name]))

            mnem, ops, branches = self.ida.getInsn(ea)
            self.dispatch(ea, mnem, ops)

            if not branches:
                break
            if len(branches) == 1:
                ea = branches[0]
                continue

            curfunc = self.ida.getFunc(ea)
            # Recursive calls rebind and mutate self.regs, so take a snapshot
            # here and hand every same-function branch its own copy;
            # otherwise later branches would start from the state left
            # behind by the paths explored before them.
            snapshot = dict(self.regs)
            for branch in branches:
                if self.ida.getFunc(branch) != curfunc:
                    self.decompile(branch)
                else:
                    self.decompile(branch, regs=dict(snapshot))
            break

    def dispatch(self, ea, mnem, ops):
        """Strip size/condition suffixes from ``mnem`` and run its handler.

        A six-letter LDR/STR mnemonic is split into base + condition + size
        letter; the size letter is re-attached after the condition has been
        removed so that e.g. ``LDRNEB`` is handled by ``LDRB``.
        """
        cond = ''

        if mnem.startswith(('LDR', 'STR')) and len(mnem) == 6:
            mnem, size = mnem[:-1], mnem[-1]
        else:
            size = None

        for suffix in self.suffixMap:
            if not mnem.endswith(suffix):
                continue
            tempmnem = mnem[:-len(suffix)]
            # Only strip the suffix when what remains is a real handler;
            # compare mnemonics keep theirs (nonCmp) because ``compare``
            # consumes the suffix itself.
            if not self.findHandler(tempmnem, nonCmp=True):
                continue
            mnem = tempmnem
            if self.cmp:
                cond = '%s %s %s' % (self.cmp[0], self.suffixMap[suffix],
                                     self.cmp[1])
                cond = 'if(%s) ' % ' && '.join(self.cmp[2] + [cond])
            break

        if size:
            mnem += size

        handler = self.findHandler(mnem)
        if handler is None:
            return  # unhandled instruction

        out = handler(mnem, ops)
        if out is not None and out is not False:
            # NOTE(review): the generated text (cond + out) is currently
            # discarded and the comment at ``ea`` is cleared instead --
            # confirm whether this is intentional.
            idc.MakeComm(ea, '')

    def findHandler(self, mnem, nonCmp=False):
        """Return the handler for ``mnem``, or ``None`` if there is none.

        nonCmp -- when true, CMP/CMN/TST mnemonics are not routed to
                  ``compare`` (used while probing for condition suffixes).
        """
        if mnem in self.arithMap:
            return self.arithmetic
        if not nonCmp and mnem.startswith(('CMP', 'CMN', 'TST')):
            return self.compare
        return getattr(self, mnem, None)

    def allnum(self, inp):
        """Return True when ``inp`` is non-empty and only ASCII digits."""
        if not inp:
            return False
        return all(c in '0123456789' for c in inp)

    def isConstant(self, value):
        """Return True for a quoted string, a decimal or a 0x literal."""
        if not value:
            return False
        if value[0] == '"' and value[-1] == '"':
            return True
        return self.allnum(value) or value[:2] == '0x'

    def _isWrapped(self, value):
        """Return True when one matching pair of parens encloses ``value``."""
        if len(value) < 2 or value[0] != '(' or value[-1] != ')':
            return False
        depth = 0
        for index, char in enumerate(value):
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    # The opening paren must close on the very last char.
                    return index == len(value) - 1
        return False

    def _stripParens(self, value):
        """Remove parentheses that enclose the whole of ``value``."""
        while self._isWrapped(value):
            value = value[1:-1]
        return value

    def addParens(self, value):
        """Lower-case ``value`` and parenthesise it unless it is atomic.

        Short register-like names (``r0``..``r15``) and expressions already
        enclosed by a matching pair of parentheses are returned as they are.
        """
        value = value.lower()
        if value.startswith('r') and len(value) <= 3:
            return value
        if self._isWrapped(value):
            return value
        return '(%s)' % value

    def _splitAddress(self, inner):
        """Split the inside of ``[...]`` on commas, keeping shifts attached.

        ``R0,R1,LSL#2`` becomes ``['R0', 'R1,LSL#2']`` so the shift is
        applied to the index register only.
        """
        parts = []
        for piece in inner.split(','):
            if parts and piece[:3] in self.shiftMap:
                parts[-1] += ',' + piece
            else:
                parts.append(piece)
        return parts

    def processOp(self, op, noRef=False, out=False):
        """Translate one operand into a C-like expression.

        op    -- operand text (register, ``#imm``, ``=name``, ``[...]`` or
                 ``reg,SHIFT#n``).
        noRef -- for ``=name`` operands return the 1-tuple ``(name,)``
                 instead of ``'&name'``.
        out   -- the operand is a destination: return the register name
                 itself rather than its tracked value.
        """
        if op[:1] == '[' and op[-1:] == ']':
            inner = op[1:-1]
            if ',' not in inner:
                # NOTE(review): a lone base register is returned without
                # substituting its tracked value -- confirm this is wanted.
                return inner
            return self.addParens(' + '.join(
                self.processOp(part) for part in self._splitAddress(inner)))

        temp = op.rsplit(',', 1)
        if len(temp) > 1 and temp[1][:3] in self.shiftMap:
            base = self.processOp(temp[0])
            amount = self.processOp(temp[1][3:])
            return '(%s %s %s)' % (base, self.shiftMap[temp[1][:3]], amount)

        if op[:1] == '#':
            literal = op[1:]
            if self.allnum(literal):
                # int() instead of eval(): no code execution and a leading
                # zero is not read as octal.
                return '0x%x' % int(literal)
            return literal

        if op[:1] == '=':
            if noRef:
                return (op[1:], )
            return '&' + op[1:]

        oplower = op.lower()
        if out or oplower not in self.regs or self.regs[oplower] is None:
            return op
        return self.regs[oplower]

    def _regKey(self, name):
        """Return the ``self.regs`` key for ``name`` ('trash' if unknown)."""
        key = name.lower()
        if key not in self.regs:
            key = 'trash'
        return key

    def arithmetic(self, mnem, ops):
        """Render a two- or three-operand arithmetic/logic instruction.

        Updates the tracked value of the destination register and returns
        the statement text.
        """
        out = self.processOp(ops[0], out=True)
        outlower = self._regKey(out)
        operator = self.arithMap[mnem]

        if len(ops) == 2:
            left = out
            right = self.processOp(ops[1])
        else:
            left = self.processOp(ops[1])
            right = self.processOp(ops[2])

        if mnem == 'BIC':
            right = '~%s' % right

        # "x op 0" collapses to "x" only for operators where zero is an
        # identity; AND/MUL by zero must keep the full expression.
        if right == '0x0' and operator in self._zeroIdentityOps:
            self.regs[outlower] = left
            return '%s = %s;' % (out, left)

        if out != left:
            arith = '%s %s %s' % (left, operator, right)
            self.regs[outlower] = self.addParens(arith)
            return '%s = %s;' % (out, arith)

        # NOTE(review): unlike the branch above, the tracked value is stored
        # without parentheses here -- confirm whether that is intended.
        self.regs[outlower] = '%s %s %s' % (self.processOp(out), operator,
                                            right)
        return '%s %s= %s;' % (out, operator, self._stripParens(right))

    def compare(self, mnem, ops):
        """Record the operands of CMP/CMN/TST in ``self.cmp``.

        A conditional compare (``CMPxx``) following an earlier compare
        appends the earlier comparison, evaluated with condition ``xx``, to
        the list of chained conditions.  Returns ``None``.
        """
        op1, op2 = self.processOp(ops[0]), self.processOp(ops[1])

        if mnem.startswith('CMN'):
            op2 = '-' + op2
        elif mnem.startswith('TST'):
            # TST sets the flags from (op1 & op2), so the conditions that
            # follow compare that result against zero.
            op1 = '(%s & %s)' % (op1, op2)
            op2 = '0x0'

        suffix = mnem[3:]
        if self.cmp and suffix in self.suffixMap:
            self.cmp[2].append(
                '%s %s %s' %
                (self.cmp[0], self.suffixMap[suffix], self.cmp[1]))
            self.cmp = op1, op2, self.cmp[2]
        else:
            self.cmp = op1, op2, []

    def _cString(self, data):
        """Return ``data`` as a double-quoted literal with repr() escapes.

        Prefixing a single quote makes repr() choose double-quote
        delimiters; the slice then drops the delimiter and the prefix.
        NOTE(review): text that itself contains a double quote makes repr()
        pick single quotes, so the result is off in that case.
        """
        return '"%s"' % repr("'" + data)[2:-1]

    def ADR(self, mnem, ops):
        """Render ADR: assign a named location or its string contents."""
        out = self.processOp(ops[0], out=True)
        addr = idc.LocByName(ops[1])
        if idc.GetStringType(addr) == 0:
            data = self._cString(self.ida.getString(addr))
        else:
            data = ops[1]

        return '%s = %s;' % (out, data)

    def B(self, mnem, ops):
        """Branches produce no statement text."""
        return ''

    def BX(self, mnem, ops):
        """Branch-and-exchange produces no statement text."""
        return ''

    def BL(self, mnem, ops):
        """Render a call using r0-r2 as arguments; the call becomes r0."""
        args = []
        for reg in ('r0', 'r1', 'r2'):
            value = self.regs[reg]
            args.append(reg.upper() if value is None else value)

        call = '%s(%s)' % (ops[0], ', '.join(args))
        self.regs['r0'] = call

        return call

    BLX = BL

    def LDR(self, mnem, ops):
        """Render a word load and track the loaded expression.

        ``=name`` operands and string data are assigned directly; any other
        source is dereferenced with ``*``.
        """
        out = self.processOp(ops[0], out=True)
        outlower = self._regKey(out)

        right = self.processOp(ops[1], noRef=True)

        addr = idc.LocByName(ops[1][1:])
        if idc.GetStringType(addr) == 0:
            # Wrapped in a tuple so it is not dereferenced below.
            right = (self._cString(self.ida.getString(addr)), )

        if isinstance(right, tuple):
            right = right[0]
        else:
            right = '*' + right

        self.regs[outlower] = right

        return '%s = %s;' % (out, right)

    def _loadSized(self, ops, ctype):
        """Render a narrow load through a ``(ctype *)`` cast and track it."""
        out = self.processOp(ops[0], out=True)
        outlower = self._regKey(out)

        addr = '*(%s *) %s' % (ctype, self.processOp(ops[1]))
        self.regs[outlower] = addr

        return '%s = %s;' % (out, addr)

    def LDRB(self, mnem, ops):
        """Render a byte load."""
        return self._loadSized(ops, 'uchar')

    def LDRH(self, mnem, ops):
        """Render a halfword load."""
        return self._loadSized(ops, 'ushort')

    def MOV(self, mnem, ops):
        """Render a move; ``Rd = Rd op x`` is shortened to ``Rd op= x``.

        A constant source clears the tracked value of the destination;
        anything else is stored parenthesised.
        """
        out = self.processOp(ops[0], out=True)
        outlower = self._regKey(out)

        value = self.processOp(ops[1])

        if self.isConstant(value):
            self.regs[outlower] = None
        else:
            self.regs[outlower] = self.addParens(value)

        value = self._stripParens(value)

        prefix = out + ' '
        if value.startswith(prefix):
            op, sep, rest = value[len(prefix):].partition(' ')
            if sep:
                return '%s %s= %s;' % (out, op, rest)
        return '%s = %s;' % (out, value)

    MOVL = MOV
    MOVS = MOV

    def _storeSized(self, ops, cast):
        """Render a store; ``cast`` is the pointer-cast prefix (may be '')."""
        value = self.processOp(ops[0])
        out = self.processOp(ops[1], out=True)
        return '*%s%s = %s;' % (cast, out, value)

    def STR(self, mnem, ops):
        """Render a word store."""
        return self._storeSized(ops, '')

    def STRB(self, mnem, ops):
        """Render a byte store."""
        return self._storeSized(ops, '(uchar *) ')

    def STRH(self, mnem, ops):
        """Render a halfword store."""
        return self._storeSized(ops, '(ushort *) ')