Example #1
def ThumbExpandImm(imm12):
    """Expand a 12-bit Thumb modified-immediate field into a 32-bit constant.

    ``imm12`` is a string of binary digits; it is parsed with
    ``int(imm12, 2)``.  Bits 11:10 select the encoding form:

    * ``00`` -- bits 9:8 choose how the low byte ``XY`` is replicated:
      ``00`` -> ``0x000000XY``, ``01`` -> ``0x00XY00XY``,
      ``10`` -> ``0xXY00XY00``, ``11`` -> ``0xXYXYXYXY``.
    * any other value -- the 8-bit value ``1:imm12<6:0>`` is wrapped in a
      32-bit ``cst`` and handed to ``_ror`` with the 5-bit amount
      ``imm12<11:7>``.

    Returns the object produced by ``cst`` (replicated forms) or by ``_ror``
    (rotated form).

    Raises ``ValueError`` when a replicating form (bits 9:8 != 00) is used
    with a zero low byte.
    """
    x = int(imm12, 2)

    if (x >> 10) & 3 != 0:
        # Rotated form.  This code used to sit after the ``return`` of the
        # replicated form, so it was unreachable and the function returned
        # None for every such encoding.
        v = cst((1 << 7) | (x & 0x7F), 32)
        return _ror(v, (x >> 7) & 0x1F)

    low = x & 0xFF
    s = (x >> 8) & 3
    if s == 0b00:
        return cst(low, 32)
    if low == 0:
        # Replicating a zero byte is rejected for all three remaining forms.
        raise ValueError("replicated immediate with a zero low byte")
    if s == 0b01:
        imm32 = (low << 16) | low
    elif s == 0b10:
        imm32 = (low << 24) | (low << 8)
    else:  # s == 0b11
        imm32 = (low << 24) | (low << 16) | (low << 8) | low
    return cst(imm32, 32)
Example #2
 def action_exp(toks):
     # Parse action for an arithmetic expression.  `toks` must hold exactly
     # one item; nested pp.ParseResults groups are folded recursively first,
     # then the flat token list is combined:
     #   ['-', cst]                      -> negated constant
     #   [a, sign, b]                    -> a + b
     #   [a, sign, b, sign, c]           -> a + b + c
     #   [a, sign, b, sign, c, sign, d]  -> a + b + c + d
     # A '-' sign is applied by negating the operand that follows it.
     # NOTE(review): this copy appears to have lost lines -- two `if`
     # statements below have no body, and each `assert` looks like it
     # belonged to a missing `else:`.  Confirm against the upstream file.
     # NOTE(review): body missing for the next `if`.
     if len(toks) != 1:
     toks = toks[0]
     # NOTE(review): body missing for the next `if` (presumably the
     # non-ParseResults token is handed back unchanged -- confirm).
     if not isinstance(toks, pp.ParseResults):
     for idx in range(len(toks)):
         if isinstance(toks[idx], pp.ParseResults):
             toks[idx] = action_exp([toks[idx]])
     if len(toks) == 2 and toks[0] == "-" and toks[1]._is_cst:
         return expressions.cst(-toks[1].value, size=toks[1].size)
     # NOTE(review): any other two-token list reaches toks[2] below and
     # would raise IndexError; handling of other unary forms may be missing.
     # We need to uniformize the sizes, else amoco will complain
     # The real size value is set later, it depends on other arguments
     if toks[2]._is_cst:
         toks[2].size = toks[0].size
         # Re-mask the constant's value after its size changed.
         toks[2].v &= toks[2].mask
     elif toks[2]._is_lab:
         toks[0].size = toks[2].size
         if toks[0]._is_cst:
             toks[0].v &= toks[0].mask
     if len(toks) >= 5:
         toks[4].size = toks[0].size
     if len(toks) == 7:
         toks[6].size = toks[0].size
     # We could prefer to use a generic analysis of toks, but
     # dealing with specific cases one by one gives more control
     # on the output
     # NOTE(review): as written, each `assert` below runs right after the
     # negation, i.e. only when the sign is "-", so it would always fail;
     # an `else:` line seems to be missing before each of them.
     if toks[1] == "-":
         toks[2] = -toks[2]
         assert toks[1] == "+"
     if len(toks) == 3:
         return toks[0] + toks[2]
     if toks[3] == "-":
         toks[4] = -toks[4]
         assert toks[3] == "+"
     if len(toks) == 5:
         return toks[0] + toks[2] + toks[4]
     if toks[5] == "-":
         toks[6] = -toks[6]
         assert toks[5] == "+"
     if len(toks) == 7:
         return toks[0] + toks[2] + toks[4] + toks[6]
         # NOTE(review): unreachable after the `return` above; probably the
         # body of a missing `else:` reporting an unsupported token list.
         print("EXP %s" % toks)
Example #3
class att_syntax(object): # Used as a namespace
    # Grammar elements (pyparsing) and parse-action callbacks for assembly
    # text written in AT&T style (%register, $immediate,
    # displacement(base,index,scale)); the callbacks build amoco expressions
    # and `instruction` objects.
    #
    # NOTE(review): this copy looks truncated in many places -- several `if`
    # headers have no body, several opening brackets are never closed, and
    # some `else:`/`break` lines seem to have been dropped.  The notes below
    # flag the affected spots; confirm everything against the upstream file.

    # The following three lines need to be redefined for x64
    # NOTE(review): only two assignments follow in this copy.
    # `env = env` copies the module-level `env` into the class namespace.
    env = env
    cpu_addrsize = env.internals['mode']

    # Keywords accepted as an optional prefix in front of the mnemonic
    # (see `instr` below).
    pfx = pp.oneOf([ 'data16', 'addr16', 'data32', 'addr32', 'lock', 'wait',
                     'rep', 'repz', 'repe', 'repne', 'repnz'])

    # Putting everything in one regex is faster than having pyparsing
    # work with an alternative, e.g.  number = integer|hexa|octa|bina
    # We cannot generate an expressions.cst here, because we don't know
    # what will be the integer size; e.g. it will be 32 for %esp+10 but
    # it will be 64 for %rsp+10, and it will depend on the instruction
    # for $0
    number  = pp.Regex(r'(0[xX][0-9a-fA-F]+|0[bB][01]+|[0-9]+)')
    # int(text, 0) picks the base from the 0x/0b prefix.  The size 64 is
    # provisional: action_exp and action_instr overwrite `.size` later.
    # NOTE(review): with base 0, Python 3 rejects decimal text with a leading
    # zero such as "010" (ValueError), although the regex accepts it.
    number.setParseAction(lambda toks:expressions.cst(int(toks[0],0),64))

    # NOTE(review): this pattern is not a raw string; the '\.' and '\['
    # sequences only work because Python keeps unknown escapes verbatim, and
    # recent Python versions emit a warning for them.
    symbol  = pp.Regex('([a-zA-Z_][a-zA-Z0-9_.$]*|\.L[a-zA-Z0-9_.]+)(@[a-zA-Z]+)?|[1-9][0-9]*[bf]|\[\.-\.L[A-Z]*[0-9]*\]')
    # A quote followed by one character, or a quote followed by two
    # backslashes.
    char    = pp.Regex(r"('.)|('\\\\)")

    term    = symbol|number|char
    # Turns a plain-string token into a label of address size; any other
    # token falls through and the function returns None.
    # NOTE(review): no `term.setParseAction(action_term)` is visible in this
    # copy, and the function is not wrapped in staticmethod.
    def action_term(toks):
        if isinstance(toks[0], str):
            return expressions.lab(toks[0],size=att_syntax.cpu_addrsize)

    # Forward declaration; `exp` receives its definition via `<<` below.
    exp     = pp.Forward()

    # Operator groups for the expression grammar.
    op_one  = pp.oneOf("- ~")
    op_sig  = pp.oneOf("+ -")
    op_mul  = pp.oneOf("* /")
    op_cmp  = pp.oneOf("== != <= >= < > <>")
    op_bit  = pp.oneOf("^ && || & |")

    # NOTE(review): the list opened on the next line is never closed in this
    # copy; entries for the other operator groups appear to be missing.
    operators = [(op_one,1,pp.opAssoc.RIGHT),
    exp << pp.operatorPrecedence(term,operators)
    # Parse action for an arithmetic expression.  `toks` must hold exactly
    # one item; nested pp.ParseResults groups are folded recursively first,
    # then ['-', cst] becomes a negated constant and token lists of length
    # 3, 5 or 7 (operand, sign, operand, ...) become a sum in which every
    # operand preceded by '-' is negated.
    def action_exp(toks):
        # NOTE(review): `NEVER` is not defined in the visible code; if it is
        # undefined everywhere, evaluating it raises NameError -- presumably
        # a deliberate "cannot happen" trap.  Confirm.
        if len(toks) != 1: NEVER
        toks = toks[0]
        # NOTE(review): body missing for the next `if` (presumably the
        # non-ParseResults token is handed back unchanged -- confirm).
        if not isinstance(toks, pp.ParseResults):
        for idx in range(len(toks)):
            if isinstance(toks[idx], pp.ParseResults):
                toks[idx] = att_syntax.action_exp([toks[idx]])
        if len(toks) == 2 and toks[0] == '-' and toks[1]._is_cst:
            return expressions.cst(-toks[1].value,size=toks[1].size)
        # NOTE(review): any other two-token list reaches toks[2] below and
        # would raise IndexError; handling of other unary forms may be
        # missing.
        # We need to uniformize the sizes, else amoco will complain
        # The real size value is set later, it depends on other arguments
        if toks[2]._is_cst:
            toks[2].size = toks[0].size
            # Re-mask the constant's value after its size changed.
            toks[2].v &= toks[2].mask
        elif toks[2]._is_lab:
            toks[0].size = toks[2].size
            if toks[0]._is_cst: toks[0].v &= toks[0].mask
        if len(toks) >= 5: toks[4].size = toks[0].size
        if len(toks) == 7: toks[6].size = toks[0].size
        # We could prefer to use a generic analysis of toks, but
        # dealing with specific cases one by one gives more control
        # on the output
        if toks[1] == '-': toks[2] = -toks[2]
        else:              assert toks[1] == '+'
        if len(toks) == 3:
            return toks[0] + toks[2]
        if toks[3] == '-': toks[4] = -toks[4]
        else:              assert toks[3] == '+'
        if len(toks) == 5:
            return toks[0] + toks[2] + toks[4]
        if toks[5] == '-': toks[6] = -toks[6]
        else:              assert toks[5] == '+'
        if len(toks) == 7:
            return toks[0] + toks[2] + toks[4] + toks[6]
            # NOTE(review): unreachable after the `return` above; probably
            # the body of a missing `else:` reporting an unsupported list.
            print("EXP %s"%toks)
    action_exp = staticmethod(action_exp)

    # '$' followed by an expression; the parse action keeps only the
    # expression (toks[1]) and drops the '$'.
    imm = '$'+exp
    imm.setParseAction(lambda toks:toks[1])

    # '%st(N)' with N in 0..7, or '%' followed by a symbol.
    fpreg = 'st('+pp.Regex(r'[0-7]')+')'
    reg = '%'+fpreg|'%'+symbol
    # Maps the register text (toks[1], the token after '%') to an object
    # taken from `env`.  For 'st(' the digit is in toks[2].  Names that
    # match no special case are looked up directly in env.__dict__, so an
    # unknown name raises KeyError.
    def action_reg(toks):
        r = toks[1]
        if r == 'st':  return att_syntax.env.st(0)
        if r == 'st(': return att_syntax.env.st(int(toks[2]))
        if r.startswith('mm'): return att_syntax.env.mmregs[int(r[2:])]
        if r.startswith('xmm'): return att_syntax.env.xmmregs[int(r[3:])]
        if r[0] == 'r' and r[-1] == 'b':
            # gcc or clang use 'r8b' instead of 'R8L' from Intel specs
            r = r[:-1]+'l'
        return att_syntax.env.__dict__[r]

    # Parenthesised part of a memory operand: '(' [reg] [',' reg [',' exp]] ')'.
    bis = '('+pp.Optional(reg)+pp.Optional(','+reg+pp.Optional(','+exp))+')'
    # Builds the address expression from the tokens of `bis`, selected by
    # the token count:
    #   3: '(' base ')'                      -> base
    #   5: '(' base ',' index ')'            -> base + index
    #   6: '(' ',' index ',' scale ')'       -> index * scale
    #   7: '(' base ',' index ',' scale ')'  -> base + index * scale
    # When env.internals['keep_order'] is set, bit 16 is or-ed into the
    # `prop` of the sum.
    # NOTE(review): if no branch matches, `addr` is never bound and the
    # final `return` raises UnboundLocalError.
    def action_bis(toks):
        if len(toks) == 3:
            addr = toks[1]
        elif len(toks) == 5 and toks[2] == ',':
            addr = expressions.op('+', toks[1], toks[3])
            if att_syntax.env.internals.get('keep_order'): addr.prop |= 16
        elif len(toks) == 6 and toks[1] == ',' and toks[3] == ',':
            toks[4].size = toks[2].size # cst size set to register size
            addr = expressions.oper('*', toks[2], toks[4])
        elif len(toks) == 7 and toks[2] == ',' and toks[4] == ',':
            toks[5].size = toks[3].size # cst size set to register size
            addr = expressions.op('+', toks[1], expressions.oper('*', toks[3], toks[5]))
            if att_syntax.env.internals.get('keep_order'): addr.prop |= 16
        return addr

    # An address is a displacement followed by `bis`, `bis` alone, or a
    # bare expression; a memory operand may carry a 'reg:' segment prefix.
    adr = exp+bis|bis|exp
    mem = pp.Optional(reg+':')+adr
    # Builds an expressions.mem from an optional segment, a displacement and
    # an address.
    # NOTE(review): this copy is missing lines: `seg` is assigned toks[0]
    # and then immediately '' inside the same branch (and is unbound when no
    # ':' is present), and `addr`/`disp` are read without ever being
    # assigned in the visible code.  Confirm against the upstream file.
    def action_mem(toks):
        # we use str(_) because != is redefined for amoco expressions
        # and fails when comparing with strings
        r = [ str(_) for _ in toks ]
        if ':' in r:
            assert len(r) == 3
            assert ':' == r[1]
            seg = toks[0]
            seg = ''
        if len(toks) == 2:
            if hasattr(disp, 'value'): disp = disp.value
            else: assert disp.size == addr.size
        # NOTE(review): `seg is ''` compares identity with a literal, which
        # Python 3.8+ warns about; equality is presumably what is meant.
        if addr._is_cst and seg is '':
            seg = att_syntax.env.ds
        return expressions.mem(addr,att_syntax.cpu_addrsize,disp=disp,seg=seg)

    # Operand alternatives; `ind` is an operand preceded by '*'.
    opd = mem|reg|imm
    ind = '*'+opd
    opd_i = opd|ind
    opds = pp.Group(pp.delimitedList(opd_i))

    # One assembly line: optional prefix, mnemonic, optional operand list.
    instr = pp.Optional(pfx) + symbol + pp.Optional(opds)

    # Mnemonic tables built as prefix + size suffix.
    # NOTE(review): the prefix tuple of mmx_with_suffix1 is empty in this
    # copy, so that list is empty.
    mmx_with_suffix1 = [ p+s for p in (
        ) for s in ('B','W','D','Q') ]
    mmx_with_suffix2 = [ p+s for p in (
        'UNPCKL','UNPCKH', 'MOV','MOVNT', 'HADD',
        'COMI','UCOMI', 'CMP', 'SHUF',
        ) for s in ('PS','PD','SD','SS') ]
    # Parse action for one assembly line: builds an `instruction`, takes the
    # mnemonic from toks[0] and the operands from toks[1] (stored in
    # reversed order), then normalises the mnemonic and fixes operand sizes.
    # Returns the populated instruction `i`.
    # NOTE(review): truncated throughout in this copy -- see the notes on
    # unclosed literals, body-less branches and missing `else:`/`break`.
    def action_instr(toks):
        i = instruction('')
        i.mnemonic = toks[0].upper()
        # Remove prefixes
        if i.mnemonic in ('REP','REPZ','REPNZ','REPE','REPNE'):
            if i.mnemonic in ('REP','REPZ','REPE'):
                i.misc.update({'pfx':['rep',   None,None,None], 'rep':True})
            if i.mnemonic in ('REPNZ','REPNE'):
                i.misc.update({'pfx':['repne',None,None, None], 'repne':True})
            # NOTE(review): nothing visible drops the prefix token first, so
            # toks[0] is still the prefix here -- a removal step may be
            # missing.
            i.mnemonic = toks[0].upper()
        # Get operands
        if len(toks) > 1:
            # The operand list is stored in reverse of its textual order.
            i.operands = list(reversed(toks[1][0:]))
        # Convert mnemonics, set operand sizes
        # NOTE(review): the tuple opened on the next line is never closed.
        if i.mnemonic in ('CALLL','CALLQ','JMPL','JMPQ','RETL','RETQ',
            # clang on MacOS X
            if i.mnemonic[-1] in ('L','Q'):
                i.mnemonic = i.mnemonic[:-1]
                # NOTE(review): as written this overwrites the mnemonic that
                # was just stripped; probably guarded by a missing branch.
                i.mnemonic = 'FUCOMIP'
        mnemo = i.mnemonic.lower()
        if i.mnemonic in ('CALL','JMP'):
            # After reversal a '*' marker (see `ind`) sits at index 1.
            if len(i.operands) == 2 and i.operands[1] == '*':
                if not i.operands[0]._is_mem and \
                   not i.operands[0]._is_reg:
                    i.operands[0] = expressions.mem(i.operands[0],att_syntax.cpu_addrsize)
                # NOTE(review): this unwraps a memory operand right after
                # the wrapping above; lines separating the direct and
                # indirect cases may be missing.
                if i.operands[0]._is_mem:
                    i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
        elif i.mnemonic.startswith(('J','SET','CMOV')):
            # Find which prefix matched; `pfx` keeps that value after break.
            for pfx in ('J','SET','CMOV'):
                if i.mnemonic.startswith(pfx): break
            # The loop target is the attribute i.cond itself: on `break` it
            # holds the CONDITION_CODES entry whose names (cond[0], split on
            # '/') contain the mnemonic's condition suffix.
            for i.cond in att_syntax.CONDITION_CODES.values():
                if i.mnemonic[len(pfx):] in i.cond[0].split('/'): break
                # NOTE(review): the retries below (ignoring a trailing size
                # letter) sit inside the loop body and their inner `if`
                # lines have no body; this looks like a `for ... else:`
                # whose `else:`/`break` lines were lost.
                if pfx == 'CMOV' and i.mnemonic[-1] in ('W','L','Q'):
                    # clang on MacOS X
                    for i.cond in att_syntax.CONDITION_CODES.values():
                        if i.mnemonic[len(pfx):-1] in i.cond[0].split('/'):
                elif pfx == 'SET' and i.mnemonic[-1] == 'B':
                    # gcc 3.2.3
                    for i.cond in att_syntax.CONDITION_CODES.values():
                        if i.mnemonic[len(pfx):-1] in i.cond[0].split('/'):
            # The mnemonic is normalised to Jcc / SETcc / CMOVcc.
            i.mnemonic = pfx+'cc'
            if pfx == 'J':
                i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
            if pfx == 'CMOV':
                # A memory operand takes the size of the other operand.
                if i.operands[0]._is_mem: i.operands[0].size = i.operands[1].size
                if i.operands[1]._is_mem: i.operands[1].size = i.operands[0].size
        elif mnemo in att_mnemo_correspondance:
            # Direct renaming through the correspondence table.
            i.mnemonic = att_mnemo_correspondance[mnemo].upper()
            if i.mnemonic in ('CBW','CWD','IRET'):
                i.misc.update({'opdsz':16, 'pfx':[None, None, 'opdsz', None]})
        elif mnemo[-1] == 'w' and mnemo[:-1] in mnemo_string_rep:
            i.misc.update({'opdsz':16, 'pfx':[None, None, 'opdsz', None]})
        # NOTE(review): the next branch contains only a comment, no body.
        elif i.mnemonic == 'CMPSD' and len(i.operands) == 0:
            # String cmpsd, different from the SSE cmp*sd, has no arguments
        elif i.mnemonic == 'MOVSL':
            # String movsd, different from the SSE movsd
            # Has no arguments
            i.mnemonic = 'MOVSD'
            assert len(i.operands) == 0
        elif i.mnemonic[:-2] in ('MOVS','MOVZ'):
            # Each pair is (size stored in misc['opdsz'] and given to a
            # memory operands[1], size asserted for operands[0]).
            # NOTE(review): the dict literal is never closed and the lookup
            # key is missing in this copy.
            i.misc['opdsz'], sz = {
                'BW': ( 8,16),
                'BL': ( 8,32),
                'BQ': ( 8,64),
                'WL': (16,32),
                'WQ': (16,64),
                'LQ': (32,64),
            if i.operands[1]._is_mem:
                i.operands[1].size = i.misc['opdsz']
            assert i.operands[0].size == sz
            i.mnemonic = i.mnemonic[:-2] + 'X'
        # NOTE(review): the MOVD branch has no body, and the tuple of the
        # following branch is never closed.
        elif i.mnemonic == 'MOVD':
        elif i.mnemonic in (
            'FLDCW', 'FSTCW', 'FNSTCW',
            assert i.operands[0]._is_mem
            i.operands[0].size = 16
        elif i.mnemonic in att_syntax.mmx_with_suffix2 and len(i.operands):
            if i.mnemonic.endswith('SS'):
                if i.operands[1]._is_mem: i.operands[1].size = 32
            elif i.mnemonic.endswith('SD'):
                if i.operands[0]._is_mem: i.operands[0].size = 64
                if i.operands[1]._is_mem: i.operands[1].size = 64
                # NOTE(review): as written this immediately replaces the 64
                # set on the previous line; an `else:` seems to be missing.
                if i.operands[1]._is_mem: i.operands[1].size = 128
        # NOTE(review): the membership tuples of the next four branches are
        # never closed; their mnemonic lists are missing or partial.
        elif i.mnemonic in (
            if i.operands[1]._is_mem: i.operands[1].size = 32
        elif i.mnemonic in (
            'MOVDDUP', 'PSHUFW',
            if i.operands[0]._is_mem: i.operands[0].size = 64
            if i.operands[1]._is_mem: i.operands[1].size = 64
        elif i.mnemonic in (
            if i.operands[0]._is_mem: i.operands[0].size = 128
            if i.operands[1]._is_mem: i.operands[1].size = 128
        elif i.mnemonic in att_syntax.mmx_with_suffix1 or i.mnemonic in (
            if i.operands[1]._is_mem: i.operands[1].size = i.operands[0].size
        elif i.mnemonic == 'MOVABSQ':
            # gcc generates 'movabsq $cst, %reg' instead of 'movq $cst, %reg'
            i.mnemonic = 'MOV'
            # Every memory, constant and label operand becomes 64 bits wide;
            # constants are re-masked after the size change.
            for _ in i.operands:
                if _._is_mem: _.size = 64
                if _._is_cst: _.size = 64; _.v &= _.mask
                if _._is_lab: _.size = 64
            # NOTE(review): the suffix-stripping loops below are indented as
            # part of the MOVABSQ branch; they probably belonged to a final
            # `else:` that is missing from this copy.
            for _ in att_mnemo_suffix_one_ptr:
                if mnemo[:-1] != _: continue
                # Detect MMX MOVQ instruction, not 64-bit register MOV for x64
                if i.mnemonic == 'MOVQ' \
                        and i.operands[1]._is_reg \
                        and 'mm' in i.operands[1].ref:
                    if i.operands[0]._is_mem: i.operands[0].size = 64
                if i.mnemonic == 'MOVQ' \
                        and i.operands[0]._is_reg \
                        and 'mm' in i.operands[0].ref:
                    if i.operands[1]._is_mem: i.operands[1].size = 64
                # The one-letter suffix is dropped and turned into a size.
                i.mnemonic = _.upper()
                sz = {'b':8, 'w':16, 'l':32, 'q':64}[mnemo[-1]]
                # The inner loop reuses `_`; the outer iteration is
                # unaffected because it advances its own iterator.
                for _ in i.operands:
                    if _._is_mem: _.size = sz
                    if _._is_cst: _.size = sz; _.v &= _.mask
                    if _._is_lab: _.size = sz
            for _ in att_mnemo_suffix_one_iflt:
                if mnemo[:-1] != _: continue
                i.mnemonic = _.upper()
                sz = {'s':16, 'l':32, 'q':64}[mnemo[-1]]
                for _ in i.operands:
                    if _._is_mem: _.size = sz
            if mnemo[-2:] == 'll' and mnemo[:-2] in att_mnemo_suffix_one_iflt:
                # clang on MacOS X
                i.mnemonic = mnemo[:-2].upper()
                for _ in i.operands:
                    if _._is_mem: _.size = 64
            for _ in att_mnemo_float_optional_suffix:
                if mnemo[:-1] != _: continue
                if mnemo[-1] in ('p','r','z','1'): continue
                i.mnemonic = _.upper()
                sz = {'s':32, 'l':64, 't':80}[mnemo[-1]]
                for _ in i.operands:
                    if _._is_mem: _.size = sz
        # Implicit operands
        if i.mnemonic in ('AAD','AAM'):
            if len(i.operands) == 0:
                # NOTE(review): indexing an empty operand list raises
                # IndexError; the line adding the implicit operand seems to
                # be missing.
                i.operands[0].size = 8
        # NOTE(review): the next branch has no body in this copy.
        elif i.mnemonic in ('SAL','SAR','SHL','SHR','ROR','ROL') \
                and len(i.operands) == 1:
        elif i.mnemonic.startswith('CMP') \
                and i.mnemonic.endswith(('PS','PD','SD','SS')) \
                and len(i.operands):
            # The predicate name between 'CMP' and the two-letter suffix is
            # looked up, then removed from the mnemonic.
            # NOTE(review): `idx` is not used afterwards in this copy.
            idx = mnemo_sse_cmp_predicate.index(i.mnemonic[3:-2].lower())
            i.mnemonic = i.mnemonic[:3] + i.mnemonic[-2:]
        return i
Example #4
 def action_instr(toks):
     # Parse action for one assembly line: builds an `instruction`, takes the
     # mnemonic from toks[0] and the operands from toks[1] (stored in
     # reversed order), then normalises the mnemonic and fixes operand sizes.
     # Returns the populated instruction `i`.
     # NOTE(review): this copy looks truncated throughout -- several tuple
     # and dict literals are never closed, several `if`/`elif` headers have
     # no body, and some `else:`/`break` lines appear to be missing.  The
     # notes below flag the spots; confirm against the upstream file.
     i = instruction('')
     i.mnemonic = toks[0].upper()
     # Remove prefixes
     if i.mnemonic in ('REP','REPZ','REPNZ','REPE','REPNE'):
         if i.mnemonic in ('REP','REPZ','REPE'):
             i.misc.update({'pfx':['rep',   None,None,None], 'rep':True})
         if i.mnemonic in ('REPNZ','REPNE'):
             i.misc.update({'pfx':['repne',None,None, None], 'repne':True})
         # NOTE(review): nothing visible drops the prefix token first, so
         # toks[0] is still the prefix here -- a removal step may be missing.
         i.mnemonic = toks[0].upper()
     # Get operands
     if len(toks) > 1:
         # The operand list is stored in reverse of its textual order.
         i.operands = list(reversed(toks[1][0:]))
     # Convert mnemonics, set operand sizes
     # NOTE(review): the tuple opened on the next line is never closed.
     if i.mnemonic in ('CALLL','CALLQ','JMPL','JMPQ','RETL','RETQ',
         # clang on MacOS X
         if i.mnemonic[-1] in ('L','Q'):
             i.mnemonic = i.mnemonic[:-1]
             # NOTE(review): as written this overwrites the mnemonic that was
             # just stripped; probably guarded by a missing branch header.
             i.mnemonic = 'FUCOMIP'
     mnemo = i.mnemonic.lower()
     if i.mnemonic in ('CALL','JMP'):
         # A '*' marker at index 1 presumably flags an indirect target
         # written `*operand` (operands are reversed) -- confirm.
         if len(i.operands) == 2 and i.operands[1] == '*':
             if not i.operands[0]._is_mem and \
                not i.operands[0]._is_reg:
                 i.operands[0] = expressions.mem(i.operands[0],att_syntax.cpu_addrsize)
             # NOTE(review): this unwraps a memory operand right after the
             # wrapping above; lines separating the direct and indirect
             # cases may be missing.
             if i.operands[0]._is_mem:
                 i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
     elif i.mnemonic.startswith(('J','SET','CMOV')):
         # Find which prefix matched; `pfx` keeps that value after break.
         for pfx in ('J','SET','CMOV'):
             if i.mnemonic.startswith(pfx): break
         # The loop target is the attribute i.cond itself: on `break` it
         # holds the CONDITION_CODES entry whose names (cond[0], split on
         # '/') contain the mnemonic's condition suffix.
         for i.cond in att_syntax.CONDITION_CODES.values():
             if i.mnemonic[len(pfx):] in i.cond[0].split('/'): break
             # NOTE(review): the retries below (ignoring a trailing size
             # letter) sit inside the loop body and their inner `if` lines
             # have no body; this looks like a `for ... else:` whose
             # `else:`/`break` lines were lost.
             if pfx == 'CMOV' and i.mnemonic[-1] in ('W','L','Q'):
                 # clang on MacOS X
                 for i.cond in att_syntax.CONDITION_CODES.values():
                     if i.mnemonic[len(pfx):-1] in i.cond[0].split('/'):
             elif pfx == 'SET' and i.mnemonic[-1] == 'B':
                 # gcc 3.2.3
                 for i.cond in att_syntax.CONDITION_CODES.values():
                     if i.mnemonic[len(pfx):-1] in i.cond[0].split('/'):
         # The mnemonic is normalised to Jcc / SETcc / CMOVcc.
         i.mnemonic = pfx+'cc'
         if pfx == 'J':
             i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
         if pfx == 'CMOV':
             # A memory operand takes the size of the other operand.
             if i.operands[0]._is_mem: i.operands[0].size = i.operands[1].size
             if i.operands[1]._is_mem: i.operands[1].size = i.operands[0].size
     elif mnemo in att_mnemo_correspondance:
         # Direct renaming through the correspondence table.
         i.mnemonic = att_mnemo_correspondance[mnemo].upper()
         if i.mnemonic in ('CBW','CWD','IRET'):
             i.misc.update({'opdsz':16, 'pfx':[None, None, 'opdsz', None]})
     elif mnemo[-1] == 'w' and mnemo[:-1] in mnemo_string_rep:
         i.misc.update({'opdsz':16, 'pfx':[None, None, 'opdsz', None]})
     # NOTE(review): the next branch contains only a comment, no body.
     elif i.mnemonic == 'CMPSD' and len(i.operands) == 0:
         # String cmpsd, different from the SSE cmp*sd, has no arguments
     elif i.mnemonic == 'MOVSL':
         # String movsd, different from the SSE movsd
         # Has no arguments
         i.mnemonic = 'MOVSD'
         assert len(i.operands) == 0
     elif i.mnemonic[:-2] in ('MOVS','MOVZ'):
         # Each pair is (size stored in misc['opdsz'] and given to a memory
         # operands[1], size asserted for operands[0]).
         # NOTE(review): the dict literal is never closed and the lookup key
         # is missing in this copy.
         i.misc['opdsz'], sz = {
             'BW': ( 8,16),
             'BL': ( 8,32),
             'BQ': ( 8,64),
             'WL': (16,32),
             'WQ': (16,64),
             'LQ': (32,64),
         if i.operands[1]._is_mem:
             i.operands[1].size = i.misc['opdsz']
         assert i.operands[0].size == sz
         i.mnemonic = i.mnemonic[:-2] + 'X'
     # NOTE(review): the MOVD branch has no body, and the tuple of the
     # following branch is never closed.
     elif i.mnemonic == 'MOVD':
     elif i.mnemonic in (
         'FLDCW', 'FSTCW', 'FNSTCW',
         assert i.operands[0]._is_mem
         i.operands[0].size = 16
     elif i.mnemonic in att_syntax.mmx_with_suffix2 and len(i.operands):
         if i.mnemonic.endswith('SS'):
             if i.operands[1]._is_mem: i.operands[1].size = 32
         elif i.mnemonic.endswith('SD'):
             if i.operands[0]._is_mem: i.operands[0].size = 64
             if i.operands[1]._is_mem: i.operands[1].size = 64
             # NOTE(review): as written this immediately replaces the 64 set
             # on the previous line; an `else:` seems to be missing.
             if i.operands[1]._is_mem: i.operands[1].size = 128
     # NOTE(review): the membership tuples of the next four branches are
     # never closed; their mnemonic lists are missing or partial.
     elif i.mnemonic in (
         if i.operands[1]._is_mem: i.operands[1].size = 32
     elif i.mnemonic in (
         'MOVDDUP', 'PSHUFW',
         if i.operands[0]._is_mem: i.operands[0].size = 64
         if i.operands[1]._is_mem: i.operands[1].size = 64
     elif i.mnemonic in (
         if i.operands[0]._is_mem: i.operands[0].size = 128
         if i.operands[1]._is_mem: i.operands[1].size = 128
     elif i.mnemonic in att_syntax.mmx_with_suffix1 or i.mnemonic in (
         if i.operands[1]._is_mem: i.operands[1].size = i.operands[0].size
     elif i.mnemonic == 'MOVABSQ':
         # gcc generates 'movabsq $cst, %reg' instead of 'movq $cst, %reg'
         i.mnemonic = 'MOV'
         # Every memory, constant and label operand becomes 64 bits wide;
         # constants are re-masked after the size change.
         for _ in i.operands:
             if _._is_mem: _.size = 64
             if _._is_cst: _.size = 64; _.v &= _.mask
             if _._is_lab: _.size = 64
         # NOTE(review): the suffix-stripping loops below are indented as
         # part of the MOVABSQ branch; they probably belonged to a final
         # `else:` that is missing from this copy.
         for _ in att_mnemo_suffix_one_ptr:
             if mnemo[:-1] != _: continue
             # Detect MMX MOVQ instruction, not 64-bit register MOV for x64
             if i.mnemonic == 'MOVQ' \
                     and i.operands[1]._is_reg \
                     and 'mm' in i.operands[1].ref:
                 if i.operands[0]._is_mem: i.operands[0].size = 64
             if i.mnemonic == 'MOVQ' \
                     and i.operands[0]._is_reg \
                     and 'mm' in i.operands[0].ref:
                 if i.operands[1]._is_mem: i.operands[1].size = 64
             # The one-letter suffix is dropped and turned into a size.
             i.mnemonic = _.upper()
             sz = {'b':8, 'w':16, 'l':32, 'q':64}[mnemo[-1]]
             # The inner loop reuses `_`; the outer iteration is unaffected
             # because it advances its own iterator.
             for _ in i.operands:
                 if _._is_mem: _.size = sz
                 if _._is_cst: _.size = sz; _.v &= _.mask
                 if _._is_lab: _.size = sz
         for _ in att_mnemo_suffix_one_iflt:
             if mnemo[:-1] != _: continue
             i.mnemonic = _.upper()
             sz = {'s':16, 'l':32, 'q':64}[mnemo[-1]]
             for _ in i.operands:
                 if _._is_mem: _.size = sz
         if mnemo[-2:] == 'll' and mnemo[:-2] in att_mnemo_suffix_one_iflt:
             # clang on MacOS X
             i.mnemonic = mnemo[:-2].upper()
             for _ in i.operands:
                 if _._is_mem: _.size = 64
         for _ in att_mnemo_float_optional_suffix:
             if mnemo[:-1] != _: continue
             if mnemo[-1] in ('p','r','z','1'): continue
             i.mnemonic = _.upper()
             sz = {'s':32, 'l':64, 't':80}[mnemo[-1]]
             for _ in i.operands:
                 if _._is_mem: _.size = sz
     # Implicit operands
     if i.mnemonic in ('AAD','AAM'):
         if len(i.operands) == 0:
             # NOTE(review): indexing an empty operand list raises
             # IndexError; the line adding the implicit operand seems to be
             # missing.
             i.operands[0].size = 8
     # NOTE(review): the next branch has no body in this copy.
     elif i.mnemonic in ('SAL','SAR','SHL','SHR','ROR','ROL') \
             and len(i.operands) == 1:
     elif i.mnemonic.startswith('CMP') \
             and i.mnemonic.endswith(('PS','PD','SD','SS')) \
             and len(i.operands):
         # The predicate name between 'CMP' and the two-letter suffix is
         # looked up, then removed from the mnemonic.
         # NOTE(review): `idx` is not used afterwards in this copy.
         idx = mnemo_sse_cmp_predicate.index(i.mnemonic[3:-2].lower())
         i.mnemonic = i.mnemonic[:3] + i.mnemonic[-2:]
     return i
Example #5
def ARMExpandImm(x):
    """Expand a 12-bit ARM immediate field given as an integer.

    The low byte ``x<7:0>`` is wrapped in a 32-bit ``cst`` and passed to
    ``_ror2`` together with the 4-bit field ``x<11:8>``.  Returns whatever
    ``_ror2`` returns.
    """
    unrotated = cst(x & 0xFF, 32)
    amount = (x >> 8) & 0xF
    return _ror2(unrotated, amount)
Example #6
def att_syntax_gen(env, CONDITION_CODES, cpu_addrsize, instruction):
    pfx = pp.oneOf([

    # Putting everything in one regex is faster than having pyparsing
    # work with an alternative, e.g.  number = integer|hexa|octa|bina
    # We cannot generate an expressions.cst here, because we don't know
    # what will be the integer size; e.g. it will be 32 for %esp+10 but
    # it will be 64 for %rsp+10, and it will depend on the instruction
    # for $0
    number = pp.Regex(r"(0[xX][0-9a-fA-F]+|0[bB][01]+|[0-9]+)")
    number.setParseAction(lambda toks: expressions.cst(int(toks[0], 0), 64))

    symbol = pp.Regex(
    char = pp.Regex(r"('.)|('\\\\)")

    term = symbol | number | char

    def action_term(toks):
        """Wrap a plain-string first token in a label of address size.

        Any other kind of token is left alone: the function returns None.
        """
        head = toks[0]
        if not isinstance(head, str):
            return None
        return expressions.lab(head, size=cpu_addrsize)


    exp = pp.Forward()

    op_one = pp.oneOf("- ~")
    op_sig = pp.oneOf("+ -")
    op_mul = pp.oneOf("* /")
    op_cmp = pp.oneOf("== != <= >= < > <>")
    op_bit = pp.oneOf("^ && || & |")

    operators = [
        (op_one, 1, pp.opAssoc.RIGHT),
        (op_sig, 2, pp.opAssoc.LEFT),
        (op_mul, 2, pp.opAssoc.LEFT),
        (op_cmp, 2, pp.opAssoc.LEFT),
        (op_bit, 2, pp.opAssoc.LEFT),
    exp << pp.operatorPrecedence(term, operators)

    def action_exp(toks):
        # Parse action for an arithmetic expression.  `toks` must hold
        # exactly one item; nested pp.ParseResults groups are folded
        # recursively first, then the flat token list is combined:
        #   ['-', cst]                      -> negated constant
        #   [a, sign, b]                    -> a + b
        #   [a, sign, b, sign, c]           -> a + b + c
        #   [a, sign, b, sign, c, sign, d]  -> a + b + c + d
        # A '-' sign is applied by negating the operand that follows it.
        # NOTE(review): this copy appears to have lost lines -- two `if`
        # statements below have no body, and each `assert` looks like it
        # belonged to a missing `else:`.  Confirm against the upstream file.
        # NOTE(review): body missing for the next `if`.
        if len(toks) != 1:
        toks = toks[0]
        # NOTE(review): body missing for the next `if` (presumably the
        # non-ParseResults token is handed back unchanged -- confirm).
        if not isinstance(toks, pp.ParseResults):
        for idx in range(len(toks)):
            if isinstance(toks[idx], pp.ParseResults):
                toks[idx] = action_exp([toks[idx]])
        if len(toks) == 2 and toks[0] == "-" and toks[1]._is_cst:
            return expressions.cst(-toks[1].value, size=toks[1].size)
        # NOTE(review): any other two-token list reaches toks[2] below and
        # would raise IndexError; handling of other unary forms may be
        # missing.
        # We need to uniformize the sizes, else amoco will complain
        # The real size value is set later, it depends on other arguments
        if toks[2]._is_cst:
            toks[2].size = toks[0].size
            # Re-mask the constant's value after its size changed.
            toks[2].v &= toks[2].mask
        elif toks[2]._is_lab:
            toks[0].size = toks[2].size
            if toks[0]._is_cst:
                toks[0].v &= toks[0].mask
        if len(toks) >= 5:
            toks[4].size = toks[0].size
        if len(toks) == 7:
            toks[6].size = toks[0].size
        # We could prefer to use a generic analysis of toks, but
        # dealing with specific cases one by one gives more control
        # on the output
        # NOTE(review): as written, each `assert` below runs right after the
        # negation, i.e. only when the sign is "-", so it would always fail;
        # an `else:` line seems to be missing before each of them.
        if toks[1] == "-":
            toks[2] = -toks[2]
            assert toks[1] == "+"
        if len(toks) == 3:
            return toks[0] + toks[2]
        if toks[3] == "-":
            toks[4] = -toks[4]
            assert toks[3] == "+"
        if len(toks) == 5:
            return toks[0] + toks[2] + toks[4]
        if toks[5] == "-":
            toks[6] = -toks[6]
            assert toks[5] == "+"
        if len(toks) == 7:
            return toks[0] + toks[2] + toks[4] + toks[6]
            # NOTE(review): unreachable after the `return` above; probably
            # the body of a missing `else:` reporting an unsupported list.
            print("EXP %s" % toks)


    imm = "$" + exp
    imm.setParseAction(lambda toks: toks[1])

    fpreg = "st(" + pp.Regex(r"[0-7]") + ")"
    reg = "%" + fpreg | "%" + symbol

    def action_reg(toks):
        """Map a parsed register token to the matching object from ``env``.

        ``toks[1]`` is the text that follows ``%``; for the ``st(N)`` form
        the digit is in ``toks[2]``.  Names matching no special case are
        looked up in ``env.__dict__`` (an unknown name raises ``KeyError``).
        """
        name = toks[1]
        # "st" alone stands for st(0); "st(" carries an explicit index.
        if name == "st":
            return env.st(0)
        if name == "st(":
            return env.st(int(toks[2]))
        # Register banks on env that are indexed by the trailing number.
        for prefix, bank in (("mm", "mmregs"), ("xmm", "xmmregs")):
            if name.startswith(prefix):
                return getattr(env, bank)[int(name[len(prefix):])]
        # Registers obtained by calling env.cr / env.dr with the number.
        for prefix in ("cr", "dr"):
            if name.startswith(prefix):
                return getattr(env, prefix)(int(name[2:]))
        if (name[0], name[-1]) == ("r", "b"):
            # gcc or clang use 'r8b' instead of 'R8L' from Intel specs
            name = name[:-1] + "l"
        return env.__dict__[name]


    bis = "(" + pp.Optional(reg) + pp.Optional("," + reg +
                                               pp.Optional("," + exp)) + ")"

    def action_bis(toks):
        """Build the address expression for a parenthesised operand part.

        The shape is selected by the number of tokens:

        * 3: ``( base )``                   -> ``base``
        * 5: ``( base , index )``           -> ``base + index``
        * 6: ``( , index , scale )``        -> ``index * scale``
        * 7: ``( base , index , scale )``   -> ``base + index * scale``

        When ``env.internals`` has a truthy ``keep_order`` entry, bit 16 is
        or-ed into ``prop`` of the sum.  If no shape matches, ``addr`` is
        never bound and the final ``return`` raises ``UnboundLocalError``.
        """
        count = len(toks)

        def scaled(index_pos, scale_pos):
            # cst size set to register size
            toks[scale_pos].size = toks[index_pos].size
            return expressions.oper("*", toks[index_pos], toks[scale_pos])

        def summed(base, other):
            total = expressions.op("+", base, other)
            if env.internals.get("keep_order"):
                total.prop |= 16
            return total

        if count == 3:
            addr = toks[1]
        elif count == 5 and toks[2] == ",":
            addr = summed(toks[1], toks[3])
        elif count == 6 and toks[1] == "," and toks[3] == ",":
            addr = scaled(2, 4)
        elif count == 7 and toks[2] == "," and toks[4] == ",":
            addr = summed(toks[1], scaled(3, 5))
        return addr


    adr = exp + bis | bis | exp
    mem = pp.Optional(reg + ":") + adr

    def action_mem(toks):
        # Parse action for a memory operand: builds an expressions.mem of
        # width cpu_addrsize from an address, a displacement and a segment.
        # NOTE(review): this copy parses, but several `else:` lines (and
        # possibly a step that drops the segment tokens) appear to be
        # missing; the notes below describe what the visible code does.
        # we use str(_) because != is redefined for amoco expressions
        # and fails when comparing with strings
        r = [str(_) for _ in toks]
        if ":" in r:
            assert len(r) == 3
            assert ":" == r[1]
            seg = toks[0]
            # NOTE(review): as written this overwrites the segment read on
            # the previous line, and `seg` stays unbound when no ':' is
            # present; an `else:` seems to be missing before it.
            seg = ""
        if len(toks) == 2:
            # Two tokens: displacement first, then the address part.
            addr = toks[1]
            disp = toks[0]
            if hasattr(disp, "value"):
                disp = disp.value
                # NOTE(review): `disp` was just replaced by its `.value`, so
                # this check probably belonged to a missing `else:`.
                assert disp.size == addr.size
            # NOTE(review): as written the two lines below always override
            # the values set above, and `addr`/`disp` stay unbound when
            # len(toks) != 2; an `else:` seems to be missing before them.
            addr = toks[0]
            disp = 0
        return expressions.mem(addr, cpu_addrsize, disp=disp, seg=seg)


    opd = mem | reg | imm
    ind = "*" + opd
    opd_i = opd | ind
    opds = pp.Group(pp.delimitedList(opd_i))

    instr = pp.Optional(pfx) + symbol + pp.Optional(opds)

    mmx_with_suffix1 = [
        p + s for p in (
        ) for s in ("B", "W", "D", "Q")
    mmx_with_suffix2 = [
        p + s for p in (
        ) for s in ("PS", "PD", "SD", "SS")

    def action_instr(toks):
        i = instruction(b"")
        i.mnemonic = toks[0].upper()
        # Remove prefixes
        if i.mnemonic in ("REP", "REPZ", "REPNZ", "REPE", "REPNE", "LOCK"):
            if i.mnemonic in ("REP", "REPZ", "REPE"):
                i.misc.update({"pfx": ["rep", None, None, None], "rep": True})
            if i.mnemonic in ("REPNZ", "REPNE"):
                    "pfx": ["repne", None, None, None],
                    "repne": True
            if i.mnemonic in ("LOCK", ):
                    "pfx": ["lock", None, None, None],
                    "lock": True
            del toks[0]  # toks.pop(0) is broken for pyparsing 2.0.2
            # https://bugs.launchpad.net/ubuntu/+source/pyparsing/+bug/1381564
            i.mnemonic = toks[0].upper()
        # Get operands
        if len(toks) > 1:
            i.operands = list(reversed(toks[1][0:]))
        # Convert mnemonics, set operand sizes
        if i.mnemonic in (
            # clang on MacOS X
            if i.mnemonic[-1] in ("L", "Q"):
                i.mnemonic = i.mnemonic[:-1]
                i.mnemonic = "FUCOMIP"
        mnemo = i.mnemonic.lower()
        if i.mnemonic in ("CALL", "JMP"):
            if len(i.operands) == 2 and i.operands[1] == "*":
                if not i.operands[0]._is_mem and not i.operands[0]._is_reg:
                    i.operands[0] = expressions.mem(i.operands[0],
                if i.operands[0]._is_mem:
                    i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
        elif i.mnemonic.startswith(("J", "SET", "CMOV")):
            for pfx in ("J", "SET", "CMOV"):
                if i.mnemonic.startswith(pfx):
            for i.cond in CONDITION_CODES.values():
                if i.mnemonic[len(pfx):] in i.cond[0].split("/"):
                if pfx == "CMOV" and i.mnemonic[-1] in ("W", "L", "Q"):
                    # clang on MacOS X
                    for i.cond in CONDITION_CODES.values():
                        if i.mnemonic[len(pfx):-1] in i.cond[0].split("/"):
                elif pfx == "SET" and i.mnemonic[-1] == "B":
                    # gcc 3.2.3
                    for i.cond in CONDITION_CODES.values():
                        if i.mnemonic[len(pfx):-1] in i.cond[0].split("/"):
            i.mnemonic = pfx + "cc"
            if pfx == "J":
                i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
            if pfx == "CMOV":
                if i.operands[0]._is_mem:
                    i.operands[0].size = i.operands[1].size
                if i.operands[1]._is_mem:
                    i.operands[1].size = i.operands[0].size
        elif mnemo in att_mnemo_correspondance:
            i.mnemonic = att_mnemo_correspondance[mnemo].upper()
            if i.mnemonic in ("CBW", "CWD", "IRET"):
                    "opdsz": 16,
                    "pfx": [None, None, "opdsz", None]
        elif mnemo[-1] == "w" and mnemo[:-1] in mnemo_string_rep:
            i.misc.update({"opdsz": 16, "pfx": [None, None, "opdsz", None]})
        elif i.mnemonic == "CMPSD" and len(i.operands) == 0:
            # String cmpsd, different from the SSE cmp*sd, has no arguments
        elif i.mnemonic == "MOVSL":
            # String movsd, different from the SSE movsd
            # Has no arguments
            i.mnemonic = "MOVSD"
            assert len(i.operands) == 0
        elif i.mnemonic[:-2] in ("MOVS", "MOVZ"):
            i.misc["opdsz"], sz = {
                "BW": (8, 16),
                "BL": (8, 32),
                "BQ": (8, 64),
                "WL": (16, 32),
                "WQ": (16, 64),
                "LQ": (32, 64),
            if i.operands[1]._is_mem:
                i.operands[1].size = i.misc["opdsz"]
            assert i.operands[0].size == sz
            i.mnemonic = i.mnemonic[:-2] + "X"
        elif i.mnemonic == "MOVD":
        elif i.mnemonic in (
            assert i.operands[0]._is_mem
            i.operands[0].size = 16
        elif i.mnemonic == "CMPXCHG":
            if i.operands[0]._is_mem:
                i.operands[0].size = i.operands[1].size
        elif i.mnemonic == "CMPXCHG8B":
            if i.operands[0]._is_mem:
                i.operands[0].size = 64
        elif i.mnemonic == "CMPXCHG16B":
            if i.operands[0]._is_mem:
                i.operands[0].size = 128
        elif i.mnemonic in mmx_with_suffix2 and len(i.operands):
            if i.mnemonic.endswith("SS"):
                if i.operands[1]._is_mem:
                    i.operands[1].size = 32
            elif i.mnemonic.endswith("SD"):
                if i.operands[0]._is_mem:
                    i.operands[0].size = 64
                if i.operands[1]._is_mem:
                    i.operands[1].size = 64
                if i.operands[1]._is_mem:
                    i.operands[1].size = 128
        elif i.mnemonic in (
            if i.operands[1]._is_mem:
                i.operands[1].size = 32
            # gcc 4.9.2 generates cvttss2siq %xmm0, %r15
            # which is useless, because the size of the output is
            # determined by the output register
            if i.mnemonic[-1] in "LQ":
                i.mnemonic = i.mnemonic[:-1]
        elif i.mnemonic in (
            if i.operands[0]._is_mem:
                i.operands[0].size = 64
            if i.operands[1]._is_mem:
                i.operands[1].size = 64
            if i.mnemonic[-1] in "LQ":
                i.mnemonic = i.mnemonic[:-1]
        elif i.mnemonic in (
            if i.operands[0]._is_mem:
                i.operands[0].size = 128
            if i.operands[1]._is_mem:
                i.operands[1].size = 128
        elif i.mnemonic in mmx_with_suffix1 or i.mnemonic in (
            if i.operands[1]._is_mem:
                i.operands[1].size = i.operands[0].size
        elif i.mnemonic == "MOVABSQ":
            # gcc generates 'movabsq $cst, %reg' instead of 'movq $cst, %reg'
            i.mnemonic = "MOV"
            for _ in i.operands:
                if _._is_mem:
                    _.size = 64
                if _._is_cst:
                    _.size = 64
                    _.v &= _.mask
                if _._is_lab:
                    _.size = 64
            for _ in att_mnemo_suffix_one_ptr:
                if mnemo[:-1] != _:
                # Detect MMX MOVQ instruction, not 64-bit register MOV for x64
                if (i.mnemonic == "MOVQ" and i.operands[1]._is_reg
                        and "mm" in i.operands[1].ref):
                    if i.operands[0]._is_mem:
                        i.operands[0].size = 64
                if (i.mnemonic == "MOVQ" and i.operands[0]._is_reg
                        and "mm" in i.operands[0].ref):
                    if i.operands[1]._is_mem:
                        i.operands[1].size = 64
                i.mnemonic = _.upper()
                sz = {"b": 8, "w": 16, "l": 32, "q": 64}[mnemo[-1]]
                if "q" == mnemo[-1]:
                    i.misc.update({"REX": (1, 0, 0, 0)})
                if "w" == mnemo[-1]:
                        "opdsz": 16,
                        "pfx": [None, None, "opdsz", None]

                def set_size(e, sz):
                    if e._is_mem:
                        e.size = sz
                    if e._is_cst:
                        e.size = sz
                        e.v &= e.mask
                    if e._is_lab:
                        e.size = sz
                    if e._is_eqn:
                        e.size = sz
                        if e.l is not None:
                            set_size(e.l, sz)
                        set_size(e.r, sz)

                for _ in i.operands:
                    set_size(_, sz)
            for _ in att_mnemo_suffix_one_iflt:
                if mnemo[:-1] != _:
                i.mnemonic = _.upper()
                sz = {"s": 16, "l": 32, "q": 64}[mnemo[-1]]
                for _ in i.operands:
                    if _._is_mem:
                        _.size = sz
            if mnemo[-2:] == "ll" and mnemo[:-2] in att_mnemo_suffix_one_iflt:
                # clang on MacOS X
                i.mnemonic = mnemo[:-2].upper()
                for _ in i.operands:
                    if _._is_mem:
                        _.size = 64
            for _ in att_mnemo_float_optional_suffix:
                if mnemo[:-1] != _:
                if mnemo[-1] in ("i", "p", "r", "z", "1"):
                i.mnemonic = _.upper()
                sz = {"s": 32, "l": 64, "t": 80}[mnemo[-1]]
                for _ in i.operands:
                    if _._is_mem:
                        _.size = sz
        # Implicit operands
        if i.mnemonic in ("AAD", "AAM"):
            if len(i.operands) == 0:
                i.operands.append(expressions.cst(10, 8))
                i.operands[0].size = 8
        elif (i.mnemonic in ("SAL", "SAR", "SHL", "SHR", "ROR", "ROL")
              and len(i.operands) == 1):
            i.operands.append(expressions.cst(1, 32))
        elif i.mnemonic in ("SHLD", "SHRD") and len(i.operands) == 2:
        elif (i.mnemonic.startswith("CMP") and i.mnemonic.endswith(
            ("PS", "PD", "SD", "SS")) and len(i.operands)):
            idx = mnemo_sse_cmp_predicate.index(i.mnemonic[3:-2].lower())
            i.mnemonic = i.mnemonic[:3] + i.mnemonic[-2:]
        elif (i.mnemonic in (
        ) and len(i.operands) == 1 and not i.operands[0]._is_mem):
            i.operands.insert(0, env.st(0))
        elif (i.mnemonic in (
        ) and len(i.operands) == 1 and not i.operands[0]._is_mem):
        elif (i.mnemonic in (
        ) and len(i.operands) == 0):
        return i

    # Set instr.instr for compatibility with previous versions of amoco
    # where att_syntax is a namespace containing 'instr'.
    # Set instr.__name__ for test_parser
    instr.instr = instr
    instr.__name__ = "att_syntax"
    return instr
Example #7
    def action_instr(toks):
        # Build an `instruction` from parsed tokens: toks[0] is the mnemonic
        # (optionally preceded by a REP*/LOCK prefix token) and toks[1], when
        # present, holds the operands.  The mnemonic is normalised, operand
        # sizes are fixed up, implicit operands are added, and the
        # instruction object is returned.
        # NOTE(review): this listing appears to have lost lines (unterminated
        # `in (` tuples, dict entries without an opening call, branches with
        # no body).  The comments below describe only what is visible here.
        i = instruction(b"")
        i.mnemonic = toks[0].upper()
        # Remove prefixes
        if i.mnemonic in ("REP", "REPZ", "REPNZ", "REPE", "REPNE", "LOCK"):
            if i.mnemonic in ("REP", "REPZ", "REPE"):
                i.misc.update({"pfx": ["rep", None, None, None], "rep": True})
            if i.mnemonic in ("REPNZ", "REPNE"):
                    "pfx": ["repne", None, None, None],
                    "repne": True
            if i.mnemonic in ("LOCK", ):
                    "pfx": ["lock", None, None, None],
                    "lock": True
            del toks[0]  # toks.pop(0) is broken for pyparsing 2.0.2
            # https://bugs.launchpad.net/ubuntu/+source/pyparsing/+bug/1381564
            # With the prefix token removed, toks[0] is now the real mnemonic.
            i.mnemonic = toks[0].upper()
        # Get operands
        # Operands are stored in the reverse of their order in toks[1]
        # (presumably AT&T "src, dst" -> "dst, src"; confirm against grammar).
        if len(toks) > 1:
            i.operands = list(reversed(toks[1][0:]))
        # Convert mnemonics, set operand sizes
        if i.mnemonic in (
            # clang on MacOS X
            if i.mnemonic[-1] in ("L", "Q"):
                i.mnemonic = i.mnemonic[:-1]
                i.mnemonic = "FUCOMIP"
        # Lower-case copy of the mnemonic, used for the suffix-table lookups.
        mnemo = i.mnemonic.lower()
        if i.mnemonic in ("CALL", "JMP"):
            if len(i.operands) == 2 and i.operands[1] == "*":
                if not i.operands[0]._is_mem and not i.operands[0]._is_reg:
                    i.operands[0] = expressions.mem(i.operands[0],
                if i.operands[0]._is_mem:
                    i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
        elif i.mnemonic.startswith(("J", "SET", "CMOV")):
            # Find which conditional family (J / SET / CMOV) this mnemonic is
            # in, then look for the condition whose name matches the rest of
            # the mnemonic; i.cond is the loop variable of the search.
            for pfx in ("J", "SET", "CMOV"):
                if i.mnemonic.startswith(pfx):
            for i.cond in CONDITION_CODES.values():
                if i.mnemonic[len(pfx):] in i.cond[0].split("/"):
                if pfx == "CMOV" and i.mnemonic[-1] in ("W", "L", "Q"):
                    # clang on MacOS X
                    for i.cond in CONDITION_CODES.values():
                        if i.mnemonic[len(pfx):-1] in i.cond[0].split("/"):
                elif pfx == "SET" and i.mnemonic[-1] == "B":
                    # gcc 3.2.3
                    for i.cond in CONDITION_CODES.values():
                        if i.mnemonic[len(pfx):-1] in i.cond[0].split("/"):
            # The specific condition is replaced by the generic "cc" suffix.
            i.mnemonic = pfx + "cc"
            if pfx == "J":
                i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
            if pfx == "CMOV":
                # A memory operand takes the size of the other operand.
                if i.operands[0]._is_mem:
                    i.operands[0].size = i.operands[1].size
                if i.operands[1]._is_mem:
                    i.operands[1].size = i.operands[0].size
        elif mnemo in att_mnemo_correspondance:
            i.mnemonic = att_mnemo_correspondance[mnemo].upper()
            if i.mnemonic in ("CBW", "CWD", "IRET"):
                    "opdsz": 16,
                    "pfx": [None, None, "opdsz", None]
        elif mnemo[-1] == "w" and mnemo[:-1] in mnemo_string_rep:
            i.misc.update({"opdsz": 16, "pfx": [None, None, "opdsz", None]})
        elif i.mnemonic == "CMPSD" and len(i.operands) == 0:
            # String cmpsd, different from the SSE cmp*sd, has no arguments
        elif i.mnemonic == "MOVSL":
            # String movsd, different from the SSE movsd
            # Has no arguments
            i.mnemonic = "MOVSD"
            assert len(i.operands) == 0
        elif i.mnemonic[:-2] in ("MOVS", "MOVZ"):
            # The two-letter suffix gives (source size, destination size).
            i.misc["opdsz"], sz = {
                "BW": (8, 16),
                "BL": (8, 32),
                "BQ": (8, 64),
                "WL": (16, 32),
                "WQ": (16, 64),
                "LQ": (32, 64),
            if i.operands[1]._is_mem:
                i.operands[1].size = i.misc["opdsz"]
            assert i.operands[0].size == sz
            i.mnemonic = i.mnemonic[:-2] + "X"
        elif i.mnemonic == "MOVD":
        elif i.mnemonic in (
            assert i.operands[0]._is_mem
            i.operands[0].size = 16
        elif i.mnemonic == "CMPXCHG":
            if i.operands[0]._is_mem:
                i.operands[0].size = i.operands[1].size
        elif i.mnemonic == "CMPXCHG8B":
            if i.operands[0]._is_mem:
                i.operands[0].size = 64
        elif i.mnemonic == "CMPXCHG16B":
            if i.operands[0]._is_mem:
                i.operands[0].size = 128
        elif i.mnemonic in mmx_with_suffix2 and len(i.operands):
            if i.mnemonic.endswith("SS"):
                if i.operands[1]._is_mem:
                    i.operands[1].size = 32
            elif i.mnemonic.endswith("SD"):
                if i.operands[0]._is_mem:
                    i.operands[0].size = 64
                if i.operands[1]._is_mem:
                    i.operands[1].size = 64
                if i.operands[1]._is_mem:
                    i.operands[1].size = 128
        elif i.mnemonic in (
            if i.operands[1]._is_mem:
                i.operands[1].size = 32
            # gcc 4.9.2 generates cvttss2siq %xmm0, %r15
            # which is useless, because the size of the output is
            # determined by the output register
            if i.mnemonic[-1] in "LQ":
                i.mnemonic = i.mnemonic[:-1]
        elif i.mnemonic in (
            if i.operands[0]._is_mem:
                i.operands[0].size = 64
            if i.operands[1]._is_mem:
                i.operands[1].size = 64
            if i.mnemonic[-1] in "LQ":
                i.mnemonic = i.mnemonic[:-1]
        elif i.mnemonic in (
            if i.operands[0]._is_mem:
                i.operands[0].size = 128
            if i.operands[1]._is_mem:
                i.operands[1].size = 128
        elif i.mnemonic in mmx_with_suffix1 or i.mnemonic in (
            if i.operands[1]._is_mem:
                i.operands[1].size = i.operands[0].size
        elif i.mnemonic == "MOVABSQ":
            # gcc generates 'movabsq $cst, %reg' instead of 'movq $cst, %reg'
            i.mnemonic = "MOV"
            for _ in i.operands:
                if _._is_mem:
                    _.size = 64
                if _._is_cst:
                    _.size = 64
                    _.v &= _.mask
                if _._is_lab:
                    _.size = 64
            # Mnemonics carrying a one-letter size suffix (b/w/l/q): strip the
            # suffix and apply the corresponding size to the operands.
            for _ in att_mnemo_suffix_one_ptr:
                if mnemo[:-1] != _:
                # Detect MMX MOVQ instruction, not 64-bit register MOV for x64
                if (i.mnemonic == "MOVQ" and i.operands[1]._is_reg
                        and "mm" in i.operands[1].ref):
                    if i.operands[0]._is_mem:
                        i.operands[0].size = 64
                if (i.mnemonic == "MOVQ" and i.operands[0]._is_reg
                        and "mm" in i.operands[0].ref):
                    if i.operands[1]._is_mem:
                        i.operands[1].size = 64
                i.mnemonic = _.upper()
                sz = {"b": 8, "w": 16, "l": 32, "q": 64}[mnemo[-1]]
                if "q" == mnemo[-1]:
                    i.misc.update({"REX": (1, 0, 0, 0)})
                if "w" == mnemo[-1]:
                        "opdsz": 16,
                        "pfx": [None, None, "opdsz", None]

                # Recursively set the size of memory, constant, label and
                # equation operands; constants are re-masked to the new size.
                def set_size(e, sz):
                    if e._is_mem:
                        e.size = sz
                    if e._is_cst:
                        e.size = sz
                        e.v &= e.mask
                    if e._is_lab:
                        e.size = sz
                    if e._is_eqn:
                        e.size = sz
                        if e.l is not None:
                            set_size(e.l, sz)
                        set_size(e.r, sz)

                for _ in i.operands:
                    set_size(_, sz)
            # Mnemonics from att_mnemo_suffix_one_iflt with an s/l/q suffix:
            # only memory operands are resized.
            for _ in att_mnemo_suffix_one_iflt:
                if mnemo[:-1] != _:
                i.mnemonic = _.upper()
                sz = {"s": 16, "l": 32, "q": 64}[mnemo[-1]]
                for _ in i.operands:
                    if _._is_mem:
                        _.size = sz
            if mnemo[-2:] == "ll" and mnemo[:-2] in att_mnemo_suffix_one_iflt:
                # clang on MacOS X
                i.mnemonic = mnemo[:-2].upper()
                for _ in i.operands:
                    if _._is_mem:
                        _.size = 64
            # Mnemonics from att_mnemo_float_optional_suffix with an s/l/t
            # suffix: memory operands get 32, 64 or 80 bits.
            for _ in att_mnemo_float_optional_suffix:
                if mnemo[:-1] != _:
                if mnemo[-1] in ("i", "p", "r", "z", "1"):
                i.mnemonic = _.upper()
                sz = {"s": 32, "l": 64, "t": 80}[mnemo[-1]]
                for _ in i.operands:
                    if _._is_mem:
                        _.size = sz
        # Implicit operands
        if i.mnemonic in ("AAD", "AAM"):
            # Without an explicit operand, an 8-bit constant 10 is used.
            if len(i.operands) == 0:
                i.operands.append(expressions.cst(10, 8))
                i.operands[0].size = 8
        elif (i.mnemonic in ("SAL", "SAR", "SHL", "SHR", "ROR", "ROL")
              and len(i.operands) == 1):
            # Single-operand shift/rotate: the count defaults to 1.
            i.operands.append(expressions.cst(1, 32))
        elif i.mnemonic in ("SHLD", "SHRD") and len(i.operands) == 2:
        elif (i.mnemonic.startswith("CMP") and i.mnemonic.endswith(
            ("PS", "PD", "SD", "SS")) and len(i.operands)):
            # The predicate embedded in the mnemonic is looked up by index and
            # the mnemonic is reduced to CMP + its two-letter type suffix.
            idx = mnemo_sse_cmp_predicate.index(i.mnemonic[3:-2].lower())
            i.mnemonic = i.mnemonic[:3] + i.mnemonic[-2:]
        elif (i.mnemonic in (
        ) and len(i.operands) == 1 and not i.operands[0]._is_mem):
            i.operands.insert(0, env.st(0))
        elif (i.mnemonic in (
        ) and len(i.operands) == 1 and not i.operands[0]._is_mem):
        elif (i.mnemonic in (
        ) and len(i.operands) == 0):
        return i
Example #8
 def p_term_number(self,p):
     r'''term : integer
              | character'''
     # The docstring above is the grammar rule and is left untouched.
     # The matched value p[1] is wrapped by expr.cst and becomes the
     # result of the production, p[0].
     matched_value = p[1]
     constant = expr.cst(matched_value)
     p[0] = constant
Example #9
def asmhelper(i):
    """Normalise a parsed assembly instruction in place and return it.

    The steps, in order:

    1. every memory operand is replaced by its address (``a.a``);
    2. implicit operands are added and reduced forms (``bset``, ``mov``,
       ``retl``, ``jmp``, ``clr``, ``inc``, ``dec``, ``cmp``, ``btst``,
       ``nop``, argument-less ``restore``) are rewritten to the mnemonic
       and operand list they stand for;
    3. a trailing ``cc`` and a trailing ``,a`` are removed from the
       mnemonic and recorded as ``i.misc['icc']`` / ``i.misc['annul']``;
    4. branch and trap mnemonics are mapped through the synonym tables and
       reduced to ``b`` / ``t`` with the condition stored in ``i.cond``;
    5. ``i.spec`` is looked up in ``spec_table`` and ``i.bytes`` is set to
       a 4-element placeholder.

    :param i: instruction object with ``mnemonic``, ``operands`` and
        ``misc`` attributes; it is mutated.
    :return: the same object ``i``.
    :raises ValueError: if ``call`` has a second operand that is not
        ``cst(0)``.
    """
    # Memory operands are replaced by their address expression.
    for idx, a in enumerate(i.operands):
        if a._is_mem:
            i.operands[idx] = a.a
    # Add implicit arguments
    if i.mnemonic in ['inc', 'dec'] and len(i.operands) == 1:
        i.operands.insert(0, cst(1))
    # Expand reduced forms
    if i.mnemonic == 'bset':
        i.mnemonic = 'or'
        i.operands.insert(0, i.operands[1])
    elif i.mnemonic == 'mov':
        i.mnemonic = 'or'
        i.operands.insert(0, env.g0)
    elif i.mnemonic == 'retl':
        i.mnemonic = 'jmpl'
        i.operands.insert(0, op('+', env.o7, cst(8)))
        i.operands.insert(1, env.g0)
    elif i.mnemonic == 'jmp':
        i.mnemonic = 'jmpl'
        i.operands.insert(1, env.g0)
    elif i.mnemonic == 'clr' and i.operands[0]._is_reg:
        i.mnemonic = 'or'
        i.operands.insert(0, env.g0)
        i.operands.insert(0, env.g0)
    elif i.mnemonic == 'clr':
        i.mnemonic = 'st'
        i.operands.insert(0, env.g0)
    elif i.mnemonic == 'inc':
        i.mnemonic = 'add'
        i.operands.insert(0, i.operands[1])
    elif i.mnemonic == 'dec':
        i.mnemonic = 'sub'
        i.operands.insert(0, i.operands[1])
    elif i.mnemonic == 'cmp':
        i.mnemonic = 'subcc'
        i.operands.insert(2, env.g0)
    elif i.mnemonic == 'btst':
        i.mnemonic = 'andcc'
        i.operands.insert(2, env.g0)
        # Swap the first two operands.
        i.operands[0:2] = [i.operands[1], i.operands[0]]
    elif i.mnemonic == 'nop':
        i.mnemonic = 'sethi'
        i.operands = [cst(0, 22), env.g0]
    elif i.mnemonic == 'restore' and len(i.operands) == 0:
        i.operands = [env.g0, env.g0, env.g0]
    # Branches and cc
    # taddcc/tsubcc/mulscc keep their full name; for every other mnemonic
    # the "cc" suffix is turned into the 'icc' flag.
    if (i.mnemonic.endswith('cc')
            and i.mnemonic not in ['taddcc', 'tsubcc', 'mulscc']):
        i.mnemonic = i.mnemonic[:-2]
        i.misc['icc'] = True
    if i.mnemonic.endswith(',a'):
        i.misc['annul'] = True
        # Drop exactly the two-character ",a" suffix.  str.rstrip(',a')
        # strips a *set* of characters, so it would also eat the final 'a'
        # of the base mnemonic (e.g. "ba,a" -> "b" instead of "ba").
        i.mnemonic = i.mnemonic[:-2]
    if i.mnemonic in b_synonyms:
        i.mnemonic = b_synonyms[i.mnemonic]
    if i.mnemonic in b_cond:
        i.cond = b_cond[i.mnemonic]
        i.mnemonic = 'b'
    if i.mnemonic in t_synonyms:
        i.mnemonic = t_synonyms[i.mnemonic]
    if i.mnemonic in t_cond:
        i.cond = t_cond[i.mnemonic]
        i.mnemonic = 't'
    if i.mnemonic == 'call':
        if len(i.operands) > 1 and i.operands[1] != cst(0):
            raise ValueError('call has a non-zero second argument')
        # Only the first operand is kept.
        i.operands = [i.operands[0]]
    # Additional internal tweaks
    if i.mnemonic == 'sethi' and i.operands[0]._is_cst:
        i.operands[0].size = 22
    elif i.mnemonic == 'std':
        i.rd = env.r.index(i.operands[0])
    elif i.mnemonic == 'ldd':
        i.rd = env.r.index(i.operands[1])
    i.spec = spec_table[i.mnemonic]
    i.bytes = (0, 0, 0, 0)  # To have i.length == 4, for pc_npc emulation
    return i
Example #10
    def action_instr(toks):
        # Build an `instruction` from parsed tokens: toks[0] is the mnemonic
        # (optionally preceded by a REP*/LOCK prefix token) and toks[1], when
        # present, holds the operands.  The mnemonic is normalised, operand
        # sizes are fixed up, implicit operands are added, and the
        # instruction object is returned.
        # NOTE(review): this listing appears to have lost lines (unterminated
        # `in (` tuples, dict entries without an opening call, branches with
        # no body).  The comments below describe only what is visible here.
        i = instruction(b'')
        i.mnemonic = toks[0].upper()
        # Remove prefixes
        if i.mnemonic in ('REP', 'REPZ', 'REPNZ', 'REPE', 'REPNE', 'LOCK'):
            if i.mnemonic in ('REP', 'REPZ', 'REPE'):
                i.misc.update({'pfx': ['rep', None, None, None], 'rep': True})
            if i.mnemonic in ('REPNZ', 'REPNE'):
                    'pfx': ['repne', None, None, None],
                    'repne': True
            if i.mnemonic in ('LOCK', ):
                    'pfx': ['lock', None, None, None],
                    'lock': True
            del toks[0]  # toks.pop(0) is broken for pyparsing 2.0.2
            # https://bugs.launchpad.net/ubuntu/+source/pyparsing/+bug/1381564
            # With the prefix token removed, toks[0] is now the real mnemonic.
            i.mnemonic = toks[0].upper()
        # Get operands
        # Operands are stored in the reverse of their order in toks[1]
        # (presumably AT&T "src, dst" -> "dst, src"; confirm against grammar).
        if len(toks) > 1:
            i.operands = list(reversed(toks[1][0:]))
        # Convert mnemonics, set operand sizes
        if i.mnemonic in ('CALLL', 'CALLQ', 'JMPL', 'JMPQ', 'RETL', 'RETQ',
                          'BSWAPL', 'BSWAPQ', 'FUCOMPI'):
            # clang on MacOS X
            if i.mnemonic[-1] in ('L', 'Q'):
                i.mnemonic = i.mnemonic[:-1]
                i.mnemonic = 'FUCOMIP'
        # Lower-case copy of the mnemonic, used for the suffix-table lookups.
        mnemo = i.mnemonic.lower()
        if i.mnemonic in ('CALL', 'JMP'):
            if len(i.operands) == 2 and i.operands[1] == '*':
                if not i.operands[0]._is_mem and \
                   not i.operands[0]._is_reg:
                    i.operands[0] = expressions.mem(i.operands[0],
                if i.operands[0]._is_mem:
                    i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
        elif i.mnemonic.startswith(('J', 'SET', 'CMOV')):
            # Find which conditional family (J / SET / CMOV) this mnemonic is
            # in, then look for the condition whose name matches the rest of
            # the mnemonic; i.cond is the loop variable of the search.
            for pfx in ('J', 'SET', 'CMOV'):
                if i.mnemonic.startswith(pfx): break
            for i.cond in CONDITION_CODES.values():
                if i.mnemonic[len(pfx):] in i.cond[0].split('/'): break
                if pfx == 'CMOV' and i.mnemonic[-1] in ('W', 'L', 'Q'):
                    # clang on MacOS X
                    for i.cond in CONDITION_CODES.values():
                        if i.mnemonic[len(pfx):-1] in i.cond[0].split('/'):
                elif pfx == 'SET' and i.mnemonic[-1] == 'B':
                    # gcc 3.2.3
                    for i.cond in CONDITION_CODES.values():
                        if i.mnemonic[len(pfx):-1] in i.cond[0].split('/'):
            # The specific condition is replaced by the generic "cc" suffix.
            i.mnemonic = pfx + 'cc'
            if pfx == 'J':
                i.operands[0] = i.operands[0].a.base + i.operands[0].a.disp
            if pfx == 'CMOV':
                # A memory operand takes the size of the other operand.
                if i.operands[0]._is_mem:
                    i.operands[0].size = i.operands[1].size
                if i.operands[1]._is_mem:
                    i.operands[1].size = i.operands[0].size
        elif mnemo in att_mnemo_correspondance:
            i.mnemonic = att_mnemo_correspondance[mnemo].upper()
            if i.mnemonic in ('CBW', 'CWD', 'IRET'):
                    'opdsz': 16,
                    'pfx': [None, None, 'opdsz', None]
        elif mnemo[-1] == 'w' and mnemo[:-1] in mnemo_string_rep:
            i.misc.update({'opdsz': 16, 'pfx': [None, None, 'opdsz', None]})
        elif i.mnemonic == 'CMPSD' and len(i.operands) == 0:
            # String cmpsd, different from the SSE cmp*sd, has no arguments
        elif i.mnemonic == 'MOVSL':
            # String movsd, different from the SSE movsd
            # Has no arguments
            i.mnemonic = 'MOVSD'
            assert len(i.operands) == 0
        elif i.mnemonic[:-2] in ('MOVS', 'MOVZ'):
            # The two-letter suffix gives (source size, destination size).
            i.misc['opdsz'], sz = {
                'BW': (8, 16),
                'BL': (8, 32),
                'BQ': (8, 64),
                'WL': (16, 32),
                'WQ': (16, 64),
                'LQ': (32, 64),
            if i.operands[1]._is_mem:
                i.operands[1].size = i.misc['opdsz']
            assert i.operands[0].size == sz
            i.mnemonic = i.mnemonic[:-2] + 'X'
        elif i.mnemonic == 'MOVD':
        elif i.mnemonic in (
            assert i.operands[0]._is_mem
            i.operands[0].size = 16
        elif i.mnemonic == 'CMPXCHG':
            if i.operands[0]._is_mem: i.operands[0].size = i.operands[1].size
        elif i.mnemonic == 'CMPXCHG8B':
            if i.operands[0]._is_mem: i.operands[0].size = 64
        elif i.mnemonic == 'CMPXCHG16B':
            if i.operands[0]._is_mem: i.operands[0].size = 128
        elif i.mnemonic in mmx_with_suffix2 and len(i.operands):
            if i.mnemonic.endswith('SS'):
                if i.operands[1]._is_mem: i.operands[1].size = 32
            elif i.mnemonic.endswith('SD'):
                if i.operands[0]._is_mem: i.operands[0].size = 64
                if i.operands[1]._is_mem: i.operands[1].size = 64
                if i.operands[1]._is_mem: i.operands[1].size = 128
        elif i.mnemonic in (
            if i.operands[1]._is_mem: i.operands[1].size = 32
            # gcc 4.9.2 generates cvttss2siq %xmm0, %r15
            # which is useless, because the size of the output is
            # determined by the output register
            if i.mnemonic[-1] in 'LQ': i.mnemonic = i.mnemonic[:-1]
        elif i.mnemonic in (
            if i.operands[0]._is_mem: i.operands[0].size = 64
            if i.operands[1]._is_mem: i.operands[1].size = 64
            if i.mnemonic[-1] in 'LQ': i.mnemonic = i.mnemonic[:-1]
        elif i.mnemonic in (
            if i.operands[0]._is_mem: i.operands[0].size = 128
            if i.operands[1]._is_mem: i.operands[1].size = 128
        elif i.mnemonic in mmx_with_suffix1 or i.mnemonic in (
            if i.operands[1]._is_mem: i.operands[1].size = i.operands[0].size
        elif i.mnemonic == 'MOVABSQ':
            # gcc generates 'movabsq $cst, %reg' instead of 'movq $cst, %reg'
            i.mnemonic = 'MOV'
            for _ in i.operands:
                if _._is_mem: _.size = 64
                if _._is_cst:
                    _.size = 64
                    _.v &= _.mask
                if _._is_lab: _.size = 64
            # Mnemonics carrying a one-letter size suffix (b/w/l/q): strip the
            # suffix and apply the corresponding size to the operands.
            for _ in att_mnemo_suffix_one_ptr:
                if mnemo[:-1] != _: continue
                # Detect MMX MOVQ instruction, not 64-bit register MOV for x64
                if i.mnemonic == 'MOVQ' \
                        and i.operands[1]._is_reg \
                        and 'mm' in i.operands[1].ref:
                    if i.operands[0]._is_mem: i.operands[0].size = 64
                if i.mnemonic == 'MOVQ' \
                        and i.operands[0]._is_reg \
                        and 'mm' in i.operands[0].ref:
                    if i.operands[1]._is_mem: i.operands[1].size = 64
                i.mnemonic = _.upper()
                sz = {'b': 8, 'w': 16, 'l': 32, 'q': 64}[mnemo[-1]]
                if 'q' == mnemo[-1]:
                    i.misc.update({'REX': (1, 0, 0, 0)})
                if 'w' == mnemo[-1]:
                        'opdsz': 16,
                        'pfx': [None, None, 'opdsz', None]

                # Recursively set the size of memory, constant, label and
                # equation operands; constants are re-masked to the new size.
                def set_size(e, sz):
                    if e._is_mem: e.size = sz
                    if e._is_cst:
                        e.size = sz
                        e.v &= e.mask
                    if e._is_lab: e.size = sz
                    if e._is_eqn:
                        e.size = sz
                        if e.l is not None: set_size(e.l, sz)
                        set_size(e.r, sz)

                for _ in i.operands:
                    set_size(_, sz)
            # Mnemonics from att_mnemo_suffix_one_iflt with an s/l/q suffix:
            # only memory operands are resized.
            for _ in att_mnemo_suffix_one_iflt:
                if mnemo[:-1] != _: continue
                i.mnemonic = _.upper()
                sz = {'s': 16, 'l': 32, 'q': 64}[mnemo[-1]]
                for _ in i.operands:
                    if _._is_mem: _.size = sz
            if mnemo[-2:] == 'll' and mnemo[:-2] in att_mnemo_suffix_one_iflt:
                # clang on MacOS X
                i.mnemonic = mnemo[:-2].upper()
                for _ in i.operands:
                    if _._is_mem: _.size = 64
            # Mnemonics from att_mnemo_float_optional_suffix with an s/l/t
            # suffix: memory operands get 32, 64 or 80 bits.
            for _ in att_mnemo_float_optional_suffix:
                if mnemo[:-1] != _: continue
                if mnemo[-1] in ('i', 'p', 'r', 'z', '1'): continue
                i.mnemonic = _.upper()
                sz = {'s': 32, 'l': 64, 't': 80}[mnemo[-1]]
                for _ in i.operands:
                    if _._is_mem: _.size = sz
        # Implicit operands
        if i.mnemonic in ('AAD', 'AAM'):
            # Without an explicit operand, an 8-bit constant 10 is used.
            if len(i.operands) == 0:
                i.operands.append(expressions.cst(10, 8))
                i.operands[0].size = 8
        elif i.mnemonic in ('SAL','SAR','SHL','SHR','ROR','ROL') \
                and len(i.operands) == 1:
            # Single-operand shift/rotate: the count defaults to 1.
            i.operands.append(expressions.cst(1, 32))
        elif i.mnemonic in ('SHLD','SHRD') \
                and len(i.operands) == 2:
        elif i.mnemonic.startswith('CMP') \
                and i.mnemonic.endswith(('PS','PD','SD','SS')) \
                and len(i.operands):
            # The predicate embedded in the mnemonic is looked up by index and
            # the mnemonic is reduced to CMP + its two-letter type suffix.
            idx = mnemo_sse_cmp_predicate.index(i.mnemonic[3:-2].lower())
            i.mnemonic = i.mnemonic[:3] + i.mnemonic[-2:]
        elif i.mnemonic in ('FADD','FSUB','FSUBR',
                           ) \
                and len(i.operands) == 1 \
                and not i.operands[0]._is_mem:
            i.operands.insert(0, env.st(0))
        elif i.mnemonic in ('FADDP','FSUBP','FSUBRP',
                           ) \
                and len(i.operands) == 1 \
                and not i.operands[0]._is_mem:
        elif i.mnemonic in ('FCOM','FCOMP','FUCOM','FUCOMP',
                           ) \
                and len(i.operands) == 0:
        return i
Example #11
File: gas.py Project: xorpse/amoco
 def p_term_number(self, p):
     r"""term : integer
              | character"""
     # The docstring above is the grammar rule and is left untouched.
     # Wrap the matched value p[1] with expr.cst and store it as the
     # production's result p[0].
     number_token = p[1]
     p[0] = expr.cst(number_token)