Beispiel #1
0
def test_memory():
    """Check that repeatedly lifting random blocks does not grow peak memory.

    A first batch of random byte strings is lifted as a warm-up, the peak
    resident set size is sampled, a second (larger) batch is lifted, and the
    peak is sampled again.  The test fails if the peak grew by 2000 units or
    more between the two samples.

    NOTE(review): the threshold assumes ``ru_maxrss`` is reported in
    kilobytes, which is platform dependent -- confirm for the target OS.
    """
    arches = ['VexArchX86', 'VexArchPPC32', 'VexArchAMD64', 'VexArchARM']
    # we're not including VexArchMIPS32 cause it segfaults sometimes

    def lift_random_blocks(count):
        """Lift ``count`` random byte strings for randomly chosen arches."""
        for _ in xrange(count):
            s = hex(random.randint(2**100, 2**100 * 16))[2:]
            a = random.choice(arches)
            try:
                # The block is deliberately not kept: nothing must stay
                # referenced once the batch is over.
                pyvex.IRSB(bytes=s, arch=a)
            except pyvex.PyVEXError:
                # Random bytes are frequently rejected; that is expected.
                pass

    # Warm-up so one-off allocations are already part of the baseline.
    lift_random_blocks(10000)

    kb_start = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    lift_random_blocks(20000)
    gc.collect()

    kb_end = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    # allow a 2mb leeway
    nose.tools.assert_less(kb_end - kb_start, 2000)
Beispiel #2
0
def test_two_irsb():
    """Two blocks lifted from different byte strings differ in statement count."""
    short_block = pyvex.IRSB(bytes='\x5d\xc3')
    long_block = pyvex.IRSB(bytes='\x5d\x5d\x5d\x5d')

    short_count = len(short_block.statements)
    long_count = len(long_block.statements)

    nose.tools.assert_not_equal(short_count, long_count)
Beispiel #3
0
def test_irsb_tyenv():
    irsb = pyvex.IRSB(bytes='\x5d\xc3')
    print irsb.tyenv
    print "Orig"
    print irsb.tyenv
    print "Copy"
    print irsb.tyenv.deepCopy()

    print "Empty"
    irsb2 = pyvex.IRSB()
    print irsb2.tyenv

    print "Unwrapped"
    irsb2.tyenv = irsb.tyenv.deepCopy()
    print irsb2.tyenv
Beispiel #4
0
def test_irsb_deepCopy():
    """A deep copy of a block has as many statements as the original."""
    original = pyvex.IRSB(bytes='\x5d\xc3')
    original_stmts = original.statements

    clone = original.deepCopy()
    clone_stmts = clone.statements
    nose.tools.assert_equal(len(original_stmts), len(clone_stmts))
Beispiel #5
0
def test_irstmt_noop():
    """A lifted no-op, a constructed NoOp and its deep copy share one type."""
    block = pyvex.IRSB(bytes='\x90\x5d\xc3')
    lifted_nop = block.statements[0]
    built_nop = pyvex.IRStmt.NoOp()
    copied_nop = built_nop.deepCopy()

    nose.tools.assert_equal(lifted_nop.tag, "Ist_NoOp")
    lifted_type = type(lifted_nop)
    nose.tools.assert_equal(lifted_type, type(built_nop))
    nose.tools.assert_equal(lifted_type, type(copied_nop))
Beispiel #6
0
def test_irsb_addStmt():
    """Rebuild a block statement by statement on a statement-less copy."""
    source = pyvex.IRSB(bytes='\x5d\xc3')
    source_stmts = source.statements

    rebuilt = source.deepCopyExceptStmts()
    nose.tools.assert_equal(len(rebuilt.statements), 0)

    # Before each insertion the copy must hold exactly the statements
    # appended so far.
    for already_added, stmt in enumerate(source_stmts):
        nose.tools.assert_equal(len(rebuilt.statements), already_added)
        rebuilt.addStatement(stmt.deepCopy())

    rebuilt.pp()
Beispiel #7
0
def test_irexpr_rdtmp():
    m = pyvex.IRExpr.RdTmp(123)
    nose.tools.assert_equal(m.tag, "Iex_RdTmp")
    nose.tools.assert_equal(m.tmp, m.deepCopy().tmp)
    nose.tools.assert_equal(m.tmp, 123)

    m.tmp = 1337
    nose.tools.assert_equal(m.tmp, 1337)
    nose.tools.assert_raises(Exception, pyvex.IRExpr.RdTmp, ())
    nose.tools.assert_equal(type(m), type(m.deepCopy()))

    print "F**K"
    irsb = pyvex.IRSB(bytes='\x90\x5d\xc3')
    print "TMP:", irsb.next.tmp
    nose.tools.assert_equal(irsb.next.tmp, irsb.next.deepCopy().tmp)
Beispiel #8
0
def test_irsb_popret():
    irsb = pyvex.IRSB(bytes='\x5d\xc3')
    stmts = irsb.statements
    irsb.pp()

    nose.tools.assert_greater(len(stmts), 0)
    nose.tools.assert_equal(irsb.jumpkind, "Ijk_Ret")
    nose.tools.assert_equal(irsb.offsIP, 184)

    cursize = len(irsb.tyenv.types)
    nose.tools.assert_greater(cursize, 0)
    print irsb.statements[16].data
    print irsb.statements[16].data.tmp
    print irsb.tyenv.types[irsb.statements[16].data]
    nose.tools.assert_equal(irsb.tyenv.typeOf(irsb.statements[16].data),
                            'Ity_I64')
def test_arm_postprocess():
    """Check the jumpkind reported for ARM and Thumb blocks that set LR by hand.

    Each case below is lifted once per optimisation level (0, 1 and 2) as a
    little-endian ARM block.  Thumb cases use an odd ``mem_addr`` together
    with ``bytes_offset=1``; ARM cases do not pass ``bytes_offset`` at all.
    According to the disassembly, the ``Ijk_Call`` cases derive LR from PC
    before branching, while the ``Ijk_Boring`` cases load LR from a zeroed
    register.
    """
    call = 'Ijk_Call'
    boring = 'Ijk_Boring'

    # (machine code, mem_addr, num_inst, is_thumb, expected jumpkind)
    cases = [
        # ---- Thumb ----

        # push  {r7}
        # add   r7, sp, #0
        # mov.w r1, #6
        # mov   r0, pc
        # add.w lr, r0, r1
        # b.w   10408
        (('\x80\xb4'
          '\x00\xaf'
          '\x4f\xf0\x06\x01'
          '\x78\x46'
          '\x00\xeb\x01\x0e'
          '\xff\xf7\xec\xbf'), 0x1041f, 6, True, call),

        # mov   lr, pc
        # b.w   10408
        (('\xfe\x46'
          '\xe9\xe7'), 0x10431, 2, True, call),

        # add   r2, pc, #0
        # add.w lr, r2, #6
        # ldr.w pc, [pc, #52]
        (('\x00\xa2'
          '\x02\xf1\x06\x0e'
          '\xdf\xf8\x34\xf0'), 0x10435, 3, True, call),

        # ldr   r0, [pc, #48]
        # mov   r1, pc
        # add.w r2, r1, #4
        # add.w r3, r2, #4
        # add.w r4, r3, #4
        # add.w lr, r4, #4
        # mov   pc, r0
        (('\x0c\x48'
          '\x79\x46'
          '\x01\xf1\x04\x02'
          '\x02\xf1\x04\x03'
          '\x03\xf1\x04\x04'
          '\x04\xf1\x04\x0e'
          '\x87\x46'), 0x1043f, 7, True, call),

        # eor.w r0, r0, r0
        # mov   lr, r0
        # b.n   10460
        (('\x80\xea\x00\x00'
          '\x86\x46'
          '\x01\xe0'), 0x10455, 3, True, boring),

        # ---- Thumb compiled with optimizations (gcc -O2) ----

        # mov.w r1, #6
        # mov   r0, pc
        # add.w lr, r0, r1
        # b.w   104bc
        (('\x4f\xf0\x06\x01'
          '\x78\x46'
          '\x00\xeb\x01\x0e'
          '\x00\xf0\xc5\xb8'), 0x10325, 4, True, call),

        # ldr   r0, [pc, #56]
        # mov   r1, pc
        # add.w r2, r1, #4
        # add.w r3, r2, #4
        # add.w r4, r3, #4
        # add.w lr, r4, #4
        # mov   pc, r0
        (('\x0e\x48'
          '\x79\x46'
          '\x01\xf1\x04\x02'
          '\x02\xf1\x04\x03'
          '\x03\xf1\x04\x04'
          '\x04\xf1\x04\x0e'
          '\x87\x46'), 0x10333, 7, True, call),

        # add   r2, pc, #0
        # add.w lr, r2, #6
        # ldr.w pc, [pc, #28]
        (('\x00\xa2'
          '\x02\xf1\x06\x0e'
          '\xdf\xf8\x1c\xf0'), 0x10349, 3, True, call),

        # mov   lr, pc
        # b.w   104bc
        (('\xfe\x46'
          '\xb2\xe0'), 0x10353, 2, True, call),

        # eor.w r0, r0, r0
        # mov   lr, r0
        # b.n   10362
        (('\x80\xea\x00\x00'
          '\x86\x46'
          '\x01\xe0'), 0x10357, 3, True, boring),

        # ---- ARM compiled with optimizations (gcc -O2) ----

        # mov   r1, #4
        # mov   r0, pc
        # add   lr, r0, r1
        # ldr   pc, [pc, #56]
        (('\x04\x10\xa0\xe3'
          '\x0f\x00\xa0\xe1'
          '\x01\xe0\x80\xe0'
          '\x38\xf0\x9f\xe5'), 0x10298, 4, False, call),

        # add   r1, pc, #0
        # add   r2, r1, #4
        # add   r3, r2, #4
        # add   r4, r3, #4
        # add   lr, r4, #4
        # b     10414
        (('\x00\x10\x8f\xe2'
          '\x04\x20\x81\xe2'
          '\x04\x30\x82\xe2'
          '\x04\x40\x83\xe2'
          '\x04\xe0\x84\xe2'
          '\x54\x00\x00\xea'), 0x102a8, 6, False, call),

        # mov   lr, pc
        # b     10414
        (('\x0f\xe0\xa0\xe1'
          '\x52\x00\x00\xea'), 0x102c0, 2, False, call),

        # eor   r0, r0, r0
        # mov   lr, r0
        # b     102d8
        (('\x00\x00\x20\xe0'
          '\x00\xe0\xa0\xe1'
          '\x00\x00\x00\xea'), 0x102c8, 3, False, boring),

        # ---- ARM ----

        # push  {fp}
        # add   fp, sp, #0
        # mov   r1, #4
        # mov   r0, pc
        # add   lr, r0, r1
        # ldr   pc, [pc, #68]
        (('\x04\xb0\x2d\xe5'
          '\x00\xb0\x8d\xe2'
          '\x04\x10\xa0\xe3'
          '\x0f\x00\xa0\xe1'
          '\x01\xe0\x80\xe0'
          '\x44\xf0\x9f\xe5'), 0x103e8, 6, False, call),

        # add   r1, pc, #0
        # add   r2, r1, #4
        # add   r3, r2, #4
        # add   r4, r3, #4
        # add   lr, r4, #4
        # b     103c4
        (('\x00\x10\x8f\xe2'
          '\x04\x20\x81\xe2'
          '\x04\x30\x82\xe2'
          '\x04\x40\x83\xe2'
          '\x04\xe0\x84\xe2'
          '\x54\xff\xff\xea'), 0x10400, 6, False, call),

        # mov   lr, pc
        # b     103c4
        (('\x0f\xe0\xa0\xe1'
          '\xe8\xff\xff\xea'), 0x10418, 2, False, call),

        # eor   r0, r0, r0
        # mov   lr, r0
        # b     10430
        (('\x00\x00\x20\xe0'
          '\x00\xe0\xa0\xe1'
          '\x00\x00\x00\xea'), 0x10420, 3, False, boring),
    ]

    for opt_level in xrange(3):
        for data, mem_addr, num_inst, is_thumb, expected in cases:
            # Only Thumb blocks carry a byte offset; ARM blocks keep the
            # constructor's default by not passing the argument at all.
            extra = {'bytes_offset': 1} if is_thumb else {}
            irsb = pyvex.IRSB(data=data,
                              mem_addr=mem_addr,
                              arch=archinfo.ArchARMEL(),
                              num_inst=num_inst,
                              opt_level=opt_level,
                              **extra)
            nose.tools.assert_equal(
                irsb.jumpkind, expected,
                "%s block at %#x (opt_level=%d): jumpkind %r, expected %r" % (
                    "Thumb" if is_thumb else "ARM", mem_addr, opt_level,
                    irsb.jumpkind, expected))
Beispiel #10
0
    def handleInsns(self):
        """Lift the instructions to VEX, normalise them and store a MinHash.

        First pass: every entry of ``self.insns_list`` (address at index 0,
        raw bytes at index 1) is lifted with ``opt_level=0`` and its
        statements are rewritten in place:

        * constants found in ``self.targets`` become a ``StrConst`` of the
          mapped value, constants equal to the address following the current
          instruction become ``StrConst("_NEXT_")``, and every other constant
          is replaced by a first-seen sequence number;
        * register offsets are replaced by first-seen sequence numbers
          (PC, SP and BP are pre-assigned 0, 1 and 2);
        * jump targets that are not in ``self.targets`` are collected and the
          statement is tagged through ``reg_name`` for the second pass.

        On X86/AMD64 a lone ``hlt`` and the ``lock`` prefix are recorded as
        the marker strings ``"HALT"`` and ``"LOCK"`` instead of being lifted.

        Second pass: tagged jump targets are replaced by their index in the
        sorted list of collected addresses, pairs of consecutive statement
        strings are counted, and the counted pairs are fed into a 64
        permutation MinHash whose serialised form is stored in
        ``self.vexhash``.

        Raises:
            pyvex.errors.PyVEXError: re-raised, after printing the offending
                bytes, when an instruction cannot be lifted.
        """
        # Tags stored in ``reg_name`` to mark statements whose jump target
        # still has to be replaced by its rank in the second pass.
        pc_const_tag = 0xABADCAFE
        exit_dst_tag = 0xDEADBEEF

        # set default value for PC, SP, BP
        pc_offset = self.arch.ip_offset
        regs = {pc_offset: 0, self.arch.sp_offset: 1, self.arch.bp_offset: 2}
        consts = {}
        ips = []
        irsbs = []

        is_x86 = self.arch.name in ("X86", "AMD64")

        def abstract_constants(stmt, next_addr):
            """Rewrite the constants of ``stmt`` as described in the docstring."""
            for j, c in enumerate(stmt.constants):
                if c.value in self.targets:
                    stmt.constants[j] = StrConst(self.targets[c.value])
                elif c.value == next_addr:
                    stmt.constants[j] = StrConst("_NEXT_")
                else:
                    # constants abstraction
                    c.value = consts.setdefault(c.value, len(consts))

        for entry in self.insns_list:
            off = entry[0]
            instr = entry[1]

            # manage instruction not recognized by libVEX
            if is_x86:
                if instr == "\xf4":  # hlt x86 instruction
                    irsbs.append("HALT")
                    continue
                elif instr.startswith("\xf0"):  # lock x86 prefix
                    irsbs.append("LOCK")
                    if len(instr) == 1:
                        continue
                    instr = instr[1:]
            try:
                irsb = pyvex.IRSB(instr, off, self.arch, opt_level=0)
            except pyvex.errors.PyVEXError:
                print(
                    "[Please report to the developer] Error with instruction "
                    + instr.encode("hex"))
                # Bare raise keeps the original traceback.
                raise
            irsbs.append(irsb)

            stmts = irsb.statements
            n_addr = 0

            for i, stmt in enumerate(stmts):
                # TODO PutI GetI
                if isinstance(stmt, pyvex.stmt.IMark):
                    # Address of the instruction following this one.
                    n_addr = stmt.addr + stmt.len
                elif isinstance(stmt, pyvex.stmt.Put):
                    if stmt.offset == pc_offset and len(stmt.constants) == 1:
                        # Write of a single constant to the program counter.
                        c = stmt.constants[0]
                        if c.value in self.targets:
                            stmt.data = StrConst(self.targets[c.value])
                            stmt.offset = 0
                            continue
                        elif c.value == n_addr:
                            stmt.data = StrConst("_NEXT_")
                            stmt.offset = 0
                            continue
                        else:
                            ips.append(c.value)
                            stmt.reg_name = pc_const_tag
                            stmt.offset = 0
                    else:
                        # constants replace
                        abstract_constants(stmt, n_addr)
                        # registers abstraction
                        stmt.offset = regs.setdefault(stmt.offset, len(regs))
                elif isinstance(stmt, pyvex.stmt.Exit):
                    c = stmt.dst
                    if c.value in self.targets:
                        # The whole statement is replaced by its rendering.
                        stmts[i] = "if (%s) { PUT(offset=0) = %s; %s }" % (
                            stmt.guard, self.targets[c.value],
                            stmt.jumpkind)
                        continue
                    else:
                        ips.append(c.value)
                        stmt.reg_name = exit_dst_tag
                else:
                    # constants replace
                    abstract_constants(stmt, n_addr)

                for expr in stmt.expressions:
                    if isinstance(expr, pyvex.expr.Get):
                        # registers abstraction
                        expr.offset = regs.setdefault(expr.offset, len(regs))

        # order addresses: map each collected address to its index in the
        # sorted list (for repeated addresses the last index wins)
        ips.sort()
        addrs = {}
        for rank, ip in enumerate(ips):
            addrs[ip] = rank

        vexhash = datasketch.MinHash(num_perm=64)
        shingled = {}
        last = ""

        for irsb in irsbs:
            if isinstance(irsb, str):
                # "HALT" / "LOCK" marker recorded instead of a lifted block.
                ngram = last + irsb
                shingled[ngram] = shingled.get(ngram, 0) + 1
                last = irsb
                continue

            for stmt in irsb.statements:
                if isinstance(stmt, (pyvex.stmt.IMark, pyvex.stmt.AbiHint)):
                    continue

                if hasattr(stmt, "reg_name"):
                    if stmt.reg_name == pc_const_tag:
                        stmt.constants[0].value = addrs[
                            stmt.constants[0].value]
                    elif stmt.reg_name == exit_dst_tag:
                        stmt.dst.value = addrs[stmt.dst.value]

                v = str(stmt) + "\n"
                ngram = last + v
                shingled[ngram] = shingled.get(ngram, 0) + 1
                last = v

        # Each occurrence of a pair gets its own token so that repeated
        # pairs contribute to the hash.
        for ngram, count in shingled.items():
            for c in range(count):
                vexhash.update("[%d]\n%s" % (c, ngram))

        lean_vexhash = datasketch.LeanMinHash(vexhash)
        vexhash_buf = bytearray(lean_vexhash.bytesize())
        lean_vexhash.serialize(vexhash_buf)

        self.vexhash = str(vexhash_buf)
Beispiel #11
0
def test_irsb_arm():
    """A single ARM instruction lifts to a block with exactly one IMark."""
    block = pyvex.IRSB(bytes='\x33\xff\x2f\xe1', arch="VexArchARM")
    imark_count = sum(
        1 for stmt in block.statements if type(stmt) == pyvex.IRStmt.IMark)
    nose.tools.assert_equal(imark_count, 1)
Beispiel #12
0
def test_irstmt_pp():
    irsb = pyvex.IRSB(bytes='\x5d\xc3')
    stmts = irsb.statements
    for i in stmts:
        print "STMT: ",
        print i