Ejemplo n.º 1
0
def readInstsCompared(inst_pb):
    """Load the compared tool's instructions from a serialized protobuf module.

    Parses ``inst_pb`` into a ``blocks_pb2.module`` and records every
    instruction as a ``(va, size)`` tuple in the global list
    ``InstsInfoCompared`` (sorted by address before returning) and as
    ``va -> size`` in the global mapping ``InstsInfoComparedSet``.

    When an instruction carries no size (``inst.size == 0``), the size of the
    *previous* instruction of the same basic block is derived from the
    address gap between the two, and the last instruction of the block is
    sized from the block end (``bb.va + bb.size``).

    Args:
        inst_pb: Path of the protobuf file to read.
    """
    global InstsInfoCompared
    global InstsInfoComparedSet
    module = blocks_pb2.module()
    try:
        # The context manager closes the file even if read/parse raises.
        with open(inst_pb, 'rb') as pb_file:
            module.ParseFromString(pb_file.read())
    except IOError:
        # Best effort: log and carry on with the (empty) module.
        logging.error("Could not open the file %s!" % (inst_pb))
    for func in module.fuc:
        # Set once any size had to be derived from address gaps; it is kept
        # for the remaining blocks of the same function.
        # NOTE(review): a later block without instructions would then record
        # an entry keyed by -1 -- confirm this cannot happen in practice.
        inst_compared = False
        for bb in func.bb:
            prev_va = -1
            for inst in bb.instructions:
                inst_size = 0
                if inst.size != 0:
                    inst_size = inst.size
                    InstsInfoComparedSet[inst.va] = inst_size
                    InstsInfoCompared.append((inst.va, inst_size))
                elif prev_va != -1:
                    # No explicit size: the previous instruction spans up to
                    # the start of the current one.
                    inst_compared = True
                    inst_size = inst.va - prev_va
                    InstsInfoComparedSet[prev_va] = inst_size
                    InstsInfoCompared.append((prev_va, inst_size))
                prev_va = inst.va
            if inst_compared:
                # The last instruction of the block runs to the block end.
                inst_size = bb.size + bb.va - prev_va
                InstsInfoComparedSet[prev_va] = inst_size
                InstsInfoCompared.append((prev_va, inst_size))
    InstsInfoCompared.sort(key=(lambda tup: tup[0]))
Ejemplo n.º 2
0
def dumpBlocks(bv, output):
    """Serialize the functions, basic blocks and edges of ``bv`` to ``output``.

    Walks ``bv.functions`` and, for every function, every basic block. For
    each block it records the start address, the instruction addresses and
    the start addresses of the outgoing-edge targets in a
    ``blocks_pb2.module``, which is then written to ``output`` in binary
    protobuf form.

    Args:
        bv: Object exposing ``functions``; presumably a Binary Ninja
            ``BinaryView`` -- confirm against the caller.
        output: Path of the protobuf file to write.
    """
    module = blocks_pb2.module()
    for (func_idx, func) in enumerate(bv.functions):
        pbFunc = module.fuc.add()
        pbFunc.va = func.start
        binja.log_info("Function {0}: {1}".format(func_idx, func.start))
        for (blk_idx, block) in enumerate(func):
            pbBB = pbFunc.bb.add()
            pbBB.va = block.start
            # can't get the basic block size for now
            pbBB.parent = pbFunc.va
            block_start = block.start
            binja.log_info("\tBasic Block {0:x}: {1:x}".format(
                blk_idx, block_start))
            insn_cur = block_start
            if not block.can_exit:
                pbBB.type = 0x20  # ninja potentially non-return type
                # The original message claimed the block "can exit", which is
                # the opposite of the condition being tested.
                binja.log_info("\t bb 0x%x can not exit" % pbBB.va)

            for insn in block:
                instruction = pbBB.instructions.add()
                instruction.va = insn_cur
                binja.log_info("\t\t{0:x}".format(insn_cur))
                # insn[1] is used as the instruction length to step to the
                # next address (assumes items are (tokens, length) pairs).
                insn_cur += insn[1]
            for (successor_idx, out_edge) in enumerate(block.outgoing_edges):
                print(out_edge)
                binja.log_info("\t\tsuccessor {0:x}: {1:x}".format(
                    successor_idx, out_edge.target.start))
                child = pbBB.child.add()
                child.va = out_edge.target.start
    # The context manager closes the file even if serialization fails.
    with open(output, "wb") as f:
        f.write(module.SerializeToString())
Ejemplo n.º 3
0
def dumpInsts(binary, output):
    """Dump the instruction addresses reported by ``objdump`` to a protobuf.

    Runs an ``objdump -d`` shell pipeline that keeps only the address column
    of the disassembly, stores it in a temporary log under ``/tmp`` and
    converts every address into an instruction entry. All instructions are
    attached to a single dummy function / basic block at address 0, and the
    resulting ``blocks_pb2.module`` is written to ``output``.

    Any exception is printed with ``traceback.print_exc()`` and swallowed.
    The temporary log is removed on both the success and the failure path.

    Args:
        binary: Path of the binary to disassemble. It is interpolated into a
            shell command unquoted.
        output: Path of the protobuf file to write.
    """
    output_tmp = None
    try:
        output_tmp = randomString()
        execute_str = "objdump --wide -d %s | egrep '^[[:space:]]*[0-9a-f]+:' | cut -d: -f1 | awk '{print \"0x\"$1}' | tee /tmp/%s.log" % (
            binary, output_tmp)
        print(execute_str)
        os.system(execute_str)
        with open("/tmp/%s.log" % (output_tmp)) as objdump_file:
            module = blocks_pb2.module()
            # because objdump doesn't have function and basic block if the binary is stripped
            dummy_func = module.fuc.add()
            dummy_func.va = 0x0
            dummy_bb = dummy_func.bb.add()
            dummy_bb.va = 0x0
            dummy_bb.parent = 0x0
            for line in objdump_file:
                line = line.strip()
                if not line:
                    # A blank line carries no address; int('', 16) would raise.
                    continue
                instruction = dummy_bb.instructions.add()
                instruction.va = int(line, 16)
            with open(output, "wb") as f:
                f.write(module.SerializeToString())
    except Exception:
        traceback.print_exc()
        return
    finally:
        # Remove the temporary log even when the dump failed half-way.
        if output_tmp is not None:
            os.system("rm -f /tmp/%s.log" % (output_tmp))
Ejemplo n.º 4
0
def dumpBlocks(output):
    """Serialize the program's functions and basic blocks to ``output``.

    Iterates the non-external functions of ``currentProgram``, adds one
    protobuf function per distinct entry point and fills it with the code
    blocks that ``BasicBlockModel`` reports for the function body (via the
    ``addBB`` helper). Code blocks that belong to no visited function are
    then collected under a dummy function at address 0. The resulting
    ``blocks_pb2.module`` is written to ``output``.

    Relies on the script-level names ``currentProgram`` and ``monitor``.

    Args:
        output: Path of the protobuf file to write.
    """
    bbModel = BasicBlockModel(currentProgram)
    functionManager = currentProgram.getFunctionManager()
    module = blocks_pb2.module()
    # record the basic block that has been added by functions
    bb_set = set()
    # entry points already emitted, so each function is dumped once
    funcs_set = set()
    for func in functionManager.getFunctions(True):
        # we skip external functions
        if func.isExternal():
            continue
        func_va = func.getEntryPoint().getOffset()
        if func_va in funcs_set:
            continue
        funcs_set.add(func_va)
        logging.debug("Function address is 0x{0:x}".format(func_va))
        codeBlockIterator = bbModel.getCodeBlocksContaining(
            func.getBody(), monitor)
        pbFunc = module.fuc.add()
        pbFunc.va = func_va

        if func.hasNoReturn():
            # 0x5 is the type value stored for functions that do not return.
            pbFunc.type = 0x5
            logging.debug("function at 0x%x does not return!" % pbFunc.va)

        # iter over the basic blocks
        while codeBlockIterator.hasNext():
            bb = codeBlockIterator.next()
            pbBB = pbFunc.bb.add()
            bb_set.add(bb.getMinAddress().getOffset())
            addBB(pbBB, bb, pbFunc)

    # Second pass: pick up every code block no function claimed above.
    codeBlocks = bbModel.getCodeBlocks(monitor)
    dummy_func = module.fuc.add()
    dummy_func.va = 0x0
    while codeBlocks.hasNext():
        bb = codeBlocks.next()
        bb_va = bb.getMinAddress().getOffset()
        if bb_va in bb_set:
            continue
        pbBB = dummy_func.bb.add()
        bb_set.add(bb_va)
        logging.debug("Find another basic block 0x%x" % (bb_va))
        addBB(pbBB, bb, dummy_func)

    # The context manager closes the file even if serialization fails.
    with open(output, "wb") as f:
        f.write(module.SerializeToString())
Ejemplo n.º 5
0
def readInstsInfo(inst_pb):
    """Load the ground-truth instructions and code ranges from a protobuf.

    Parses ``inst_pb`` into a ``blocks_pb2.module`` and fills these globals:

    * ``InstsInfo``: list of ``(va, size)`` tuples, sorted by address.
    * ``InstsInfoSet`` and ``groundTruthFuncRange``: ``va -> size`` mappings.
    * ``groundTruthInstructionsRange``: list of ``(start, end)`` ranges built
      by merging basic blocks that directly follow each other; block padding
      is excluded from the range.
    * ``notIncludedLinkerFunc``: receives every address in
      ``linkerFuncAddr`` that is not the start of a function in the module.

    Args:
        inst_pb: Path of the protobuf file to read.
    """
    global InstsInfo
    global InstsInfoSet
    global groundTruthInstructionsRange
    global groundTruthFuncRange
    module = blocks_pb2.module()
    tmpFuncSet = set()
    try:
        # The context manager closes the file even if read/parse raises.
        with open(inst_pb, 'rb') as pb_file:
            module.ParseFromString(pb_file.read())
    except IOError:
        # Best effort: log and carry on with the (empty) module.
        logging.error("Could not open the file %s!" % (inst_pb))
    range_start = 0x0
    range_end = 0x0
    for func in module.fuc:
        tmpFuncSet.add(func.va)
        for bb in func.bb:
            # add the range
            if bb.va != range_end:
                # The block does not continue the current range: flush the
                # range collected so far (if non-empty) and open a new one.
                if range_start != range_end:
                    groundTruthInstructionsRange.append(
                        (range_start, range_end))
                range_start = bb.va
                range_end = bb.va + bb.size - bb.padding
            else:
                # Contiguous block: extend the current range.
                range_end += bb.size - bb.padding

            for inst in bb.instructions:
                InstsInfo.append((inst.va, inst.size))
                groundTruthFuncRange[inst.va] = inst.size
                InstsInfoSet[inst.va] = inst.size
    # Flush the range that was still open when the loop ended.
    if range_start != range_end:
        groundTruthInstructionsRange.append((range_start, range_end))

    InstsInfo.sort(key=(lambda tup: tup[0]))

    for func in linkerFuncAddr:
        if func not in tmpFuncSet:
            notIncludedLinkerFunc.add(func)
Ejemplo n.º 6
0
def readInstsInfo(inst_pb):
    """Load ground-truth instructions, code ranges and branch ranges.

    Parses ``inst_pb`` into a ``blocks_pb2.module`` and fills these globals:

    * ``InstsInfo``: list of ``(va, size)`` tuples, sorted by address.
    * ``InstsInfoSet``: ``va -> size`` mapping.
    * ``groundTruthInstructionsRange``: list of ``(start, end)`` ranges built
      by merging basic blocks that directly follow each other; block padding
      is excluded from the range.
    * ``BRANCH_RANGES``: ``(start, end)`` of the last instruction of every
      block whose type is a branch or call type.

    Args:
        inst_pb: Path of the protobuf file to read.
    """
    global InstsInfo
    global InstsInfoSet
    global groundTruthInstructionsRange
    global BRANCH_RANGES
    module = blocks_pb2.module()
    try:
        # The context manager closes the file even if read/parse raises.
        with open(inst_pb, 'rb') as pb_file:
            module.ParseFromString(pb_file.read())
    except IOError:
        # Best effort: log and carry on with the (empty) module.
        logging.error("Could not open the file %s!" % (inst_pb))

    # Block types whose last instruction is a branch or a call; built once
    # instead of once per basic block.
    branch_types = {BlockType.COND_BRANCH, BlockType.DIRECT_BRANCH,
                    BlockType.INDIRECT_BRANCH, BlockType.INDIRECT_CALL,
                    BlockType.DIRECT_CALL}

    range_start = 0x0
    range_end = 0x0
    for func in module.fuc:
        for bb in func.bb:
            if bb.va != range_end:
                # The block does not continue the current range: flush the
                # range collected so far (if non-empty) and open a new one.
                if range_start != range_end:
                    groundTruthInstructionsRange.append(
                        (range_start, range_end))
                range_start = bb.va
                range_end = bb.va + bb.size - bb.padding
            else:
                # Contiguous block: extend the current range.
                range_end += bb.size - bb.padding

            if len(bb.instructions) > 0 and bb.type in branch_types:
                last_inst = bb.instructions[-1]
                BRANCH_RANGES.append(
                    (last_inst.va, last_inst.va + last_inst.size))
            for inst in bb.instructions:
                InstsInfo.append((inst.va, inst.size))
                InstsInfoSet[inst.va] = inst.size

    # Flush the range that was still open when the loop ended; without this
    # the final contiguous range would never be recorded.
    if range_start != range_end:
        groundTruthInstructionsRange.append((range_start, range_end))

    InstsInfo.sort(key=(lambda tup: tup[0]))
Ejemplo n.º 7
0
    (options, args) = parser.parse_args()
    if options.groundtruth == None:
        print("Please input the ground truth file")
        exit(-1)
    if options.comparedfile == None:
        print("Please input the compared file")
        exit(-1)

    if options.binaryFile == None:
        print("Please input the binary file")
        exit(-1)

    exec_secs = parsePEExecSecs(options.binaryFile)
    (IMAGE_BASE, ELFClasss) = parsePEFile(options.binaryFile)

    mModule1 = blocks_pb2.module()
    mModule2 = blocks_pb2.module()
    try:
        f1 = open(options.groundtruth, 'rb')
        mModule1.ParseFromString(f1.read())
        f1.close()
        f2 = open(options.comparedfile, 'rb')
        mModule2.ParseFromString(f2.read())
        f2.close()
    except IOError:
        print("Could not open the file\n")
        exit(-1)

    # confirm which tool we are handling
    confirmTools(options.comparedfile)
Ejemplo n.º 8
0
def dumpBlocks(binary, output, output_sta):
    """Recover the CFG of ``binary`` with angr and dump it to ``output``.

    Builds a ``CFGFast`` (normalized, with tail-call detection) and, for
    every function that is not an alignment function, emits its basic
    blocks, their successor edges and their instruction addresses into a
    ``blocks_pb2.module``. Blocks whose address is one of the function's
    ``callout_sites`` are typed ``BlockType.NON_RETURN_CALL``. Progress is
    printed to stdout.

    Args:
        binary: Path of the binary to analyse.
        output: Path of the protobuf file to write.
        output_sta: Unused here; the statistics output that consumed it is
            commented out.
    """
    # "force_complete_scan" default is True
    p = angr.Project(binary, load_options={'auto_load_libs': False})
    cfg = p.analyses.CFGFast(normalize=True, detect_tail_calls=True)
    # output func matching counts
    # outputFuncMatching(cfg, output_sta)

    module = blocks_pb2.module()

    # iter over the cfg functions
    for func_addr in cfg.functions:
        func = cfg.functions[func_addr]
        # Only an explicit False is reported; any other value is not.
        if func.returning == False:  # noqa: E712
            print("Non-return function at 0x%x" % func.addr)

        if func.alignment:
            print("function 0x%x is alignment function, skip!" % (func.addr))
            continue

        # collect non-return calls
        current_non_bbs = set()
        for non_ret in func.callout_sites:
            if non_ret is not None:
                print("non-return call at 0x%x" % non_ret.addr)
                current_non_bbs.add(non_ret.addr)
        pbFunc = module.fuc.add()
        pbFunc.va = func_addr
        print("function %s, its addr is 0x%x" % (func.name, func.addr))
        # iter over blocks
        for bb in func.blocks:
            if bb is None:
                continue
            print("basic block addr 0x%x, its size 0x%x" % (bb.addr, bb.size))
            cfg_node = cfg.get_any_node(bb.addr)
            # Blocks without a CFG node or with size 0 are not emitted.
            if cfg_node is None or bb.size == 0:
                continue

            pbBB = pbFunc.bb.add()
            pbBB.va = bb.addr
            pbBB.size = bb.size
            pbBB.parent = func_addr
            for suc in cfg_node.successors:
                child = pbBB.child.add()
                child.va = suc.addr
                print("Edge 0x%x -> 0x%x" % (bb.addr, suc.addr))

            # iter over instructions
            # bb.instruction_addrs may have bug
            # we use capstone instead to extract instruction
            for inst in bb.capstone.insns:
                instruction = pbBB.instructions.add()
                instruction.va = inst.address
                print("instruction: 0x%x" % (instruction.va))
                # can't get its size from angr for now

            # Tag the block only once it has actually been emitted. Doing
            # this for skipped blocks (as before) tagged the previously
            # emitted block, or raised NameError when none existed yet.
            if bb.addr in current_non_bbs:
                pbBB.type = BlockType.NON_RETURN_CALL

    # The context manager closes the file even if serialization fails.
    with open(output, "wb") as f:
        f.write(module.SerializeToString())
Ejemplo n.º 9
0
    (options, args) = parser.parse_args()
    assert options.groundtruth != None, "Please input the ground truth file"
    assert options.binaryfile != None, "Please input the binary file"

    if options.ispe:
        IS_PE = True

    if not IS_PE:
        init_elf(options.binaryfile)
    else:
        init_pe(options.binaryfile)

    if ELFCLASS == 32:
        IAT_BASE = 0x0

    module = blocks_pb2.module()
    try:
        f1 = open(options.groundtruth, 'rb')
        module.ParseFromString(f1.read())
        f1.close()
    except IOError:
        logging.error("Could not open the file %s" % options.groundtruth)
        exit(-1)

    known_nonret_funcs = set()
    iat_known_nonret_funcs = set()

    if not IS_PE:
        getNonRetFuncsFromSymbols(options.binaryfile, known_nonret_funcs)
    else:
        iats = getNonRetFuncsFromImportObjs(options.binaryfile,
Ejemplo n.º 10
0
def _startBasicBlock(pb_func, bb_addr, cfg_edge, non_ret_sites):
    """Append a basic block starting at ``bb_addr`` to ``pb_func``.

    The block gets its successors from ``cfg_edge`` and its first
    instruction (at ``bb_addr``), which is flagged as a non-return call when
    the address is in ``non_ret_sites``. Returns the new protobuf block.
    """
    # WARNING: the result lacks of basic block size
    pb_bb = pb_func.bb.add()
    pb_bb.va = bb_addr
    pb_bb.parent = pb_func.va

    # add the current basic block successors
    # WARNING: the successors does not contain the `call` instruction target
    for suc in cfg_edge.get(bb_addr, ()):
        child = pb_bb.child.add()
        child.va = suc

    instruction = pb_bb.instructions.add()
    instruction.va = bb_addr
    if bb_addr in non_ret_sites:
        instruction.call_type = 4  # call a non-return
        logging.debug("set non-return instruction at 0x%x, call_type is 0x%x" % (instruction.va, instruction.call_type))
    return pb_bb


def dumpBB(binary, output):
    """Dump the functions, basic blocks and CFG edges that BAP finds.

    Runs ``bap`` once on ``binary`` and post-processes its dump with shell
    pipelines to obtain (1) the call sites marked "with noreturn", (2) the
    CFG edges and (3) the address column of the disassembly listing. The
    listing is then folded into a ``blocks_pb2.module`` written to
    ``output``.

    In the address listing a repeated address marks a boundary: an address
    seen twice in a row starts a new basic block, an address seen three
    times in a row starts a new function.

    Any exception is printed with ``traceback.print_exc()`` and swallowed.

    Args:
        binary: Path of the binary to analyse.
        output: Path of the protobuf file to write.
    """
    # store the cfg edge.
    cfg_edge = dict()
    non_ret_sites = set()
    try:
        dump_tmp = randomString()
        abs_path = os.path.abspath(binary)
        execute_str = "bap %s -d -drcfg -dasm --passes=with-no-return --print-bir-attr=address > /tmp/%s.dump" % (abs_path, dump_tmp)
        logging.info("execute string is %s" % (execute_str))
        os.system(execute_str)

        ## collect all non-ret call sites
        non_ret_tmp = randomString()
        # Raw string: "\|" is grep alternation, not a Python escape.
        execute_str1 = r'grep "address\|call @.* with noreturn" /tmp/%s.dump > /tmp/%s.log' % (dump_tmp, non_ret_tmp)
        os.system(execute_str1)
        valid_address = -1
        with open("/tmp/%s.log" % (non_ret_tmp), "r") as non_ret_file:
            for line in non_ret_file:
                if 'address' in line:
                    try:
                        valid_address = int(line.split(' ')[-1].strip(), 16)
                        continue
                    except ValueError:
                        # Not an address line after all; fall through to the
                        # noreturn check below.
                        pass
                if 'noreturn' in line and valid_address != -1:
                    logging.debug('collect noret site %x' % valid_address)
                    non_ret_sites.add(valid_address)
                    valid_address = -1

        grep_cfg_tmp = randomString()
        execute_str2 = 'grep -e "->" /tmp/%s.dump | tr -s " " | cut -d \\" -f2,4 | awk "{print $1 $2}" > /tmp/%s.log' % (dump_tmp, grep_cfg_tmp)
        os.system(execute_str2)
        logging.info("excute string is %s" % (execute_str2))
        with open("/tmp/%s.log" % (grep_cfg_tmp), "r") as grep_cfg_file:
            for line in grep_cfg_file:
                # Each line is split on '"' into the source and target
                # address of one edge.
                line = line.split('"')
                start = int(line[0], 16)
                end = int(line[1], 16)
                logging.info("edge 0x%x -> 0x%x" % (start, end))
                cfg_edge.setdefault(start, set()).add(end)
        os.system('rm /tmp/%s.log' % (grep_cfg_tmp))
        os.system('rm /tmp/%s.log' % (non_ret_tmp))
    except Exception:
        traceback.print_exc()
        return

    # get the function and basic block information
    try:
        grep_bb_tmp = randomString()
        execute_str4 = 'sed -ne "/Disassembly of/,$ p" /tmp/%s.dump | egrep "^[[:space:]]*[0-9a-f]+:" | cut -d : -f1 | awk "{print $1}" > /tmp/%s.log' % (dump_tmp, grep_bb_tmp)
        logging.info("execute string is %s" % (execute_str4))
        os.system(execute_str4)
        last_last_inst_addr = None
        last_inst_addr = None
        cur_inst_addr = None
        pb_cur_func = None
        pb_cur_bb = None

        with open("/tmp/%s.log" % (grep_bb_tmp), "r") as grep_bb_file:
            module = blocks_pb2.module()
            for line in grep_bb_file:
                cur_inst_addr = int(line.strip(), 16)
                if cur_inst_addr == last_inst_addr:
                    # find the new function
                    if last_inst_addr == last_last_inst_addr:
                        logging.info("current function addr 0x%x" % (cur_inst_addr))
                        # delete the last function's last basic block: it was
                        # opened on the second occurrence of this address,
                        # before we knew a new function starts here
                        if pb_cur_func is not None:
                            del pb_cur_func.bb[-1]
                        pb_cur_func = module.fuc.add()
                        pb_cur_func.va = cur_inst_addr
                        pb_cur_bb = _startBasicBlock(
                            pb_cur_func, cur_inst_addr, cfg_edge, non_ret_sites)

                    elif pb_cur_func is not None:  # find the new basic block
                        # The first occurrence of this address was appended to
                        # the previous block as an instruction; take it back.
                        if pb_cur_bb is not None:
                            del pb_cur_bb.instructions[-1]

                        logging.info("current basic block addr 0x%x" % (cur_inst_addr))
                        pb_cur_bb = _startBasicBlock(
                            pb_cur_func, cur_inst_addr, cfg_edge, non_ret_sites)

                # current instruction
                elif pb_cur_bb is not None:
                    # WARNING: the result lacks of basic block size
                    logging.info("current instruction addr 0x%x" % (cur_inst_addr))
                    instruction = pb_cur_bb.instructions.add()
                    instruction.va = cur_inst_addr

                    # set current bb type
                    if cur_inst_addr in non_ret_sites:
                        instruction.call_type = 4  # call a non-return

                last_last_inst_addr = last_inst_addr
                last_inst_addr = cur_inst_addr
            # The context manager closes the file even if serialization fails.
            with open(output, "wb") as f:
                f.write(module.SerializeToString())

        os.system('rm /tmp/%s.dump' % (dump_tmp))
        os.system('rm /tmp/%s.log' % (grep_bb_tmp))
    except Exception:
        traceback.print_exc()
        return
Ejemplo n.º 11
0
def dumpBlocks(binary, output, statics):
    """Analyse ``binary`` with radare2 and dump its functions and blocks.

    Drives radare2 through ``r2pipe``: runs the analysis commands (``aa``,
    ``afr`` from ``main``, ``aac``, ``aap``, ``aanr``), lists the functions
    with ``aflj`` and, per function, the basic blocks with ``afbj`` and the
    instructions with ``pdj``. Functions, blocks, instructions (address and
    size) and successors (fail / jump / switch cases) are stored in a
    ``blocks_pb2.module`` written to ``output``. Finally the set of function
    addresses and the prelude-scan results are handed to
    ``outputFuncMatching`` together with ``statics``.

    The process exits with status -1 if the binary cannot be opened.

    Args:
        binary: Path of the binary to analyse.
        output: Path of the protobuf file to write.
        statics: Passed through to ``outputFuncMatching``.
    """
    prelude_funcs = set()
    try:
        r2 = r2pipe.open(binary)
    except Exception:
        logging.error("r2pipe open binary error!")
        exit(-1)

    pbModule = blocks_pb2.module()
    ## analyse all
    #r2.cmd('aaa')
    r2.cmd('aa')
    r2.cmd('e anal.depth = 0x10000000')
    # recursively disassemble from main function
    r2.cmd('s main')
    r2.cmd("afr")
    # aac heuristic, default is on in `aaa` analysis
    r2.cmd('aac')
    # scan the function prologue
    logging.debug("Before aap Analysis...")
    prelude_results = r2.cmd('aap')
    logging.debug(prelude_results)
    logging.debug("Done aap Analysis!")
    count_prelude = 0x0
    for res in prelude_results.split('\n'):
        # demo output: [Binpang Debug]: Preludecnt number is 3
        if "Binpang" not in res:
            continue
        # NOTE(review): the marker "Preclude" differs from the "Prelude"
        # spelling in the demo output above -- confirm which string the
        # radare2 build actually prints.
        if "Preclude" in res:
            prelude_addr = int(res.split()[-1], 16)
            prelude_funcs.add(prelude_addr)
        else:
            count_prelude += int(res.split()[-1], 10)

    r2.cmd('aanr')
    afl_result = r2.cmd('aflj')
    afl_result = json.loads(afl_result)
    all_func_result = set()
    for func in afl_result:
        func_addr = func['offset']
        if func_addr in all_func_result:
            continue
        no_return = func['noreturn']
        all_func_result.add(func_addr)
        pbFunc = pbModule.fuc.add()
        pbFunc.va = func_addr
        if no_return == True:  # noqa: E712
            # 0x5 is the type value stored for functions that do not return.
            pbFunc.type = 0x5

        logging.info("Find function in %x" % (func_addr))
        # seek the function start address
        r2.cmd('s %d' % func_addr)
        # output current function's basic block information
        afb_result = r2.cmd("afbj")
        try:
            afb_result = json.loads(afb_result)
        except (ValueError, TypeError):
            # Unparsable block list: keep the function, skip its blocks.
            continue
        for bb in afb_result:
            bb_addr = bb['addr']
            bb_size = bb['size']
            pbBB = pbFunc.bb.add()
            pbBB.va = bb_addr
            pbBB.size = bb_size
            pbBB.parent = func_addr
            logging.info("Find basic block %x" % bb_addr)
            r2.cmd('s %d' % bb_addr)
            inst_result = r2.cmd('pdj %d' % bb['ninstr'])
            try:
                inst_result = json.loads(inst_result)
            except (ValueError, TypeError):
                # Unparsable disassembly: the block stays without
                # instructions and successors.
                continue
            for inst in inst_result:
                inst_addr = inst['offset']
                inst_size = inst['size']
                logging.info("Find instruction %x, size %x" %
                             (inst_addr, inst_size))
                instruction = pbBB.instructions.add()
                instruction.va = inst_addr
                instruction.size = inst_size

            # basic block fail address
            bb_fail = bb.get("fail", None)
            # basic block jump address
            bb_jmp = bb.get("jump", None)
            if bb_fail is not None:
                logging.info("Successor: 0x%x" % (bb_fail))
                child = pbBB.child.add()
                child.va = bb_fail
            if bb_jmp is not None:
                logging.info("Successor: 0x%x" % (bb_jmp))
                child = pbBB.child.add()
                child.va = bb_jmp

            # get switch cases successors
            switch_op = bb.get("switch_op", None)
            if switch_op is None:
                continue
            bb_cases = switch_op.get("cases", None)
            if bb_cases is None:
                continue
            # Several cases may share a target; emit each target once.
            visited_cases = set()
            for (idx, case) in enumerate(bb_cases):
                if case['addr'] in visited_cases:
                    continue
                visited_cases.add(case['addr'])
                logging.info("jmptbl successor#%d: 0x%x" % (idx, case['addr']))
                child = pbBB.child.add()
                child.va = case['addr']

    # The context manager closes the file even if serialization fails.
    with open(output, "wb") as f:
        f.write(pbModule.SerializeToString())

    # dump scan function information
    outputFuncMatching(all_func_result, prelude_funcs, statics)