Exemple #1
0
def processRefs(output):
    """
    Collect the xrefs that IDA recognizes and save them as a serialized
    ``refInf_pb2.RefList``.

    Every head returned by ``idautils.Heads()`` is visited. For a code head
    the instruction is decoded and only the xrefs whose target is contained
    in the result of ``getCandidateRefsFromInsn`` are kept; the recorded
    source address is then the value that result stores for the target
    rather than the head itself. Code heads that cannot be decoded, or for
    which no candidates are returned, are skipped entirely. Xrefs from
    non-code heads are recorded with the head as their source. Targets
    rejected by ``is_invalid_ea`` are always dropped.

    params:
        output: protobuf file path
    returns:
        None. The serialized list is written to ``output``.
    """

    refInf = refInf_pb2.RefList()
    # iterate over all heads known to IDA
    for head in idautils.Heads():
        is_code = bool(idc.is_code(idc.get_full_flags(head)))
        candidateRefs = None
        if is_code:
            decoded_inst = ida_ua.insn_t()
            if ida_ua.decode_insn(decoded_inst, head) > 0:
                candidateRefs = getCandidateRefsFromInsn(decoded_inst)
            if candidateRefs is None:
                # decoding failed or the instruction has no candidates
                continue

        for xref in idautils.XrefsFrom(head, 0):
            ref_from = head
            target_addr = xref.to
            if is_code:
                # the target must appear in the instruction's own
                # representation, otherwise the xref is ignored
                if target_addr not in candidateRefs:
                    continue
                ref_from = candidateRefs[target_addr]
            if is_invalid_ea(target_addr):
                continue

            logging.debug(
                "Ref: 0x%x -> 0x%x, type is %s",
                ref_from, target_addr, idautils.XrefTypeName(xref.type))
            ref = refInf.ref.add()
            ref.ref_va = ref_from
            ref.target_va = target_addr
            # default value
            ref.ref_size = 8
            target_is_code = idc.is_code(idc.get_full_flags(target_addr))
            # kind: 0 = c2c, 1 = c2d, 2 = d2c, 3 = d2d
            if is_code:
                ref.kind = 0 if target_is_code else 1
            else:
                ref.kind = 2 if target_is_code else 3

    # save the protobuf result
    with open(output, 'wb') as pbOut:
        pbOut.write(refInf.SerializeToString())
Exemple #2
0
def dumpRefs(output):
    """
    Collect the references of the current Ghidra program and write them to
    ``output`` as a serialized ``refInf_pb2.RefList``.

    For instructions, a reference is kept only when its target offset also
    appears as an address or constant varnode in the instruction's p-code;
    stack references are ignored and each target is recorded at most once
    per instruction. For every other code unit, only the first reference
    seen from a given source address is recorded.

    The walk is aborted with ``exit(-1)`` as soon as ``time.time()`` exceeds
    the externally defined ``timeout``.

    params:
        output: protobuf file path
    returns:
        None
    """
    refInf = refInf_pb2.RefList()
    listing = currentProgram.getListing()
    # source addresses of the data references that were already emitted
    refSet = set()
    for codeunit in listing.getCodeUnits(True):
        if time.time() > timeout:
            logging.error("Oh, sorry! time out!")
            exit(-1)
        if isinstance(codeunit, Instruction):
            # every address/constant offset the instruction's p-code touches
            target_vas = set()
            for pcode in codeunit.getPcode():
                for varnode in pcode.getInputs():
                    if varnode.isAddress() or varnode.isConstant():
                        target_vas.add(varnode.getOffset())
                output_varnode = pcode.getOutput()
                if output_varnode is not None and \
                        (output_varnode.isAddress() or output_varnode.isConstant()):
                    target_vas.add(output_varnode.getOffset())

            collected_refs = set()
            for xref in codeunit.getReferencesFrom():
                if xref.isStackReference():
                    continue
                target_va = xref.getToAddress().getOffset()
                # make sure the target_va is part of the current
                # instruction's internal representation and is new
                if target_va not in target_vas or \
                        target_va in collected_refs:
                    continue
                collected_refs.add(target_va)
                ref = refInf.ref.add()
                ref.ref_va = xref.getFromAddress().getOffset()
                # NOTE(review): unlike the data branch below, the target is
                # stored without the 64-bit mask - confirm this is intended
                ref.target_va = target_va
                # TODO: Get the correct size and kind
                ref.ref_size = 8
                ref.kind = 0
                logging.info("[code]: From 0x%x -> 0x%x", ref.ref_va, ref.target_va)
        else:
            for xref in codeunit.getReferencesFrom():
                ref_addr = xref.getFromAddress().getOffset()
                if ref_addr in refSet:
                    continue
                target_va = xref.getToAddress().getOffset()
                logging.info("[data]: From 0x%x -> 0x%x", ref_addr, target_va)
                refSet.add(ref_addr)
                ref = refInf.ref.add()
                ref.ref_va = ref_addr
                # keep only the low 64 bits of the target offset
                ref.target_va = target_va & 0xffffffffffffffff
                ref.ref_size = 0
                ref.kind = 0
    logging.debug("Collect Refs done! ready to write output...")
    # the context manager closes the file even if serialization fails
    with open(output, "wb") as f:
        f.write(refInf.SerializeToString())
Exemple #3
0
    if not_included != None:
        logging.debug(
            "Append the not included functions! {0}".format(not_included))
        notIncludedLinkerFunc != not_included
    load_range = getLoadAddressRange(options.binary)
    load_range = enlargeRange(load_range)
    logging.debug("load range is {}".format(load_range))
    getLinkerFunctionRange(options.binary)
    elfclass = readElfClass(options.binary)
    MD = init_capstone(elfclass)
    readSecRange(options.binary)
    loadedSegs = get_loaded_info(options.binary)
    PIE = isPIE(options.binary)
    if PIE:
        disassembler_base_addr = getPIEBaseOffset(options.comparedfile)
    refInf1 = refInf_pb2.RefList()
    refInf2 = refInf_pb2.RefList()
    try:
        f1 = open(options.groundtruth, 'rb')
        refInf1.ParseFromString(f1.read())
        f1.close()
    except:
        print("Could not open the file: %s\n" % options.groundtruth)
        exit(-1)

    try:
        f2 = open(options.comparedfile, 'rb')
        refInf2.ParseFromString(f2.read())
        f2.close()
    except:
        print("Could not open the file: %s\n" % options.comparedfile)
Exemple #4
0
def parse_fixup(pdb_file, bin_path, output):
    """
    Convert the fixups listed by ``cvdump -fixup`` into a serialized
    ``refInf_pb2.RefList``.

    The image base is taken from the ``dumpbin -HEADERS`` output of
    ``bin_path``. Every fixup line that follows the dashed separator in the
    cvdump output is then turned into one reference whose source and target
    are the listed RVAs plus that image base. Lines with fewer than four
    whitespace separated fields (e.g. blank lines) are skipped.

    params:
        pdb_file: path of the PDB file handed to ``cvdump -fixup``
        bin_path: path of the binary handed to ``dumpbin -HEADERS``
        output: protobuf file path
    returns:
        None. The process exits with -1 when dumpbin reports a failure,
        when no ``image base`` line is found, or when one of the cvdump
        calls returns a negative status.
    """
    # NOTE(review): fixed scratch file in the working directory, and the
    # paths are interpolated into shell command strings unquoted - paths
    # containing spaces or shell metacharacters will not work.
    tmp_file = './tmp_cvdump.fixup'

    with open(tmp_file, 'w+') as fout:
        ret_status = subprocess.call("dumpbin -HEADERS %s" % bin_path, stdout=fout, shell=True)
        if ret_status:
            logging.error("dump symbols of binary")
            exit(-1)

    # the first field of the "image base" line is the base address in hex
    image_base = None
    with open(tmp_file, 'r') as headers:
        for line in headers:
            if 'image base' in line:
                image_base = int(line.strip().split()[0], 16)
                logging.debug("Image base is 0x%lx", image_base)
                break
    if image_base is None:
        logging.error("Can't parse the image base correctly!")
        exit(-1)

    # probe for cvdump before relying on it
    ret_status = subprocess.call('cvdump -?', stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    if ret_status < 0:
        logging.error("Can't find cvdump executable. please install cvdump firstly!")
        exit(-1)

    with open(tmp_file, 'w+') as fout:
        ret_status = subprocess.call('cvdump -fixup %s' % pdb_file, stdout=fout, stderr=sys.stderr, shell=True)
        if ret_status < 0:
            logging.error("cvdump -fixup error!")
            exit(-1)

    # ref: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
    # fixup type -> reference size in bytes; every other type uses 4
    size_by_type = {0x1: 8, 0xa: 2, 0xc: 1}

    ref_inf = refInf_pb2.RefList()
    in_table = False
    with open(tmp_file, 'r') as fixups:
        for line in fixups:
            line = line.strip()
            if not in_table:
                # everything up to and including the dashed line is header
                in_table = '-------' in line
                continue

            fields = line.split()
            if len(fields) < 4:
                # blank or truncated line: nothing to record
                continue

            # parse first so that a reference is only added for a line
            # that really describes a fixup
            ref_type = int(fields[0], 16)
            ref_rva = int(fields[2], 16)
            ref_target = int(fields[3], 16)

            ref = ref_inf.ref.add()
            ref.ref_va = image_base + ref_rva
            ref.target_va = image_base + ref_target
            ref.ref_size = size_by_type.get(ref_type, 4)

            logging.debug("current fixup: 0x%lx -> 0x%lx", ref.ref_va, ref.target_va)

            # TODO. add support of fixup type(d2d, d2c, c2c, c2d)

    with open(output, 'wb') as output_file:
        output_file.write(ref_inf.SerializeToString())

    # portable cleanup; `rm` is not available in a plain Windows shell
    os.remove(tmp_file)
Exemple #5
0
              (i, ref.ref_va, ref.target_va))


def dumpRef(binary, refInf):
    """
    Load ``binary`` into an angr project (without auto-loading libraries)
    and hand the project together with ``refInf`` to ``extract``.

    params:
        binary: path of the binary to load
        refInf: object passed through unchanged to ``extract``
    returns:
        None
    """
    # logging.getLogger('angr.analyses.cfg.indirect_jump_solvers.jumptable').setLevel(logging.DEBUG)
    loader_options = {'auto_load_libs': False}
    project = angr.Project(binary, load_options=loader_options)
    print("load project done!")
    extract(project, refInf)


if __name__ == "__main__":
    # Script entry point: parse the command line, collect the references of
    # the given binary and write them out as a serialized RefList.
    parser = optparse.OptionParser()
    parser.add_option(
        "-o", "--output",
        dest="output", action="store", type="string",
        default="/tmp/angr_refs.pb2",
        help="output of the protobuf file")
    parser.add_option(
        "-b", "--binary",
        dest="binary", action="store", type="string",
        default=None,
        help="binary file")
    # NOTE(review): the help text of --ss duplicates the one of --binary and
    # the option is not read in this block - confirm its purpose.
    parser.add_option(
        "-s", "--ss",
        dest="ss", action="store", type="string",
        default=None,
        help="binary file")
    (options, args) = parser.parse_args()

    # the binary path is mandatory
    if options.binary is None:
        logging.error("Please input the binary file path!")
        exit(-1)

    refInf = refInf_pb2.RefList()
    dumpRef(options.binary, refInf)
    # ref_cnt is not assigned in this block; presumably a module-level
    # counter updated during extraction - verify against the rest of the file
    if ref_cnt == 0:
        exit(-1)
    with open(options.output, 'wb') as pbOut:
        serialized = refInf.SerializeToString()
        pbOut.write(serialized)
Exemple #6
0
def _add_refs_to(bv, refInf, target, code_kind, data_kind, code_tag, data_tag):
    """
    Append to ``refInf`` the references to ``target`` reported by ``bv``.

    A code reference is recorded with ``code_kind`` only when ``target`` is
    contained in the result of ``getCandidateRefsFromInsn`` for the
    disassembly of the referencing address. Every data reference is
    recorded with ``data_kind``. ``code_tag`` / ``data_tag`` are the
    prefixes used in the log line of each recorded reference.
    """
    for cref in bv.get_code_refs(target):
        ref_from = cref.address
        ref_from_ins = bv.get_disassembly(ref_from)
        candidateRefs = getCandidateRefsFromInsn(ref_from_ins)
        if candidateRefs is None:
            logging.warning("cref from address 0x%x is not a fixup.", ref_from)
            continue
        if target not in candidateRefs:
            continue
        ref = refInf.ref.add()
        init_ref(ref, ref_from, target, code_kind)
        logging.info("%s: 0x%x -> 0x%x", code_tag, ref_from, target)

    for dref in bv.get_data_refs(target):
        ref = refInf.ref.add()
        init_ref(ref, dref, target, data_kind)
        logging.info("%s: 0x%x -> 0x%x", data_tag, dref, target)


def dumpRefs(bv, output):
    """
    Collect the references known to the Binary Ninja view ``bv`` and write
    them to ``output`` as a serialized ``refInf_pb2.RefList``.

    Two passes are made:
      * every instruction address of every function is used as a target;
        references to it are recorded as kind 0 (c2c) or 2 (d2c);
      * every address of every section that is not named ``.text`` is used
        as a target; references to it are recorded as kind 1 (c2d) or
        3 (d2d).

    params:
        bv: Binary Ninja view to query
        output: protobuf file path
    returns:
        None
    """
    refInf = refInf_pb2.RefList()

    for func in bv.functions:
        for ins in func.instructions:
            # element 1 of each yielded item is used as the address
            cur_addr = ins[1]
            _add_refs_to(bv, refInf, cur_addr, 0, 2, "[Code Ref]", "[Data Ref]")

    # TODO(there is no jump table references)
    for sec in bv.sections.values():
        if sec.name == '.text':
            continue
        for cur_addr in range(sec.start, sec.end):
            _add_refs_to(bv, refInf, cur_addr, 1, 3, "[code Ref]", "[data Ref]")

    # the context manager closes the file even if serialization fails
    with open(output, 'wb') as pbout:
        pbout.write(refInf.SerializeToString())