def processRefs(output):
    """
    Walk every head in the IDA database, keep the cross references that
    pass the filters below, and write them out as a serialized RefList.

    For a code head the instruction is decoded and handed to
    getCandidateRefsFromInsn(); only xrefs whose target appears in that
    mapping are kept, and the value stored for the target is used as the
    reference address. Code heads for which no mapping is obtained are
    skipped entirely. Data heads keep every xref, with the head address
    as the reference address. Targets rejected by is_invalid_ea() are
    dropped in both cases.

    params:
        output: path of the protobuf file to write
    returns:
        None
    """
    # (source is code, target is code) -> value stored in ref.kind
    kind_of = {
        (True, True): 0,    # c2c
        (True, False): 1,   # c2d
        (False, True): 2,   # d2c
        (False, False): 3,  # d2d
    }
    ref_list = refInf_pb2.RefList()

    for ea in idautils.Heads():
        from_code = bool(idc.is_code(idc.get_full_flags(ea)))
        operand_refs = None
        if from_code:
            insn = ida_ua.insn_t()
            if ida_ua.decode_insn(insn, ea) > 0:
                operand_refs = getCandidateRefsFromInsn(insn)
            # nothing usable could be extracted from this instruction
            if operand_refs is None:
                continue

        for xref in idautils.XrefsFrom(ea, 0):
            dest = xref.to
            src = ea
            if from_code:
                # the target must show up in the instruction's own operands
                if dest not in operand_refs:
                    continue
                src = operand_refs[dest]
            if is_invalid_ea(dest):
                continue

            logging.debug(
                "Ref: 0x%x -> 0x%x, type is %s" %
                (src, dest, idautils.XrefTypeName(xref.type)))

            entry = ref_list.ref.add()
            entry.ref_va = src
            entry.target_va = dest
            # default value
            entry.ref_size = 8
            to_code = bool(idc.is_code(idc.get_full_flags(dest)))
            entry.kind = kind_of[(from_code, to_code)]

    # save the protobuf result
    with open(output, 'wb') as pbOut:
        pbOut.write(ref_list.SerializeToString())
def dumpRefs(output):
    """
    Collect the references of the current Ghidra program and write them
    to `output` as a serialized RefList.

    Instructions: a reference is kept only when it is not a stack
    reference, its target offset also occurs as an address/constant
    varnode in the instruction's p-code, and that target has not already
    been recorded for the same instruction.

    Other code units (data): only the first reference seen from each
    source address is recorded.

    The scan aborts with exit(-1) once time.time() passes `timeout`,
    a name defined outside this function.

    params:
        output: path of the protobuf file to write
    returns:
        None
    """
    refInf = refInf_pb2.RefList()
    listing = currentProgram.getListing()
    # source addresses of the data references that were already emitted
    refSet = set()

    for codeunit in listing.getCodeUnits(True):
        if time.time() > timeout:
            logging.error("Oh, sorry! time out!")
            exit(-1)

        if isinstance(codeunit, Instruction):
            # every address/constant offset the instruction's p-code touches
            target_vas = set()
            for pcode in codeunit.getPcode():
                for varnode in pcode.getInputs():
                    if varnode.isAddress() or varnode.isConstant():
                        target_vas.add(varnode.getOffset())
                output_varnode = pcode.getOutput()
                if output_varnode is not None and \
                        (output_varnode.isAddress() or output_varnode.isConstant()):
                    target_vas.add(output_varnode.getOffset())

            collected_refs = set()
            for xref in codeunit.getReferencesFrom():
                if xref.isStackReference():
                    continue
                target_va = xref.getToAddress().getOffset()
                # make sure the target_va is in current instruction's
                # internal representation, and emit each target only once
                if target_va not in target_vas or target_va in collected_refs:
                    continue
                collected_refs.add(target_va)

                ref = refInf.ref.add()
                ref.ref_va = xref.getFromAddress().getOffset()
                ref.target_va = target_va
                # TODO: Get the correct size and kind
                ref.ref_size = 8
                ref.kind = 0
                logging.info("[code]: From 0x%x -> 0x%x" % (ref.ref_va, ref.target_va))
        else:
            for xref in codeunit.getReferencesFrom():
                ref_addr = xref.getFromAddress().getOffset()
                if ref_addr in refSet:
                    continue
                logging.info("[data]: From 0x%x -> 0x%x" %
                             (xref.getFromAddress().getOffset(), xref.getToAddress().getOffset()))
                refSet.add(ref_addr)

                ref = refInf.ref.add()
                ref.ref_va = xref.getFromAddress().getOffset()
                # keep only the low 64 bits (presumably to turn a negative
                # offset into its unsigned form -- confirm)
                ref.target_va = xref.getToAddress().getOffset() & 0xffffffffffffffff
                ref.ref_size = 0
                ref.kind = 0

    logging.debug("Collect Refs done! ready to write output...")
    # Serialize before opening so a serialization failure does not leave a
    # truncated file behind; `with` closes the handle even if write() fails.
    payload = refInf.SerializeToString()
    with open(output, "wb") as f:
        f.write(payload)
if not_included is not None:
    logging.debug(
        "Append the not included functions! {0}".format(not_included))
    # The original statement here was a bare `!=` comparison whose result
    # was thrown away, so nothing was ever appended. Merge in place instead.
    # NOTE(review): assumes notIncludedLinkerFunc is a set -- confirm.
    notIncludedLinkerFunc.update(not_included)

load_range = getLoadAddressRange(options.binary)
load_range = enlargeRange(load_range)
logging.debug("load range is {}".format(load_range))

getLinkerFunctionRange(options.binary)
elfclass = readElfClass(options.binary)
MD = init_capstone(elfclass)
readSecRange(options.binary)
loadedSegs = get_loaded_info(options.binary)

PIE = isPIE(options.binary)
if PIE:
    # only assigned for PIE binaries
    disassembler_base_addr = getPIEBaseOffset(options.comparedfile)

refInf1 = refInf_pb2.RefList()
refInf2 = refInf_pb2.RefList()

# ground truth references
try:
    with open(options.groundtruth, 'rb') as f1:
        refInf1.ParseFromString(f1.read())
except Exception:
    print("Could not open the file: %s\n" % options.groundtruth)
    exit(-1)

# references produced by the tool under comparison
try:
    with open(options.comparedfile, 'rb') as f2:
        refInf2.ParseFromString(f2.read())
except Exception:
    print("Could not open the file: %s\n" % options.comparedfile)
def parse_fixup(pdb_file, bin_path, output):
    """
    Turn the fixup records of a PDB into a serialized RefList.

    Steps:
      1. run `dumpbin -HEADERS` on the binary and read its image base;
      2. run `cvdump -fixup` on the PDB;
      3. for every row after the `-------` separator, add a ref whose
         ref_va / target_va are image base + the row's 3rd / 4th column
         and whose ref_size is derived from the row's 1st column;
      4. write the RefList to `output` and delete the scratch file.

    Exits with status -1 when dumpbin fails, the image base cannot be
    found, or one of the cvdump status checks trips.

    NOTE: bin_path and pdb_file are interpolated unquoted into shell
    command lines; paths containing spaces or shell metacharacters will
    not work as intended.

    params:
        pdb_file: path of the PDB handed to cvdump
        bin_path: path of the binary handed to dumpbin
        output:   path of the protobuf file to write
    returns:
        None
    """
    tmp_file = './tmp_cvdump.fixup'

    # --- image base, taken from the dumpbin header listing ---------------
    with open(tmp_file, 'w') as fout:
        ret_status = subprocess.call("dumpbin -HEADERS %s" % bin_path, stdout=fout, shell = True)
        if ret_status:
            logging.error("dump symbols of binary")
            exit(-1)

    image_base = 0x0
    find_base = False
    with open(tmp_file, 'r') as fin:
        for line in fin:
            if 'image base' in line:
                # the first column of that line is the base, in hex
                image_base = int(line.strip().split()[0], 16)
                logging.debug("Image base is 0x%lx", image_base)
                find_base = True
                break

    if not find_base:
        logging.error("Can't parse the image base correctly!")
        exit(-1)

    # --- fixup listing from cvdump ---------------------------------------
    # NOTE(review): with shell=True a missing executable normally yields a
    # positive status, so the `< 0` checks below may never fire. Left as is
    # because cvdump's own exit codes are not known here -- confirm.
    ret_status = subprocess.call('cvdump -?', stdout = subprocess.PIPE,
                                 stderr = subprocess.PIPE, shell=True)
    if ret_status < 0:
        logging.error("Can't find cvdump executable. please install cvdump firstly!")
        exit(-1)

    with open(tmp_file, 'w') as fout:
        ret_status = subprocess.call('cvdump -fixup %s' % pdb_file, stdout=fout,
                                     stderr=sys.stderr, shell = True)
        if ret_status < 0:
            logging.error("cvdump -fixup error!")
            exit(-1)

    # fixup type -> reference size in bytes; every other type is 4 bytes
    # ref: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
    size_by_type = {0x1: 8, 0xa: 2, 0xc: 1}

    ref_inf = refInf_pb2.RefList()
    in_table = False
    with open(tmp_file, 'r') as fin:
        for line in fin:
            line = line.strip()
            if not in_table:
                # rows start after the dashed separator line
                if '-------' in line:
                    in_table = True
                continue

            cur_list = line.split()
            # Blank or truncated lines carry no fixup. Skip them before a
            # ref is added, so they neither crash nor leave an empty record.
            if len(cur_list) < 4:
                continue

            ref_type = int(cur_list[0], 16)
            ref_rva = int(cur_list[2], 16)
            ref_target = int(cur_list[3], 16)

            ref = ref_inf.ref.add()
            ref.ref_va = image_base + ref_rva
            ref.target_va = image_base + ref_target
            ref.ref_size = size_by_type.get(ref_type, 4)
            logging.debug("current fixup: 0x%lx -> 0x%lx" % (ref.ref_va, ref.target_va))
            # TODO. add support of fixup type(d2d, d2c, c2c, c2d)

    with open(output, 'wb') as output_file:
        output_file.write(ref_inf.SerializeToString())

    # Remove the scratch file without shelling out to `rm`. Cleanup is
    # best-effort: the result has already been written.
    try:
        os.remove(tmp_file)
    except OSError:
        logging.warning("could not remove temporary file %s", tmp_file)
(i, ref.ref_va, ref.target_va))


def dumpRef(binary, refInf):
    """
    Load `binary` into an angr project and pass it to extract().

    params:
        binary: path of the binary file to analyse
        refInf: RefList message handed on to extract()
                (presumably filled in there -- confirm)
    returns:
        None
    """
    # logging.getLogger('angr.analyses.cfg.indirect_jump_solvers.jumptable').setLevel(logging.DEBUG)
    # auto_load_libs=False: load only the binary itself, not its libraries
    p = angr.Project(binary, load_options={'auto_load_libs': False})
    print("load project done!")
    extract(p, refInf)


# Command-line entry point: parse the options, collect the references of
# the given binary and write them to the output file.
if __name__ == "__main__":
    parser = optparse.OptionParser()
    parser.add_option("-o", "--output", dest = "output", action = "store", type = "string", \
            help = "output of the protobuf file", default = "/tmp/angr_refs.pb2")
    parser.add_option("-b", "--binary", dest = "binary", action = "store", type = "string", \
            help = "binary file", default = None)
    # NOTE(review): --ss reuses the help text of --binary and is not read
    # anywhere in the code visible here -- confirm whether it is still needed.
    parser.add_option("-s", "--ss", dest = "ss", action = "store", type = "string", \
            help = "binary file", default = None)

    (options, args) = parser.parse_args()
    # the binary path is mandatory
    if options.binary == None:
        logging.error("Please input the binary file path!")
        exit(-1)

    refInf = refInf_pb2.RefList()
    dumpRef(options.binary, refInf)

    # ref_cnt is not assigned in the code visible here; presumably a
    # module-level counter of collected references -- verify. When it is
    # zero the script exits with an error status without writing output.
    if ref_cnt == 0:
        exit(-1)

    with open(options.output, 'wb') as pbOut:
        pbOut.write(refInf.SerializeToString())
def _collect_refs_to(bv, refInf, cur_addr, code_kind, data_kind, code_fmt, data_fmt):
    """Add to refInf the code and data references that bv reports to cur_addr."""
    for cref in bv.get_code_refs(cur_addr):
        ref_from = cref.address
        ref_from_ins = bv.get_disassembly(ref_from)
        candidateRefs = getCandidateRefsFromInsn(ref_from_ins)
        if candidateRefs is None:
            logging.warning("cref from address 0x%x is not a fixup." % (ref_from))
            continue
        # keep the reference only if the referencing instruction really
        # mentions cur_addr
        if cur_addr not in candidateRefs:
            continue
        ref = refInf.ref.add()
        init_ref(ref, ref_from, cur_addr, code_kind)
        logging.info(code_fmt % (ref_from, cur_addr))

    for dref in bv.get_data_refs(cur_addr):
        ref = refInf.ref.add()
        init_ref(ref, dref, cur_addr, data_kind)
        logging.info(data_fmt % (dref, cur_addr))


def dumpRefs(bv, output):
    """
    Collect the references known to the binary view and write them to
    `output` as a serialized RefList.

    Two passes are made:
      * every instruction address of every function is treated as a
        target; references to it are stored with kind 0 (c2c, from code)
        or kind 2 (d2c, from data);
      * every address of every section except '.text' is treated as a
        target; references to it are stored with kind 1 (c2d) or
        kind 3 (d2d).

    A code reference is only kept when getCandidateRefsFromInsn() on the
    disassembly of the referencing instruction yields a collection that
    contains the target address.

    params:
        bv:     binary view to query
        output: path of the protobuf file to write
    returns:
        None
    """
    refInf = refInf_pb2.RefList()

    # targets inside functions
    for func in bv.functions:
        for ins in func.instructions:
            # ins[1] is used as the instruction's address
            _collect_refs_to(bv, refInf, ins[1], 0, 2,
                             "[Code Ref]: 0x%x -> 0x%x",
                             "[Data Ref]: 0x%x -> 0x%x")

    # TODO(there is no jump table references)
    # targets in every section other than .text
    for sec in bv.sections.values():
        if sec.name == '.text':
            continue
        for cur_addr in range(sec.start, sec.end):
            _collect_refs_to(bv, refInf, cur_addr, 1, 3,
                             "[code Ref]: 0x%x -> 0x%x",
                             "[data Ref]: 0x%x -> 0x%x")

    # Serialize before opening so a serialization failure does not leave a
    # truncated file behind; `with` closes the handle even if write() fails.
    payload = refInf.SerializeToString()
    with open(output, 'wb') as pbout:
        pbout.write(payload)