def _get_function_data(vw: VivWorkspace):
    """
    render every function known to the workspace into a single text dump.

    functions are visited in ascending address order. for each one we emit:
      - a `Function: <name>` header,
      - one tab-indented `key: value` line per metadata entry, sorted by key,
      - the rendered opcodes of each code block, one per line,
        with an empty line after each block.

    args:
      vw (VivWorkspace): the workspace to dump.

    returns:
      the `strval` of the string canvas that accumulated the text.
    """
    function_vas = sorted(vw.getFunctions())
    canvas = memcanvas.StringMemoryCanvas(vw)

    for function_va in function_vas:
        metadata = vw.getFunctionMetaDict(function_va)
        ordered_keys = sorted(metadata.keys())
        function_name = vw.getName(function_va)

        canvas.addText('Function: %s\n' % function_name)
        for key in ordered_keys:
            canvas.addText("\t%s: %s\n" % (str(key), str(metadata[key])))

        for block_va, block_size, _ in vw.getFunctionBlocks(function_va):
            # walk the block instruction by instruction,
            # advancing by the size of each decoded opcode.
            cursor = block_va
            block_end = block_va + block_size
            while cursor < block_end:
                op = vw.parseOpcode(cursor, const.LOC_OP)
                op.render(canvas)
                canvas.addText("\n")
                cursor += op.size

            canvas.addText('\n')

    return canvas.strval
def is_indirect_call(vw: VivWorkspace, va: int, insn: Optional["InstructionHandle"] = None) -> bool:
    """
    check whether the instruction is a `call` or `jmp` whose first operand is a register.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address of the instruction.
      insn: the already-decoded instruction, if the caller has one.
        when omitted, the instruction is decoded from `va`.

    returns:
      bool: True if the mnemonic is `call`/`jmp` and operand 0 is an `i386RegOper`.
    """
    if insn is None:
        insn = vw.parseOpcode(va)

    if insn.mnem not in ("call", "jmp"):
        return False

    first_operand = insn.opers[0]
    return isinstance(first_operand, envi.archs.i386.disasm.i386RegOper)
def get_coderef_from(vw: VivWorkspace, va: int) -> Optional[int]:
    """
    fetch the destination of the first code xref that originates at `va`.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address the reference starts from.

    returns:
      Optional[int]: the `XR_TO` field of the first code xref,
        or None when there are no code xrefs from `va`.
    """
    code_xrefs = vw.getXrefsFrom(va, REF_CODE)
    if not code_xrefs:
        return None

    first_xref = code_xrefs[0]
    return first_xref[XR_TO]
def is_call(vw: vivisect.VivWorkspace, va: int) -> bool:
    """
    check whether the instruction at the given address is a call.

    args:
      vw (vivisect.VivWorkspace)
      va (int): the virtual address of the instruction to inspect.

    returns:
      bool: True if the instruction decodes and has the `envi.IF_CALL` flag set.
        False if decoding fails with `envi.UnsupportedInstruction` or
        `envi.InvalidInstruction`, or if the flag is not set.
    """
    try:
        op = vw.parseOpcode(va)
    except (envi.UnsupportedInstruction, envi.InvalidInstruction) as e:
        # Python 3 exceptions do not define `.message` by default,
        # so reading it unconditionally can raise AttributeError from inside this handler.
        # prefer the attribute when the exception class provides it, else log the exception itself.
        reason = getattr(e, "message", e)
        logger.trace("  not a call instruction: failed to decode instruction: %s", reason)
        return False

    if op.iflags & envi.IF_CALL:
        return True

    logger.trace("  not a call instruction: %s", op)
    return False
def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]:
    """
    collect the instructions that flow to the given address, local to the current function.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address to inspect

    returns:
      List[int]: the prior instructions, which may fallthrough and/or jump here
    """
    ret = []

    # find the immediate prior instruction.
    # ensure that it fallsthrough to this one.
    #
    # a single lookup is enough: the second, identical `getPrevLocation` call
    # that used to follow the None check returned the same location.
    ploc = vw.getPrevLocation(va, adjacent=True)
    if ploc is not None:
        # from vivisect.const:
        # location: (L_VA, L_SIZE, L_LTYPE, L_TINFO)
        (pva, _, ptype, pinfo) = ploc

        if ptype == LOC_OP and not (pinfo & IF_NOFALL):
            ret.append(pva)

    # find any code refs, e.g. jmp, to this location.
    # ignore any calls.
    #
    # from vivisect.const:
    # xref: (XR_FROM, XR_TO, XR_RTYPE, XR_RFLAG)
    for (xfrom, _, _, xflag) in vw.getXrefsTo(va, REF_CODE):
        # skip xrefs whose flags intersect FAR_BRANCH_MASK.
        if (xflag & FAR_BRANCH_MASK) != 0:
            continue
        ret.append(xfrom)

    return ret
def addFlirtFunctionAnalyzer(vw: vivisect.VivWorkspace, analyzer: FlirtFunctionAnalyzer):
    """
    register a FLIRT function analyzer instance with the workspace.

    this mirrors the logic of `vivisect.VivWorkspace.addFuncAnalysisModule`,
    but that routine assumes the analyzer is a Python module (effectively a global).
    to avoid globals, the analyzer object is placed into the analyzer queue by hand,
    keyed by its `repr()`.

    notably, this lets a user register multiple FlirtAnalyzers for different signature sets.

    args:
      vw (vivisect.VivWorkspace): the workspace whose `fmodlist`/`fmods` are updated.
      analyzer (FlirtFunctionAnalyzer): the analyzer instance to register.

    raises:
      ValueError: when an analyzer with the same `repr()` is already registered.
    """
    analyzer_key = repr(analyzer)

    already_registered = analyzer_key in vw.fmodlist
    if already_registered:
        raise ValueError("analyzer already present")

    vw.fmodlist.append(analyzer_key)
    vw.fmods[analyzer_key] = analyzer
def resolve_indirect_call(
    vw: VivWorkspace, va: int, insn: Optional["InstructionHandle"] = None
) -> Tuple[int, Optional[int]]:
    """
    try to resolve the target of an indirect call by locating the definition
    of the register that the call goes through.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address at which to start analysis
      insn: the already-decoded instruction at `va`, if available;
        decoded from `va` when omitted.

    returns:
      (va: int, value?: int|None): the address of the assignment and the value, if a constant.

    raises:
      NotFoundError: when the definition cannot be found.
    """
    call_insn = vw.parseOpcode(va) if insn is None else insn

    # callers are expected to hand us an indirect call only.
    assert is_indirect_call(vw, va, insn=call_insn)

    # the register operand of the call is what we need to trace backwards.
    target_reg = call_insn.opers[0].reg
    return find_definition(vw, va, target_reg)
def match_function_flirt_signatures(matcher: flirt.FlirtMatcher, vw: vivisect.VivWorkspace, va: int, cache=None):
    """
    match the given FLIRT signatures against the function at the given address.
    upon success, update the workspace with match metadata, setting the function as a library function and assigning its name.
    if multiple different signatures match the function, don't do anything.

    args:
      matcher (flirt.FlirtMatcher): the compiled FLIRT signature matcher.
      vw (vivisect.workspace): the analyzed program's workspace.
      va (int): the virtual address of a function to match.
      cache (Optional[Dict[int, Union[str, None]]]): internal cache of matches VA -> name or None on "no match".
        no need to provide as external caller.

    returns:
      Optional[str]: the recognized function name, or `None`.
    """
    if cache is None:
        # we cache both successful and failed lookups.
        #
        # (callers of this function don't need to initialize the cache.
        #  we'll provide one during recursive calls when we need it.)
        #
        # while we can use funcmeta to retrieve existing successful matches,
        # we don't persist failed matches,
        # because another FLIRT matcher might come along with better knowledge.
        #
        # however, when we match reference names, especially chained together,
        # then we need to cache the negative result, or we do a ton of extra work.
        # "accidentally quadratic" or worse.
        # see https://github.com/fireeye/capa/issues/448
        cache = {}

    function_meta = vw.funcmeta.get(va)
    if not function_meta:
        # not a function, we're not going to consider this.
        return None

    if va in cache:
        return cache[va]

    if is_library_function(vw, va):
        # already matched here.
        # this might be the case if recursive matching visited this address.
        name = viv_utils.get_function_name(vw, va)
        cache[va] = name
        return name

    # NOTE(review): nothing is stored in `cache` for `va` until matching finishes below,
    # so a cycle of reference names (A references B, B references A) looks like it
    # would recurse without bound. confirm whether signatures can produce this.

    # 0x200 comes from:
    #  0x20 bytes for default byte signature size in flirt
    #  0x100 bytes for max checksum data size
    #  some wiggle room for tail bytes
    size = function_meta.get("Size", 0x200)
    # viv returns truncated data at the end of sections,
    # no need for any special logic here.
    buf = vw.readMemory(va, size)

    matches = []
    for match in matcher.match(buf):
        # collect all the name tuples (name, type, offset) with type==reference.
        # ignores other name types like "public" and "local".
        references = [n for n in match.names if n[1] == "reference"]

        if not references:
            # there are no references that we need to check, so this is a complete match.
            # common case.
            matches.append(match)
            continue

        # flirt uses reference names to assert that
        # the function contains a reference to another function with a given name.
        #
        # we need to loop through these references,
        # potentially recursively FLIRT match,
        # and check the name matches (or doesn't).

        # at the end of the following loop,
        # if this flag is still true,
        # then all the references have been validated.
        does_match_references = True

        for (ref_name, _, ref_offset) in references:
            ref_va = va + ref_offset

            # the reference offset may be inside an instruction,
            # so we use getLocation to select the containing instruction address.
            #
            # there may be no location defined at the reference address at all,
            # in which case there is nothing to resolve: the reference cannot be
            # validated, so this signature does not match.
            location = vw.getLocation(ref_va)
            if location is None:
                does_match_references = False
                break

            loc_va = location[vivisect.const.L_VA]

            # an instruction may have multiple xrefs from
            # so we loop through all code references,
            # searching for that name.
            #
            # if the name is found, then this flag will be set.
            does_match_the_reference = False

            for xref in vw.getXrefsFrom(loc_va):
                # FLIRT signatures only match code,
                # so we're only going to resolve references that point to code.
                if xref[vivisect.const.XR_RTYPE] != vivisect.const.REF_CODE:
                    continue

                target = xref[vivisect.const.XR_TO]
                found_name = match_function_flirt_signatures(matcher, vw, target, cache)

                if found_name == ref_name:
                    does_match_the_reference = True
                    break

            if not does_match_the_reference:
                does_match_references = False
                break

        if does_match_references:
            # only if all references pass do we count it.
            matches.append(match)

    if matches:
        # we may have multiple signatures that match the same function, like `strcpy`.
        # these could be copies from multiple libraries.
        # so we don't mind if there are multiple matches, as long as names are the same.
        #
        # but if there are multiple candidate names, that's a problem.
        # our signatures are not precise enough.
        # we could maybe mark the function as "is a library function", but not assign name.
        # though, if we have signature FPs among library functions, it could easily FP with user code too.
        # so safest thing to do is not make any claim about the function.
        names = list(set(map(get_match_name, matches)))
        if len(names) == 1:
            name = names[0]
            add_function_flirt_match(vw, va, name)
            cache[va] = name
            logger.debug("found library function: 0x%x: %s", va, name)
            return name
        else:
            cache[va] = None
            logger.warning("conflicting names: 0x%x: %s", va, names)
            return None

    else:
        cache[va] = None
        return None
def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Union[int, None]]:
    """
    scan backwards from the given address looking for assignments to the given register.
    if a constant, return that value.

    the search is breadth-first over the instructions returned by
    `get_previous_instructions`, visiting each address at most once.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address at which to start analysis
      reg (int): the vivisect register to study

    returns:
      (va: int, value?: int|None): the address of the assignment and the value, if a constant.

    raises:
      NotFoundError: when the definition cannot be found.
    """
    q = collections.deque()  # type: Deque[int]
    seen = set()  # type: Set[int]

    q.extend(get_previous_instructions(vw, va))
    while q:
        cur = q.popleft()

        # skip if we've already processed this location
        if cur in seen:
            continue
        seen.add(cur)

        insn = vw.parseOpcode(cur)

        if len(insn.opers) == 0:
            # no operands, so it cannot assign to our register; keep walking backwards.
            q.extend(get_previous_instructions(vw, cur))
            continue

        opnd0 = insn.opers[0]
        if not (isinstance(opnd0, i386RegOper) and opnd0.reg == reg and insn.mnem in DESTRUCTIVE_MNEMONICS):
            q.extend(get_previous_instructions(vw, cur))
            continue

        # if we reach here, the instruction is destructive to our target register.

        # we currently only support extracting the constant from something like: `mov $reg, IAT`
        # so, any other pattern results in an unknown value, represented by None.
        # this is a good place to extend in the future, if we need more robust support.
        if insn.mnem != "mov":
            return (cur, None)
        else:
            opnd1 = insn.opers[1]
            # the operand accessors take the containing instruction as their argument;
            # pass `insn` in every branch rather than the operand itself,
            # matching what the RIP-relative case already does.
            if isinstance(opnd1, i386ImmOper):
                return (cur, opnd1.getOperValue(insn))
            elif isinstance(opnd1, i386ImmMemOper):
                return (cur, opnd1.getOperAddr(insn))
            elif isinstance(opnd1, Amd64RipRelOper):
                return (cur, opnd1.getOperAddr(insn))
            else:
                # might be something like: `mov $reg, dword_401000[eax]`
                return (cur, None)

    raise NotFoundError()
def saveWorkspace(vw: VivWorkspace, filename: str):
    """
    export the workspace's events and write them to `filename`.

    args:
      vw (VivWorkspace): the workspace to save.
      filename (str): the destination path, handed to `vivEventsToFile` with mode 'w'.
    """
    # NOTE(review): the text produced here is discarded; the call is kept as-is
    # since it walks and decodes every function before export - confirm whether
    # that side effect is intended.
    _get_function_data(vw)

    vivEventsToFile(filename, vw.exportWorkspace(), mode='w', vw=vw)