Esempio n. 1
0
def _get_function_data(vw: VivWorkspace):
    """
    render every function known to the workspace into a single text blob.

    functions are visited in sorted order of the values returned by `vw.getFunctions()`.
    for each function we emit its name, its metadata entries (sorted by key),
    and then the rendering of each instruction in each of its code blocks.

    args:
      vw (VivWorkspace): the workspace to dump.

    returns:
      the `strval` of the string canvas that accumulated all of the text.
    """
    function_vas = sorted(vw.getFunctions())

    canvas = memcanvas.StringMemoryCanvas(vw)

    for funcva in function_vas:
        meta = vw.getFunctionMetaDict(funcva)
        ordered_keys = sorted(meta.keys())
        name = vw.getName(funcva)

        # header line, followed by one tab-indented line per metadata entry.
        canvas.addText('Function: %s\n' % name)
        for key in ordered_keys:
            canvas.addText("\t%s: %s\n" % (str(key), str(meta[key])))

        # walk each code block linearly, one instruction at a time,
        # advancing by the size of the instruction that was just decoded.
        for block_va, block_size, _ in vw.getFunctionBlocks(funcva):
            cursor = block_va
            block_end = block_va + block_size
            while cursor < block_end:
                op = vw.parseOpcode(cursor, const.LOC_OP)
                op.render(canvas)
                canvas.addText("\n")
                cursor += op.size

        # blank line between functions.
        canvas.addText('\n')

    return canvas.strval
Esempio n. 2
0
def is_indirect_call(vw: VivWorkspace,
                     va: int,
                     insn: Optional["InstructionHandle"] = None) -> bool:
    """
    check whether the instruction is a `call` or `jmp` whose first operand is a register.

    args:
      vw (VivWorkspace): the workspace, used to decode the instruction when `insn` is not given.
      va (int): the virtual address of the instruction.
      insn: the already-decoded instruction at `va`, if the caller has one.

    returns:
      bool: True when the mnemonic is "call" or "jmp" and operand 0 is an `i386RegOper`.
    """
    if insn is None:
        insn = vw.parseOpcode(va)

    # anything other than call/jmp is rejected before the operands are inspected.
    if insn.mnem not in ("call", "jmp"):
        return False

    first_operand = insn.opers[0]
    return isinstance(first_operand, envi.archs.i386.disasm.i386RegOper)
Esempio n. 3
0
def get_coderef_from(vw: VivWorkspace, va: int) -> Optional[int]:
    """
    fetch the destination of the first code xref that originates at `va`.

    args:
      vw (VivWorkspace): the workspace to query.
      va (int): the virtual address the reference starts from.

    returns:
      Optional[int]: the `XR_TO` field of the first `REF_CODE` xref from `va`,
        or None when there are no such xrefs.
    """
    code_xrefs = vw.getXrefsFrom(va, REF_CODE)
    return code_xrefs[0][XR_TO] if len(code_xrefs) > 0 else None
def is_call(vw: vivisect.VivWorkspace, va: int) -> bool:
    """
    check whether the instruction at the given address is a call.

    args:
      vw (vivisect.VivWorkspace): the workspace used to decode the instruction.
      va (int): the virtual address of the instruction to inspect.

    returns:
      bool: True if the decoded instruction has the `envi.IF_CALL` flag set.
        False otherwise, including when the instruction cannot be decoded.
    """
    try:
        op = vw.parseOpcode(va)
    except (envi.UnsupportedInstruction, envi.InvalidInstruction) as e:
        # log the exception object itself rather than `e.message`:
        # python 3 exceptions have no `.message` attribute, so reading it
        # would raise AttributeError from inside this handler instead of returning False.
        logger.trace("  not a call instruction: failed to decode instruction: %s", e)
        return False

    if op.iflags & envi.IF_CALL:
        return True

    logger.trace("  not a call instruction: %s", op)
    return False
Esempio n. 5
0
def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]:
    """
    collect the instructions that flow to the given address, local to the current function.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address to inspect

    returns:
      List[int]: the prior instructions, which may fallthrough and/or jump here
    """
    ret = []

    # find the immediate prior instruction.
    # ensure that it fallsthrough to this one.
    #
    # the location is looked up once; a second identical query would return the same answer.
    ploc = vw.getPrevLocation(va, adjacent=True)
    if ploc is not None:
        # from vivisect.const:
        # location: (L_VA, L_SIZE, L_LTYPE, L_TINFO)
        (pva, _, ptype, pinfo) = ploc

        if ptype == LOC_OP and not (pinfo & IF_NOFALL):
            ret.append(pva)

    # find any code refs, e.g. jmp, to this location.
    # ignore any calls (xrefs whose flags intersect FAR_BRANCH_MASK).
    #
    # from vivisect.const:
    # xref: (XR_FROM, XR_TO, XR_RTYPE, XR_RFLAG)
    for (xfrom, _, _, xflag) in vw.getXrefsTo(va, REF_CODE):
        if (xflag & FAR_BRANCH_MASK) != 0:
            continue
        ret.append(xfrom)

    return ret
Esempio n. 6
0
def addFlirtFunctionAnalyzer(vw: vivisect.VivWorkspace,
                             analyzer: FlirtFunctionAnalyzer):
    """
    register a FLIRT function analyzer instance with the workspace.

    this mirrors the logic of `vivisect.VivWorkspace.addFuncAnalysisModule`,
    except that routine expects the analyzer to be a Python module (effectively a global).
    here the analyzer object is placed into the analyzer queue by hand instead,
    which lets a caller register several FlirtAnalyzers for different signature sets.

    args:
      vw (vivisect.VivWorkspace): the workspace whose `fmodlist` and `fmods` are updated.
      analyzer (FlirtFunctionAnalyzer): the analyzer to register, keyed by its `repr`.

    raises:
      ValueError: when an analyzer with the same `repr` is already in `vw.fmodlist`.
    """
    handle = repr(analyzer)

    already_registered = handle in vw.fmodlist
    if already_registered:
        raise ValueError("analyzer already present")

    # record the ordering first, then the lookup from key to analyzer.
    vw.fmodlist.append(handle)
    vw.fmods[handle] = analyzer
Esempio n. 7
0
def resolve_indirect_call(vw: VivWorkspace,
                          va: int,
                          insn: Optional["InstructionHandle"] = None
                          ) -> Tuple[int, Optional[int]]:
    """
    given an indirect call instruction, try to find where its target register was assigned.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address of the indirect call, where the backwards search starts
      insn: the already-decoded instruction at `va`; decoded from the workspace when omitted

    returns:
      (va: int, value?: int|None): the address of the assignment and the value, if a constant.

    raises:
      NotFoundError: when the definition cannot be found.
      AssertionError: when the instruction is not an indirect call.
    """
    insn = vw.parseOpcode(va) if insn is None else insn

    assert is_indirect_call(vw, va, insn=insn)

    # the register used as the call target is the first operand.
    target_reg = insn.opers[0].reg
    return find_definition(vw, va, target_reg)
Esempio n. 8
0
def match_function_flirt_signatures(matcher: flirt.FlirtMatcher,
                                    vw: vivisect.VivWorkspace,
                                    va: int,
                                    cache=None):
    """
    match the given FLIRT signatures against the function at the given address.
    upon success, update the workspace with match metadata, setting the
    function as a library function and assigning its name.

    if multiple different signatures match the function, don't do anything.

    args:
      matcher (flirt.FlirtMatcher): the compiled FLIRT signature matcher.
      vw (vivisect.workspace): the analyzed program's workspace.
      va (int): the virtual address of a function to match.
      cache (Optional[Dict[int, Union[str, None]]]): internal cache of matches VA -> name or None on "no match".
       no need to provide as external caller.

    returns:
      Optional[str]: the recognized function name, or `None`.
    """
    if cache is None:
        # we cache both successful and failed lookups.
        #
        # (callers of this function don't need to initialize the cache.
        #  we'll provide one during recursive calls when we need it.)
        #
        # while we can use funcmeta to retrieve existing successful matches,
        # we don't persist failed matches,
        # because another FLIRT matcher might come along with better knowledge.
        #
        # however, when we match reference names, especially chained together,
        # then we need to cache the negative result, or we do a ton of extra work.
        # "accidentally quadratic" or worse.
        # see https://github.com/fireeye/capa/issues/448
        cache = {}

    function_meta = vw.funcmeta.get(va)
    if not function_meta:
        # not a function, we're not going to consider this.
        return None

    if va in cache:
        return cache[va]

    if is_library_function(vw, va):
        # already matched here.
        # this might be the case if recursive matching visited this address.
        name = viv_utils.get_function_name(vw, va)
        cache[va] = name
        return name

    # 0x200 comes from:
    #  0x20 bytes for default byte signature size in flirt
    #  0x100 bytes for max checksum data size
    #  some wiggle room for tail bytes
    size = function_meta.get("Size", 0x200)
    # viv returns truncated data at the end of sections,
    # no need for any special logic here.
    buf = vw.readMemory(va, size)

    matches = []
    for match in matcher.match(buf):
        # collect all the name tuples (name, type, offset) with type==reference.
        # ignores other name types like "public" and "local".
        references = [n for n in match.names if n[1] == "reference"]

        # with no references to check, this is a complete match (the common case).
        # otherwise, only count the match if every reference is validated.
        if not references or _flirt_references_match(matcher, vw, va, references, cache):
            matches.append(match)

    if not matches:
        cache[va] = None
        return None

    # we may have multiple signatures that match the same function, like `strcpy`.
    # these could be copies from multiple libraries.
    # so we don't mind if there are multiple matches, as long as names are the same.
    #
    # but if there are multiple candidate names, that's a problem.
    # our signatures are not precise enough.
    # we could maybe mark the function as "is a library function", but not assign name.
    # though, if we have signature FPs among library functions, it could easily FP with user code too.
    # so safest thing to do is not make any claim about the function.
    names = list(set(map(get_match_name, matches)))
    if len(names) != 1:
        cache[va] = None
        logger.warning("conflicting names: 0x%x: %s", va, names)
        return None

    name = names[0]
    add_function_flirt_match(vw, va, name)
    cache[va] = name
    logger.debug("found library function: 0x%x: %s", va, name)
    return name


def _flirt_references_match(matcher, vw, va, references, cache):
    """
    check that every FLIRT reference name of a candidate match is satisfied.

    flirt uses reference names to assert that the function contains
    a reference to another function with a given name.
    for each reference we follow the code xrefs from the containing location,
    FLIRT matching the targets (recursively, sharing `cache`), and compare names.

    args:
      matcher (flirt.FlirtMatcher): the compiled FLIRT signature matcher.
      vw (vivisect.workspace): the analyzed program's workspace.
      va (int): the virtual address of the function being matched.
      references: the (name, type, offset) tuples with type == "reference".
      cache: the shared VA -> name-or-None cache.

    returns:
      bool: True only if every reference resolves to a code target with the expected name.
    """
    for (ref_name, _, ref_offset) in references:
        ref_va = va + ref_offset

        # the reference offset may be inside an instruction,
        # so we use getLocation to select the containing instruction address.
        loc = vw.getLocation(ref_va)
        if loc is None:
            # nothing is defined at the reference offset,
            # so there can be no xref from here and the reference cannot be validated.
            return False
        loc_va = loc[vivisect.const.L_VA]

        # an instruction may have multiple xrefs from
        # so we loop through all code references,
        # searching for that name.
        for xref in vw.getXrefsFrom(loc_va):
            # FLIRT signatures only match code,
            # so we're only going to resolve references that point to code.
            if xref[vivisect.const.XR_RTYPE] != vivisect.const.REF_CODE:
                continue

            target = xref[vivisect.const.XR_TO]
            found_name = match_function_flirt_signatures(matcher, vw, target, cache)
            if found_name == ref_name:
                break
        else:
            # no xref from this location led to the expected name.
            return False

    return True
Esempio n. 9
0
def find_definition(vw: VivWorkspace, va: int,
                    reg: int) -> Tuple[int, Union[int, None]]:
    """
    scan backwards from the given address looking for assignments to the given register.
    if a constant, return that value.

    the search is breadth-first over the instructions that flow to `va`
    (as reported by `get_previous_instructions`), visiting each address at most once.

    args:
      vw (vivisect.Workspace)
      va (int): the virtual address at which to start analysis
      reg (int): the vivisect register to study

    returns:
      (va: int, value?: int|None): the address of the assignment and the value, if a constant.

    raises:
      NotFoundError: when the definition cannot be found.
    """
    q = collections.deque()  # type: Deque[int]
    seen = set()  # type: Set[int]

    q.extend(get_previous_instructions(vw, va))
    while q:
        cur = q.popleft()

        # skip if we've already processed this location
        if cur in seen:
            continue
        seen.add(cur)

        insn = vw.parseOpcode(cur)

        # an instruction defines our register when its first operand is that register
        # and its mnemonic is one that overwrites its destination.
        defines_reg = (len(insn.opers) > 0
                       and isinstance(insn.opers[0], i386RegOper)
                       and insn.opers[0].reg == reg
                       and insn.mnem in DESTRUCTIVE_MNEMONICS)
        if not defines_reg:
            # not a definition: keep walking backwards through its predecessors.
            q.extend(get_previous_instructions(vw, cur))
            continue

        # if we reach here, the instruction is destructive to our target register.

        # we currently only support extracting the constant from something like: `mov $reg, IAT`
        # so, any other pattern results in an unknown value, represented by None.
        # this is a good place to extend in the future, if we need more robust support.
        if insn.mnem != "mov":
            return (cur, None)

        # the operand accessors take the containing opcode as their argument
        # (RIP-relative operands need it to compute the address),
        # so pass `insn` consistently rather than the operand itself.
        opnd1 = insn.opers[1]
        if isinstance(opnd1, i386ImmOper):
            return (cur, opnd1.getOperValue(insn))
        if isinstance(opnd1, (i386ImmMemOper, Amd64RipRelOper)):
            return (cur, opnd1.getOperAddr(insn))

        # might be something like: `mov $reg, dword_401000[eax]`
        return (cur, None)

    raise NotFoundError()
Esempio n. 10
0
def saveWorkspace(vw: VivWorkspace, filename: str):
    """
    export the workspace's events and write them out via `vivEventsToFile`.

    args:
      vw (VivWorkspace): the workspace to save.
      filename (str): handed to `vivEventsToFile` as the destination, written with mode 'w'.
    """
    # the rendered text is discarded here.
    # NOTE(review): presumably called for a side effect of walking every function - confirm before removing.
    _get_function_data(vw)

    vivEventsToFile(filename, vw.exportWorkspace(), mode='w', vw=vw)