Esempio n. 1
0
    def decode(self, data, addr):
        """Decode the first instruction capstone yields for ``data`` at ``addr``.

        Register operands, and the base register of memory operands, are
        collected as a list of register names and as a string in which every
        name is preceded by a single space.  An immediate operand or a
        non-zero memory displacement is recorded separately, together with a
        flag telling whether one was seen.

        Returns the ``RVInstruction`` built from the first disassembled
        instruction, or ``None`` when the disassembler yields nothing.
        """
        for insn in self._md.disasm(data, addr):
            reg_names = []
            immediate = 0
            has_immediate = False

            for operand in insn.operands:
                kind = operand.type

                if kind == RISCV_OP_REG:
                    reg_names.append(insn.reg_name(operand.value.reg))
                elif kind == RISCV_OP_IMM:
                    immediate = operand.value.imm
                    has_immediate = True
                elif kind == RISCV_OP_MEM:
                    # A zero base / displacement is treated as "not present".
                    if operand.mem.base != 0:
                        reg_names.append(insn.reg_name(operand.mem.base))
                    if operand.mem.disp != 0:
                        immediate = operand.mem.disp
                        has_immediate = True
                else:
                    log_warn(
                        f"[RISC-V] unhandled capstone instruction type {kind!r}"
                    )

            operand_text = "".join(" " + reg for reg in reg_names)

            # Only the first instruction is wanted, so return from inside the loop.
            return RVInstruction(insn.address, insn.size, insn.mnemonic,
                                 operand_text, reg_names, immediate,
                                 has_immediate)
Esempio n. 2
0
 def check_path_substitution(self, path):
     """Return an existing substituted version of ``path``, or "" if none exists.

     Registered original paths are tried from longest to shortest.  Each one
     that occurs in ``path`` is replaced by its substitute, ``~`` is expanded,
     and the first result that exists on disk is returned.  A failure is
     reported once per path, and only when substitutions are configured.
     """
     attempted = []
     longest_first = sorted(self.path_substitutions, key=len, reverse=True)

     for original in longest_first:
         if original not in path:
             continue
         replacement = self.path_substitutions[original]
         candidate = os.path.expanduser(path.replace(original, replacement))
         attempted.append(candidate)
         if os.path.exists(candidate):
             return candidate

     # Already reported for this path: stay quiet.
     if path in self.failed_substitutions:
         return ""

     # Only complain if the user has actually configured translations.
     if self.path_substitutions:
         log_warn("Failed to find substitution for %s" % path)
         log_info("Current substitution paths:")
         for orig_path, sub_path in self.path_substitutions.items():
             log_info("  %s => %s" % (orig_path, sub_path))
         log_info("Matching patterns' failed substitute paths:")
         for candidate in attempted:
             log_info("  %s" % candidate)

     self.failed_substitutions.append(path)
     return ""
    def check_relocations_with_readelf(self, syms, sym, refs):
        """Cross-check Binary Ninja's xrefs for ``sym`` against ``readelf -r``.

        Args:
            syms: all symbols found for the name; when there is more than one,
                the Binary Ninja count is bumped by one (GOT xref).
            sym: the symbol being checked; only ``sym.name`` is used.
            refs: iterable of references exposing an ``address`` attribute.

        Nothing is returned; mismatches are reported through the log.

        Raises:
            OSError / subprocess.CalledProcessError: if ``readelf`` cannot be
                run or exits with an error.
        """
        # Collect once: ``refs`` may be a one-shot iterator, and each xref
        # address must be counted only once (overlapping funcs).
        binja_reloc_addrs = {ref.address for ref in refs}

        # Names are cut to 22 characters, presumably because readelf truncates
        # long symbol names in its relocation table -- TODO confirm.
        needle = " %s" % sym.name[:22]

        # Argument list instead of a shell pipeline: no quoting problems with
        # the file name, and "no matching line" is not an error any more.
        # universal_newlines=True makes the output ``str`` rather than bytes.
        output = subprocess.check_output(
            ["readelf", "-r", self.bv.file.filename], universal_newlines=True
        )

        # NOTE(review): the readelf set is seeded with Binary Ninja's own
        # addresses, exactly as before; confirm this is intended, since it
        # makes the Binary Ninja set a subset of the readelf set.
        readelf_reloc_addrs = set(binja_reloc_addrs)
        for line in output.splitlines():
            if needle not in line:
                continue
            try:
                # First column is the relocation offset in hex.
                readelf_reloc_addrs.add(int(line.split(None, 1)[0], 16) - 1)
            except ValueError:
                # Matching line that is not a relocation entry (e.g. a header).
                continue
        readelf_nr_relocs = len(readelf_reloc_addrs)

        binja_nr_relocs = len(binja_reloc_addrs)

        # Because there is a xref to the GOT which binja removes (I think)
        if len(syms) > 1:
            binja_nr_relocs += 1

        msg = "External symbol: relocs amount for '%s': Readelf=%-2d; Binja=%-2d" % (
            sym.name, readelf_nr_relocs, binja_nr_relocs
        )
        if readelf_nr_relocs != binja_nr_relocs:
            log_error(msg)
            log_error(
                "Different is: %s" %
                " ".join(hex(x) for x in readelf_reloc_addrs.difference(binja_reloc_addrs))
            )

        if binja_nr_relocs > readelf_nr_relocs:
            log_warn("Such ninja")
Esempio n. 4
0
def addr2line(executable, offset):
    """Returns the line of source like "<file>:<line #>:<function_name>"

    Runs ``addr2line -e <executable> -a 0x<offset> -f`` and parses its output.

    Returns "ERROR: str(exception)" or "?" on failure."""
    # Build argv as a list: splitting a formatted command string on whitespace
    # breaks executables whose path contains spaces.
    argv = ["addr2line", "-e", str(executable), "-a", "0x%x" % offset, "-f"]
    out = err = ""
    try:
        # Spawning is inside the try so that a missing addr2line binary is
        # reported through the documented "ERROR: ..." return value.
        child = subprocess.Popen(argv,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        out, err = child.communicate()
        if not isinstance(out, str):
            out = out.decode()
        output_lines = out.split("\n")
        # output_lines[0] is the echoed address, e.g. "0x00025ff4"
        function_name = output_lines[1].strip()  # e.g. "png_get_current_pass_number"
        source_line = output_lines[2].strip()  # e.g. "/path/to/pngtrans.c:861"
    except Exception as e:
        log_warn("[!] Exception encountered in addr2line: %s" % str(e))
        log_info("    stdout: %s" % out)
        log_info("    stderr: %s" % err)
        return "ERROR: %s" % str(e)
    if source_line.startswith("??") or source_line.endswith("?"):
        return "?"
    return ":".join((source_line, function_name))
Esempio n. 5
0
	def port_symbols(self, i):
		"""Copy the name of match entry ``i`` from the other binary onto this one.

		``self.role`` is used as the index (0 or 1) of this view inside the
		entry; the entry is read through its "name1"/"name2" and
		"address1"/"address2" keys.  Nothing happens when no role is set, when
		the source has no name, or when the target already carries a
		non-automatic symbol.
		"""
		if self.role is None:
			return

		entry = self.match_model.entries[i]
		target_index = self.role
		source_index = 1 if target_index == 0 else 0

		# Entry keys are 1-based ("name1", "address2", ...).
		source_name = entry["name{}".format(source_index + 1)]
		target_address = entry["address{}".format(target_index + 1)]

		old_sym = self.bv.get_symbol_at(target_address)

		target_name = old_sym.name if old_sym else None
		target_text = target_name if target_name else "<unnamed>"

		if not source_name:
			bn.log_warn("Port symbols: {} @ {:x} has no source name, skipping".format(target_text, target_address))
			return

		# Never overwrite a name that is not an automatic one.
		if old_sym and not old_sym.auto:
			bn.log_warn("Port symbols: {} @ {:x} is already named, skipping".format(target_text, target_address))
			return

		bn.log_info("Port symbols: {} @ {:x} -> {}".format(target_text, target_address, source_name))
		new_sym = bn.Symbol(bn.SymbolType.FunctionSymbol, target_address, source_name)
		self.bv.define_user_symbol(new_sym)
Esempio n. 6
0
    def lift(self, il, instr, mnemonic):
        """
        main entry point for lifting instruction to LLIL

        The handler is looked up on ``self`` by mnemonic (with '.' replaced by
        '_').  A compressed mnemonic ("c.xxx") without a handler of its own
        falls back to the handler of "xxx", with the first operand duplicated.
        Mnemonics without any handler are emitted as unimplemented and
        reported once.

        :param il: LLIL function to append to
        :param instr: decoded instruction; ``operands`` and ``imm`` are read
        :param mnemonic: mnemonic as produced by the disassembler
        :raises Exception: whatever the handler raises is logged and re-raised
        """
        # 'or', 'and' and 'not' are Python keywords and cannot be method
        # names, so their handlers carry an '_expr' suffix.
        keyword_handlers = {
            'or': 'or_expr',
            'c.or': 'c.or_expr',
            'and': 'and_expr',
            'c.and': 'c.and_expr',
            'not': 'not_expr',
            'c.not': 'c.not_expr',
        }
        mnemonic = keyword_handlers.get(mnemonic, mnemonic)

        # we need this to handle instructions with a '.' in the middle (e.g., "sext.w")
        mnemonic = mnemonic.replace(".", "_")

        handler = None
        ops = instr.operands

        if hasattr(self, mnemonic):
            # regular instruction, or a compressed one with a dedicated
            # "c_xxx" handler -> lookup the function in the lifter
            handler = getattr(self, mnemonic)
        elif mnemonic.startswith("c_") and hasattr(self, mnemonic[2:]):
            # compressed instruction without its own handler: fall back to
            # the uncompressed handler
            handler = getattr(self, mnemonic[2:])
            # if we have operands, compressed instructions typically follow
            # the same rule of thumb:
            # inst rX, rX, P <=> c.inst rX, P

            if ops:
                ops = [ops[0]] + ops

        if handler is not None:
            try:
                handler(il, ops, instr.imm)
            except Exception:
                log_error(
                    f"failed to lift instruction {mnemonic}@{il.current_address:#x} with handler {handler!r}"
                )
                raise
        else:
            # print unimplemented mnemonics as warning, but just once

            if mnemonic not in _unliftable:
                log_warn(f"[RISCV] cannot lift instruction: {mnemonic}")
                _unliftable.add(mnemonic)

            il.append(il.unimplemented())
Esempio n. 7
0
def hook_arch(cls):
    """Wrap the three instruction callbacks of ``cls`` with the profiler.

    Each of ``get_instruction_info``, ``get_instruction_text`` and
    ``get_instruction_low_level_il`` is replaced on the class by
    ``decorate(profile, <original>)``.  A warning is logged afterwards as a
    reminder that the hook is active.
    """
    hooked_callbacks = (
        "get_instruction_info",
        "get_instruction_text",
        "get_instruction_low_level_il",
    )
    for callback_name in hooked_callbacks:
        original = getattr(cls, callback_name)
        setattr(cls, callback_name, decorate(profile, original))

    binaryninja.log_warn(
        "Performance impact from bench_hook.py profiling, check if it's still needed."
    )
Esempio n. 8
0
def discover_names(func, func_params):
    """Map names passed to ``func`` onto the single caller that passes them.

    For every distinct caller of ``func``, the value passed for the first
    entry of ``func_params`` is collected at each MLIL call to ``func``.  A
    caller contributes only when exactly one, fully determined value is seen;
    that value is used as the address of a string holding the name.

    Returns a dict ``{name: caller}`` containing only names used by exactly
    one caller.
    """
    param_index = func.parameter_vars.vars.index(func_params[0])
    callers_by_name = {}

    for caller in set(func.callers):
        seen_values = set()

        # Gather the argument value at every call to ``func`` in this caller.
        for inst in caller.mlil.instructions:
            if inst.operation != MediumLevelILOperation.MLIL_CALL:
                continue

            # FIXME: There must be a better way to find the callee
            callee_expr = inst.operands[1]
            if not hasattr(callee_expr, 'constant'):
                continue
            if func.view.get_function_at(callee_expr.constant) != func:
                continue

            argument = caller.get_parameter_at(inst.address,
                                               func.function_type,
                                               param_index)
            # FIXME: There must be a better way again
            undetermined = str(argument) == "<undetermined>"
            seen_values.add(None if undetermined else argument)

        # Exactly one, known value is required to name the caller.
        if len(seen_values) != 1 or None in seen_values:
            log_warn(
                "Unable to determine method name for function %r: Identified method names: %r"
                % (caller, seen_values))
            continue

        (only_value,) = seen_values
        method_name = func.view.get_string_at(only_value.value).value
        callers_by_name.setdefault(method_name, set()).add(caller)

    # Eliminate names with multiple callers
    unique_names = {}
    for name, callers in callers_by_name.items():
        if len(callers) == 1:
            unique_names[name] = list(callers)[0]
        else:
            log_debug("Eliminating name %r with callers %r" % (name, callers))

    return unique_names
Esempio n. 9
0
 def get_instruction_low_level_il(self, data, addr, il):
     """Lift the instruction at ``addr`` into ``il`` and return its length.

     When the instruction cannot be decoded, a warning is logged, a
     ``no_ret`` expression is appended to ``il`` and 0 is returned.
     """
     decoded = self._get_instruction(data, addr)
     if not decoded:
         message = "Could not parse instruction @ 0x{:08X}".format(addr)
         binaryninja.log_warn(message)
         il.append(il.no_ret())
         return 0

     decoded.get_llil(il)
     return decoded.length()
Esempio n. 10
0
def highlight_insts(color_name, *args):
    """Build a callback that highlights instructions by their first token.

    Args:
        color_name: name of a standard highlight colour without the
            "HighlightColor" suffix (e.g. "red"); its first letter is
            upper-cased before the lookup.  Unknown or empty names fall back
            to black, with a warning.
        *args: first-token texts (typically mnemonics) to highlight.

    Returns:
        A two-argument callable; the first argument is ignored, the second
        must provide ``instructions`` and ``set_user_instr_highlight``.
    """
    # Slicing instead of indexing: an empty name must not raise IndexError,
    # it simply fails the lookup below and gets the fallback colour.
    color_name = color_name[:1].upper() + color_name[1:]
    try:
        color = getattr(HighlightStandardColor, f"{color_name}HighlightColor")
    except AttributeError:
        log_warn(f"failed to find standard highlight color {color_name}")
        color = HighlightStandardColor.BlackHighlightColor

    def func(_, function):
        for inst, addr in function.instructions:
            # inst[0] is the first text token of the instruction.
            if str(inst[0]).strip() in args:
                function.set_user_instr_highlight(addr, color)

    return func
    def get_mlil_instr(self, func, addr):
        """Return the MLIL instruction for ``addr`` in ``func``, or ``None``.

        The LLIL instruction at ``addr`` is fetched first and then mapped to
        its ``medium_level_il``.  A missing LLIL instruction is logged as an
        error, a missing MLIL mapping as a warning.
        """
        llil = func.get_low_level_il_at(addr)
        if llil:
            mlil = llil.medium_level_il
            if mlil:
                return mlil

            log_warn(f"Couldn't get mlil_instr from llil_instr at {hex(addr)} (probably was not a call)")
            return None

        log_error(
            f"Couldn't get llil_instr at {hex(addr)}"
            f" (Could be because there are WRONG repeated xrefs for overlaping funcs)"
            f" (last time this happened it was (issue #1196) (should be fixed now)"
        )
        return None
Esempio n. 12
0
    def run(self):
        """Scan the binary view with every loaded YARA rule and report matches.

        Views without segments are scanned in one go; otherwise each segment
        is scanned separately and the scan stops as soon as ``self.cancelled``
        is set.  Progress text is updated after each rule.  A timeout is
        logged as a warning, any other YARA error is logged and shown in a
        message box.  Afterwards the report is displayed if there are results
        and the "yara.displayReport" setting is enabled.
        """
        log_info("Scanning binary view for matching YARA signatures")

        # TODO: Scan the raw binary data from the Raw view instead of by segment.
        # This would require mapping the addresses from the Raw view to the PE/ELF views.
        # raw = self.bv.get_view_of_type("Raw")
        # reader = BinaryReader(raw)
        # data = reader.read(raw.end)

        try:
            for idx, rule in enumerate(self.rules):
                if self.bv.segments:
                    # Scan by segment
                    for segment in self.bv.segments:
                        if self.cancelled:
                            return

                        self.scan(segment.start, segment.data_length, rule)
                else:
                    # Scan binary without segments
                    # NOTE(review): an end address is passed here while the
                    # per-segment call passes a length -- confirm which one
                    # scan() expects.
                    self.scan(self.bv.start, self.bv.end, rule)

                percent = round((idx / len(self.rules)) * 100)
                self.progress = f"{self.progress_banner} matching on rules ({percent}%)"

        except yara.TimeoutError:
            log_warn(
                "YARA scan exceeded timeout limit. Consider changing the timeout in settings."
            )
        except yara.Error as err:
            log_error("Error matching on YARA rules: {}".format(str(err)))
            show_message_box("Error",
                             "Check logs for details",
                             icon=MessageBoxIcon.ErrorIcon)

        if not self.results:
            log_info("YARA scan finished with no matches.")
        elif Settings().get_bool("yara.displayReport"):
            self.display_report()
Esempio n. 13
0
 def do_path_substitution(self):
     """Add, replace or remove a path substitution from the two text fields.

     * blank original path: rejected with a warning;
     * blank substitute path: removes the existing substitution for the
       original path, or warns if there is none;
     * otherwise: stores ``original -> substitute`` and clears the list of
       previously failed lookups.
     """
     original_path = self.original_path.text()
     new_path = self.substitute_path.text()
     # text() may hand back bytes; normalise both fields to str.  Each field
     # is checked on its own, and decoded rather than called.
     if isinstance(original_path, bytes):
         original_path = original_path.decode()
     if isinstance(new_path, bytes):
         new_path = new_path.decode()

     if original_path == "":
         log_warn("Path substitution error: Original path can't be blank")
     elif new_path == "":
         if original_path in self.path_substitutions:
             old_sub = self.path_substitutions.pop(original_path)
             log_info("Removed path substitution: %s -> %s" %
                      (original_path, old_sub))
         else:
             log_warn(
                 "Path substitution error: New substitute path can't be blank"
             )
     else:
         self.path_substitutions[original_path] = new_path
         log_info("Added path substitution: %s -> %s" %
                  (original_path, new_path))
         # clear failures when new path added
         self.failed_substitutions = []
Esempio n. 14
0
 def get_relocation_info(self, view, arch, results):
     """Fill in the relocation descriptors in ``results`` by native type.

     Every entry gets its ``type`` and, for handled types, the flags and
     sizes set below.  ``_NONE`` is marked as ignored; anything else not
     listed is marked as unhandled and logged.  ``view`` and ``arch`` are
     not used.

     Always returns ``True``.
     """
     for reloc in results:
         reloc.type = bn.RelocationType.StandardRelocationType
         if reloc.nativeType == R_MICROBLAZE._REL:
             reloc.pcRelative = False
             reloc.baseRelative = True
             reloc.hasSign = False
             reloc.size = 4
             reloc.truncateSize = 4
             reloc.implicitAddend = False
         elif reloc.nativeType in [R_MICROBLAZE._GLOB_DAT,
                                   R_MICROBLAZE._JUMP_SLOT,
                                   R_MICROBLAZE._32]:
             if reloc.nativeType == R_MICROBLAZE._GLOB_DAT:
                 reloc.type = bn.RelocationType.ELFGlobalRelocationType
             elif reloc.nativeType == R_MICROBLAZE._JUMP_SLOT:
                 reloc.type = bn.RelocationType.ELFJumpSlotRelocationType
             reloc.pcRelative = False
             reloc.baseRelative = False
             # hasSign is only touched for the plain 32-bit relocation.
             if reloc.nativeType == R_MICROBLAZE._32:
                 reloc.hasSign = False
             reloc.size = 4
             reloc.truncateSize = 4
         elif reloc.nativeType == R_MICROBLAZE._COPY:
             reloc.type = bn.RelocationType.ELFCopyRelocationType
             reloc.pcRelative = False
             reloc.baseRelative = False
             reloc.size = 4
             reloc.truncateSize = 4
         elif reloc.nativeType == R_MICROBLAZE._NONE:
             reloc.type = bn.RelocationType.IgnoredRelocation
         else:
             reloc.type = bn.RelocationType.UnhandledRelocation
             # Format the raw number: converting it through R_MICROBLAZE(...)
             # raises ValueError for a value the enum does not know, which is
             # exactly the case this branch has to report.
             bn.log_warn('Unimplemented ELF relocation type: %d' % (reloc.nativeType,))
     return True
Esempio n. 15
0
def lookup_hash(sig, use_cache=True, timeout=10):
    """Return the text signatures known for the 4-byte selector ``sig``.

    Args:
        sig: hex signature sent as the ``hex_signature`` query parameter.
        use_cache: when true, the cache is initialised and consulted first;
            a non-empty cached entry is returned without any request.
        timeout: seconds to wait for 4byte.directory before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        A list of text signatures; empty when nothing was found, when
        ``requests`` is unavailable, or when the lookup failed (failures are
        logged, not raised).

    Raises:
        AssertionError: assertion failures are deliberately re-raised.
    """
    if use_cache:
        init_cache()
        cached = _4byte_cache.get(sig, [])
        if cached:
            return cached

    if not _requests_available:
        log_error("couldn't import requests for fetching from 4byte.directory")
        return []

    try:
        res = requests.get(LOOKUP_4BYTE_URL,
                           params={"hex_signature": sig},
                           timeout=timeout)
        results = res.json()['results']

        if len(results) >= 1:
            sig_collisions = [r['text_signature'] for r in results]
            # Fresh results are stored regardless of ``use_cache``.
            _4byte_cache[sig] = sig_collisions
            return sig_collisions

        log_warn(
            "4byte.directory didn't yield any results for '{}'".format(sig))
        return []
    except AssertionError:
        raise
    except Exception as e:
        log_error("4byte lookup failed, reason ({}): {}".format(type(e), e))
        return []
    def find_format_strings(self, CHECK_WITH_READELF=False):
        """Analyse every call to a printf-like function and classify its format argument.

        Starting from the list returned by ``PrintfLikeFunction.load_all()``,
        each code reference to each function is inspected.  The origins of
        the format parameter are traced and sorted into safe and vulnerable
        ones; a caller that forwards one of its own parameters as the format
        is itself queued as a new printf-like function.  One
        ``FormatStringFinderResult`` per reference is appended to
        ``self.results``.  Finally, newly found printf-like functions that
        are globally bound function symbols are saved to user data.

        Args:
            CHECK_WITH_READELF: when true, the references of external symbols
                are additionally cross-checked with
                ``check_relocations_with_readelf``.

        Raises:
            AssertionError: when a reference or format parameter uses an MLIL
                operation this analysis does not know how to handle.
        """
        visited = set()

        # ====================
        # Step 0: Get all hardcoded known printf_like functions
        to_visit = PrintfLikeFunction.load_all()
        # @@TODO: We could look for refs of strings with '%s', '%d'... and if they are the parameter of an external function, add those as 'printf like'

        while to_visit:
            printf_like_func = to_visit.pop(0)

            # Sometimes, due to saving printf_like_funcs in a file to later reload we get repeated entries
            if printf_like_func in visited:
                log_debug("Skipping analysis of duplicate printf_like_func ' %s '" % printf_like_func.name)
                continue
            visited.add(printf_like_func)

            syms = self.get_symbols_by_raw_name(printf_like_func.name)
            if not syms:
                if printf_like_func.name.startswith("sub_"):
                    log_error(f"No symbol found for function '{printf_like_func.name}'")
                continue

            log_debug(f"\n===== {printf_like_func} =====")
            log_debug(f" syms: {syms} =====")

            # @@TODO: Add arg name 'format' and type 'char*' to the format var (Tried before but arg and var get disconnected sometimes. Likely a bug.)

            # Get every ref for this symbol(s)
            refs = []
            for sym in syms:
                # Materialised so the refs can be handed to the readelf check
                # and still be appended below, even if the API returns a
                # one-shot iterator.
                it_refs = list(self.bv.get_code_refs(sym.address))

                # readelf check to get a second opinion
                if CHECK_WITH_READELF and sym.type == SymbolType.ExternalSymbol:
                    # Keyword arguments: the method is declared as
                    # (syms, sym, refs), so a positional (sym, syms, refs)
                    # call would swap the first two.
                    self.check_relocations_with_readelf(syms=syms, sym=sym, refs=it_refs)

                refs += it_refs

            # ====================
            # Step 1: Check each xref for vulns
            for ref in refs:
                log_debug(f"Analyzing xref {hex(ref.address)}")
                ref_result = FormatStringFinderResult(self.bv, ref)
                self.results.append(ref_result)

                # ====================
                # Step 1.0: Sanity checks
                mlil_instr = self.get_mlil_instr(ref.function, ref.address)
                if not mlil_instr:
                    continue

                # Check for known unhandled operations
                if mlil_instr.operation in (
                    MLILOperation.MLIL_CALL_UNTYPED, MLILOperation.MLIL_TAILCALL_UNTYPED
                ):
                    log_debug("@@TODO: How to handle MLIL_CALL_UNTYPED and MLIL_TAILCALL_UNTYPED?")
                    continue
                elif mlil_instr.operation in (MLILOperation.MLIL_SET_VAR, MLILOperation.MLIL_STORE):
                    # Our xref is being used to set a var and not in a call.
                    # @@TODO: Maybe we could try to find if it is called close by and use that as an xref
                    continue

                # If it wasn't one of the above, it must be one of these
                if mlil_instr.operation not in (MLILOperation.MLIL_CALL, MLILOperation.MLIL_TAILCALL):
                    # Raised explicitly (not via `assert`) so the check is
                    # not stripped when Python runs with -O.
                    raise AssertionError(
                        f"mlil operation '{mlil_instr.operation.name}' is unsupported @ {hex(ref.address)}"
                    )

                # @@TODO: Can we force it to have the necessary arguments? Looking at the calling convention?
                if printf_like_func.parameter_index >= len(mlil_instr.params):
                    log_error(
                        f"{hex(ref.address)} : parameter nr {printf_like_func.parameter_index} for "
                        f"function call of '{printf_like_func.name}' is not available"
                    )
                    continue

                if self.should_highlight:
                    ref.function.set_user_instr_highlight(
                        ref.address, HighlightStandardColor.RedHighlightColor
                    )
                # ====================
                # Step 1.1: Find the origins of the format parameter for this xref
                fmt_param = mlil_instr.ssa_form.params[printf_like_func.parameter_index]

                if fmt_param.operation in (MLILOperation.MLIL_CONST, MLILOperation.MLIL_CONST_PTR):
                    # Handle immediate constants
                    var_origins = [VarOriginConst(fmt_param.constant)]
                elif fmt_param.operation in (MLILOperation.MLIL_VAR_SSA, MLILOperation.MLIL_VAR_ALIASED):
                    # @@TODO: What is the meaning of 'MLILOperation.MLIL_VAR_ALIASED' ?
                    # Find the origins of the variable
                    fmt_ssa = fmt_param.src
                    mlil_ssa = ref.function.medium_level_il.ssa_form

                    # Get the var origins. Can be a parameter, a const, an address of another var...
                    var_origins = MLILSSAVarAnalysisOrigins(self.bv,
                                                            mlil_ssa).run(fmt_ssa, self.should_highlight)
                else:
                    # Explicit raise for the same -O reason as above;
                    # otherwise var_origins would be unbound below.
                    raise AssertionError(
                        f"ERROR: fmt_param.operation is {fmt_param.operation.name} @ {hex(ref.address)}"
                    )

                if var_origins is None:
                    log_warn(f"{hex(ref.address)} : Failed to get origins of the format parameter")
                    continue

                # ====================
                # Step 1.2: Determine if the origins are safe or vulnerable
                # Case 1: If any origin is an argument -> PRINTF_LIKE
                # Case 2: If any is NOT a read-only constant or a parameter -> VULN
                # Case 3: If all are an arg or a const -> SAFE
                vuln_origins = []
                safe_origins = []

                for orig in var_origins:
                    if isinstance(orig, VarOriginParameter):
                        safe_origins.append(orig)

                        # Add as a printf like function
                        new_printf_like = PrintfLikeFunction(ref.function.name, orig.parameter_idx)  # pylint: disable=no-member
                        to_visit.append(new_printf_like)
                        self.new_printf_like_funcs.add(new_printf_like)

                        # Create a symbol for the new printf like function if it does not exist.
                        # The self-assignment is deliberate: it goes through the
                        # name setter (presumably defining a symbol -- TODO confirm).
                        if not self.bv.get_symbols_by_name(ref.function.name):
                            ref.function.name = ref.function.name

                    elif isinstance(orig, VarOriginConst) and self.is_addr_read_only(orig.const):
                        safe_origins.append(orig)

                    elif isinstance(orig, VarOriginCallResult) and orig.func_name in self.safe_functions:  # pylint: disable=no-member
                        # We accept that 'dcgettext' is safe because you need root to control the translation
                        safe_origins.append(orig)

                    else:
                        vuln_origins.append(orig)

                ref_result.set_result(safe_origins, vuln_origins)
                log_debug(str(ref_result))

                # ====================
                # Step 2: Heuristic to find function pointer calls that might be vulnerable
                self.heuristic_look_for_vul_function_ptr_calls(mlil_instr, var_origins)

        # ====================
        # Step 3: Save the exported functions to a file so other files that import them know they are printf like
        exported_printf_like_funcs = []
        for func in self.new_printf_like_funcs:
            syms = self.bv.get_symbols_by_name(func.name)
            if not syms:
                continue

            for s in syms:
                if s.type == SymbolType.FunctionSymbol and s.binding == SymbolBinding.GlobalBinding:
                    log_info(f"Saving exported function '{func.name}' to user_data")
                    exported_printf_like_funcs.append(func)
                    break

        if exported_printf_like_funcs:
            fname = os.path.basename(self.bv.file.filename)
            PrintfLikeFunction.save_to_user_data(fname, exported_printf_like_funcs)
Esempio n. 17
0
    def run(self, ssa_var: SSAVariable, should_highlight, visited=None):
        """Trace ``ssa_var`` back through its SSA definitions and list its origins.

        Plain variable-to-variable assignments are followed in a loop; phi
        nodes are followed recursively, one branch per phi source.  The walk
        stops at a parameter, a constant, an address-of, a load, a call
        result, or anything it does not understand (reported as
        ``VarOriginUnknown``).

        Args:
            ssa_var: the SSA variable to start from.
            should_highlight: when true, every definition visited is
                highlighted in orange.
            visited: set of SSA variables already seen, shared with recursive
                calls to detect phi cycles; created when omitted.

        Returns:
            A list of ``VarOrigin*`` objects.
        """
        origins = []
        if visited is None:
            visited = set()

        while True:
            if ssa_var in visited:
                # Seen a case where we had (in '/bin/dash'):
                #   - r13_1#4 = ϕ(r13_1#3, r13_1#6)
                #   - r13_1#6 = ϕ(r13_1#4, r13_1#5)
                msg = (
                    f"Found phi vars (including {ssa_var}) that depend on each other "
                    f"in function {hex(ssa_var.var.function.start)}. "
                    f"I've only seen this happen a couple of times."
                )
                log_error(msg)
                origins.append(VarOriginUnknown(msg))
                return origins
            visited.add(ssa_var)

            # Step 1: If we reach an ssa_var with version 0, it will have no more definitions
            if ssa_var.version == 0:
                is_parameter, parameter_idx = self.is_ssa_var_a_parameter(
                    ssa_var)
                if is_parameter:
                    origins.append(VarOriginParameter(parameter_idx))
                else:
                    # Var is version 0 but not a function parameter. Sometimes these are stack addrs.
                    origins.append(
                        VarOriginUnknown(
                            "Var is version 0 but not a function parameter"))
                return origins

            # Step 2: Get the next definition
            var_def_instr: MediumLevelILInstruction = self.mlil_ssa_func.get_ssa_var_definition(
                ssa_var)
            if var_def_instr is None:
                msg = f"{ssa_var} has no definition in function {hex(ssa_var.var.function.start)} (Not sure how this is possible)"
                log_error(msg)
                origins.append(VarOriginUnknown(msg))
                return origins

            if should_highlight:
                self.func.set_user_instr_highlight(
                    var_def_instr.address,
                    HighlightStandardColor.OrangeHighlightColor)

            # Step 3: Get the next var/vars to check
            if var_def_instr.operation in (MLILOperation.MLIL_SET_VAR_SSA,
                                           MLILOperation.MLIL_SET_VAR_ALIASED):
                src = var_def_instr.src
                if src.operation == MLILOperation.MLIL_VAR_SSA:
                    # Keep propagating backwards
                    ssa_var = src.src
                    continue

                if src.operation in (MLILOperation.MLIL_CONST,
                                     MLILOperation.MLIL_CONST_PTR):
                    # Found a constant
                    origins.append(VarOriginConst(src.constant))
                elif src.operation == MLILOperation.MLIL_ADDRESS_OF:
                    origins.append(VarOriginAddressOf(src.src))
                elif src.operation == MLILOperation.MLIL_LOAD_SSA:
                    origins.append(VarOriginLoad())
                else:
                    # We are NOT interested in things like adds/subs because we are looking for either arguments or constants
                    msg = f"{src.operation.name} for a MLIL_SET_VAR_SSA src, so we stopped propagating the chain."
                    origins.append(VarOriginUnknown(msg))
                    log_warn(msg)

            elif var_def_instr.operation == MLILOperation.MLIL_VAR_PHI:
                # Find the origins of each PHI source; ``visited`` is shared
                # so cycles between phi nodes are detected.
                for phi_var in var_def_instr.src:
                    origins += self.run(phi_var,
                                        should_highlight=should_highlight,
                                        visited=visited)

            elif var_def_instr.operation == MLILOperation.MLIL_CALL_SSA:
                # Found a var defined as the result of a function call
                func_addr = var_def_instr.dest.value.value
                func = self.bv.get_function_at(func_addr)
                if func is None:
                    # A function call from an address that has no function?
                    msg = f"Couldn't get function at {hex(func_addr)} (from MLIL_CALL_SSA at {hex(var_def_instr.address)})."
                    origins.append(VarOriginUnknown(msg))
                    log_error(msg)
                else:
                    # Reuse the function already looked up above.
                    origins.append(VarOriginCallResult(func.name))

            else:
                # What is this??
                msg = f"{var_def_instr.operation.name} not supported at {hex(var_def_instr.address)}"
                origins.append(VarOriginUnknown(msg))
                log_error(msg)

            return origins
Esempio n. 18
0
    def visit_BitVecRef(self, expr):
        """Render a z3 bit-vector expression as a list of ``InstructionTextToken``.

        Handled declarations:

        * ``bvadd``   -> ``<left> + <right>``
        * ``extract`` -> ``var.member`` when the extracted bits match a
          structure member or sub-register, otherwise ``var & <mask>``; when
          the operand is not a known variable, the operand itself is visited
        * ``select``  -> ``*(<index>)``
        * ``concat``  -> ``<left> << <width of right>`` (two operands only)
        * anything else is treated as a variable name, optionally prefixed
          with ``&``

        Raises:
            NotImplementedError: for a ``concat`` with more than two operands
                or with inconsistent operand sizes.
        """
        member = None
        member_name = None
        var = None
        decl_name = expr.decl().name()

        if decl_name == 'bvadd':
            left = self.visit(expr.arg(0))
            right = self.visit(expr.arg(1))

            return (left + [
                InstructionTextToken(InstructionTextTokenType.TextToken, ' + ')
            ] + right)

        if decl_name == "extract":
            end, start = expr.params()
            size = (end - start + 1) // 8
            var_name = expr.arg(0).decl().name()

            var = next((v for v in self._function.vars if v.name == var_name),
                       None)

            if var is None:
                # Not a plain variable: render the operand itself.
                return self.visit(expr.arg(0))

            type_ = var.type

            if type_.type_class == TypeClass.NamedTypeReferenceClass:
                type_ = self._function.view.types[
                    type_.named_type_reference.name]

            if type_.type_class == TypeClass.StructureTypeClass:
                # NOTE(review): members are read from ``var.structure`` while
                # the type was resolved into ``type_`` -- confirm that this
                # attribute exists on the variable.
                member = next(
                    (m for m in var.structure.members if m.offset == start),
                    None)
                # No member at this offset: fall through to the mask form
                # below instead of dereferencing None.
                if member is not None:
                    member_name = member.name

            elif (var.source_type ==
                  VariableSourceType.RegisterVariableSourceType):
                full_reg_name = self._function.arch.get_reg_name(var.storage)
                member = next(
                    (subregister
                     for subregister in self._function.arch.regs.values()
                     if (subregister.full_width_reg == full_reg_name
                         and subregister.size == size
                         and subregister.offset == start)),
                    None,
                )

                if member is not None:
                    member_name = self._function.arch.get_reg_name(
                        member.index)

            if member is None:
                # Bits start..end set, everything else clear.
                mask = ((1 << (end + 1)) - 1) ^ ((1 << (start)) - 1)

                return [
                    InstructionTextToken(
                        InstructionTextTokenType.LocalVariableToken, var.name,
                        var.identifier),
                    InstructionTextToken(InstructionTextTokenType.TextToken,
                                         ' & '),
                    InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                         hex(mask), mask)
                ]

        elif decl_name == 'select':
            log_debug(f"{expr.arg(0)}[{expr.arg(1)}]")
            return ([
                InstructionTextToken(InstructionTextTokenType.TextToken, '*(')
            ] + self.visit(expr.arg(1)) + [
                InstructionTextToken(InstructionTextTokenType.TextToken, ')')
            ])

        elif decl_name == 'concat':
            log_debug(f'{expr.num_args()}')

            if expr.num_args() > 2:
                raise NotImplementedError(
                    f"I don't know how to handle this: {expr}")

            left, right = expr.arg(0), expr.arg(1)

            max_size = expr.size()

            shift = right.size()

            left_size = left.size()

            # NOTE(review): assumes the left operand carries two parameters
            # (as an extract does) -- confirm for other operand kinds.
            end, start = left.params()

            if left_size + shift != max_size:
                raise NotImplementedError(
                    (f'This should never happen! '
                     f'{left_size} + {shift} != {max_size}'))

            if start != 0:
                left_tokens = self.visit(left)
            else:
                left_tokens = self.visit(left.arg(0))

            return (left_tokens + [
                InstructionTextToken(InstructionTextTokenType.TextToken,
                                     ' << '),
                InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                     str(shift), shift)
            ])

        else:
            var_name = decl_name
            # startswith() instead of [0]: an empty name must reach the
            # '<Unknown token>' fallback below rather than raise IndexError.
            if var_name.startswith('&'):
                var_name = var_name[1:]
            var = next((v for v in self._function.vars if v.name == var_name),
                       None)

        if var is None:
            log_warn(f"var is None: {decl_name}")

            return [
                InstructionTextToken(
                    InstructionTextTokenType.TextToken, '<Unknown token>'
                    if not decl_name else decl_name)
            ]

        return ([
            InstructionTextToken(InstructionTextTokenType.TextToken, '&')
        ] if decl_name.startswith('&') else []) + ([
            InstructionTextToken(InstructionTextTokenType.LocalVariableToken,
                                 var.name, var.identifier)
        ]) + ([
            InstructionTextToken(InstructionTextTokenType.TextToken, "."),
            InstructionTextToken(InstructionTextTokenType.RegisterToken,
                                 member_name, var.identifier),
        ] if member is not None else [])