Example #1
0
    def _match_spec(self, nugget):
        """
        Match the byte string `nugget` to a format specifier.

        An optional prefix is consumed first: ``.*`` (precision passed as an argument), a leading ``0``
        flag, or ``.`` - each optionally followed by decimal digits. Whatever remains is compared against
        the keys of ``self._all_spec``.

        :param nugget:  Bytes to inspect.
        :return:        A FormatSpecifier built from the first matching key of ``self._all_spec``, or None
                        when no key matches.
        :raises SimProcedureError: If the matched type cannot be bound to an architecture.
        """
        # TODO: handle positional modifiers and other similar format string tricks.
        all_spec = self._all_spec

        original_nugget = nugget
        length_str = []
        length_spec = None
        # bytes of punctuation ("." or ".*") stripped from the front of nugget before the digits
        prefix_len = 0
        pad_chr = " "

        if nugget.startswith(b".*"):
            # ".*": precision is specified as an argument
            nugget = nugget[2:]
            length_spec = b".*"
            prefix_len = 2
        elif nugget.startswith(b"0"):
            # the "0" itself stays in nugget and is picked up by the digit scan below
            pad_chr = "0"
        elif nugget.startswith(b"."):
            pad_chr = "0"
            nugget = nugget[1:]
            prefix_len = 1

        # collect the run of digits; the first non-digit is where the conversion itself starts
        for j, c in enumerate(nugget):
            if c in ascii_digits:
                length_str.append(c)
            else:
                nugget = nugget[j:]
                if length_spec is None and length_str:
                    length_spec = int(bytes(length_str))
                break

        # Everything consumed ahead of the conversion must be counted, otherwise the slice of
        # original_nugget below ends early (".5d" used to come back as ".5").
        consumed = prefix_len + len(length_str)

        # is it an actual format?
        for spec in all_spec:
            if nugget.startswith(spec):
                # this is gross coz sim_type is gross..
                nugget = nugget[:len(spec)]
                original_nugget = original_nugget[:consumed + len(spec)]
                nugtype: 'SimType' = all_spec[nugget]
                arch = self.state.arch if self.state is not None else self.project.arch
                try:
                    typeobj = nugtype.with_arch(arch)
                except Exception as ex:
                    raise SimProcedureError(
                        "format specifier uses unknown type '%s'" %
                        repr(nugtype)) from ex
                return FormatSpecifier(original_nugget, length_spec, pad_chr,
                                       typeobj.size // 8, typeobj.signed)

        return None
Example #2
0
    def ret(self, expr=None):
        """
        Add an exit representing a return from this function.

        A given `expr` is optionally simplified, optionally swapped for a fresh unconstrained symbol that
        is constrained equal to it, and then stored in ``self.ret_expr``. The return address comes from
        ``_compute_ret_addr`` for Soot addresses, or from the calling convention's ``teardown_callsite``
        when state arguments are in use; ``self.ret_to`` overrides either one.

        :param expr:    Optional return value expression.
        :raises SimProcedureError: If no return address is available.
        """
        self.inhibit_autoret = True

        if expr is not None:
            if o.SIMPLIFY_RETS in self.state.options:
                l.debug("... simplifying")
                l.debug("... before: %s", expr)
                expr = self.state.solver.simplify(expr)
                l.debug("... after: %s", expr)

            if self.symbolic_return:
                # hide the real value behind a named symbol of the same width
                width = len(expr)
                stand_in = self.state.solver.Unconstrained(
                    "symbolic_return_" + self.display_name,
                    width,
                    key=('symbolic_return', self.display_name))  #pylint:disable=maybe-no-member
                self.state.add_constraints(stand_in == expr)
                expr = stand_in

            self.ret_expr = expr

        target = None
        # TODO: I had to put this check here because I don't understand why self.use_state_arguments gets reset to true
        # when calling the function ret. at the calling point the attribute is set to False
        if isinstance(self.addr, SootAddressDescriptor):
            target = self._compute_ret_addr(expr)  #pylint:disable=assignment-from-no-return
        elif self.use_state_arguments:
            declared_args = self.cc.args
            if declared_args is None:
                # fall back to using self.num_args
                float_flags = [False] * self.num_args
            else:
                float_flags = [
                    isinstance(declared, (SimTypeFloat, SimTypeDouble))
                    for declared in declared_args
                ]
            target = self.cc.teardown_callsite(self.state,
                                               expr,
                                               arg_types=float_flags)

        if not self.should_add_successors:
            l.debug("Returning without setting exits due to 'internal' call.")
            return

        forced_target = self.ret_to
        if forced_target is not None:
            target = forced_target

        if target is None:
            raise SimProcedureError(
                "No source for return address in ret() call!")

        self._prepare_ret_state()

        # self.state is re-read on purpose: nothing is cached across _prepare_ret_state()
        self._exit_action(self.state, target)
        self.successors.add_successor(self.state, target,
                                      self.state.solver.true, 'Ijk_Ret')
Example #3
0
    def replace(self, startpos, args):
        """
        Implement printf - based on the stored format specifier information, format the values from the arg getter
        function `args` into a string.

        ``bytes`` components are wrapped in a BVV and BV components are appended unchanged. A ``%s``
        specifier loads its string through ``self._get_str_at`` (taking one extra argument as the maximum
        length when the specifier's length is ``.*``). Every other specifier concretizes its argument with
        the solver, masks it to the specifier's size, applies the sign and renders it as bytes.

        :param startpos:        The index of the first argument to be used by the first element of the format string
        :param args:            A function which, given an argument index, returns the integer argument to the current function at that index
        :return:                The result formatted string
        :raises SimProcedureError: If a specifier type has no rendering rule here.
        """

        argpos = startpos
        string = None

        for component in self.components:
            # if this is just concrete data
            if isinstance(component, bytes):
                string = self._add_to_string(string, self.parser.state.solver.BVV(component))
            elif isinstance(component, str):
                raise Exception("this branch should be impossible?")
            elif isinstance(component, claripy.ast.BV):
                string = self._add_to_string(string, component)
            else:
                # okay now for the interesting stuff
                # what type of format specifier is it?
                fmt_spec = component
                if fmt_spec.spec_type == b's':
                    if fmt_spec.length_spec == b".*":
                        # the length comes first, the string pointer is the argument after it
                        str_length = args(argpos)
                        argpos += 1
                    else:
                        str_length = None
                    str_ptr = args(argpos)
                    string = self._add_to_string(string, self._get_str_at(str_ptr, max_length=str_length))
                # integers, for most of these we'll end up concretizing values..
                else:
                    i_val = args(argpos)
                    c_val = int(self.parser.state.solver.eval(i_val))
                    # truncate to the specifier's width, then reinterpret as two's complement if signed
                    c_val &= (1 << (fmt_spec.size * 8)) - 1
                    if fmt_spec.signed and (c_val & (1 << ((fmt_spec.size * 8) - 1))):
                        c_val -= (1 << fmt_spec.size * 8)

                    if fmt_spec.spec_type in (b'd', b'i'):
                        b_val = str(c_val).encode()
                    elif fmt_spec.spec_type == b'u':
                        b_val = str(c_val).encode()
                    elif fmt_spec.spec_type == b'c':
                        # Emit exactly one raw byte. chr(...).encode() would UTF-8 encode values
                        # >= 0x80 into two bytes and corrupt the output length.
                        b_val = bytes([c_val & 0xff])
                    elif fmt_spec.spec_type == b'x':
                        b_val = hex(c_val)[2:].encode()
                    elif fmt_spec.spec_type == b'o':
                        b_val = oct(c_val)[2:].encode()
                    elif fmt_spec.spec_type == b'p':
                        b_val = hex(c_val).encode()
                    else:
                        raise SimProcedureError("Unimplemented format specifier '%s'" % fmt_spec.spec_type)

                    string = self._add_to_string(string, self.parser.state.solver.BVV(b_val))

                argpos += 1

        return string
Example #4
0
def io_file_data_for_arch(arch):
    """
    Look up the _IO_FILE entry registered under ``arch.name``.

    :param arch:    Object whose ``name`` attribute is used as the key into ``_IO_FILE``.
    :return:        The ``_IO_FILE`` entry for that name.
    :raises SimProcedureError: If ``_IO_FILE`` has no entry for the name.
    """
    arch_name = arch.name
    if arch_name in _IO_FILE:
        return _IO_FILE[arch_name]
    raise SimProcedureError("missing _IO_FILE offsets for arch: %s" %
                            arch_name)
Example #5
0
    def _parse(self, fmt_idx):
        """
        Read the format string referenced by argument `fmt_idx` and turn it into a FormatString.

        The pointer must be concrete and the string length must have a single solution. The string is
        then split into one-byte pieces: bytes with exactly one solution become ``bytes`` objects, the
        others stay as symbolic expressions. The pieces are handed to ``self._get_fmt``.

        :param fmt_idx: The index of the (pointer to the) format string in the arguments list.
        :returns:       A FormatString object which can be used for replacing the format specifiers with arguments or
                        for scanning into arguments.
        :raises SimProcedureError: If the pointer is symbolic or the length has more than one solution.
        """

        ptr = self.arg(fmt_idx)

        if self.state.solver.symbolic(ptr):
            raise SimProcedureError("Symbolic pointer to (format) string :(")

        length = self._sim_strlen(ptr)
        if self.state.solver.symbolic(length):
            # ask for two solutions so that "more than one" can be detected
            solutions = self.state.solver.eval_upto(length, 2)
            if len(solutions) != 1:
                raise SimProcedureError(
                    "Symbolic (format) string, game over :(")
            length = solutions[0]

        if self.state.solver.is_true(length == 0):
            return FormatString(self, [b""])

        raw = self.state.memory.load(ptr, length)

        # walk the loaded expression from its highest bit downward, eight bits at a time
        pieces = []
        for top in range(raw.size(), 0, -8):
            byte_expr = raw[top - 1:top - 8]
            try:
                value = self.state.solver.eval_one(byte_expr)
            except SimSolverError:
                # For symbolic chars, just keep them symbolic
                piece = byte_expr
            else:
                # Concrete chars are stored as single-byte bytes objects
                piece = bytes([value])
            pieces.append(piece)

        # make a FormatString object
        parsed = self._get_fmt(pieces)

        l.debug("Fmt: %r", parsed)

        return parsed
Example #6
0
    def replace(self, startpos, args):
        """
        Produce a new string based of the format string self with args `args` and return a new string, possibly
        symbolic.

        ``str`` components are wrapped in a BVV and BV components are appended unchanged. ``%s`` loads its
        string through ``self._get_str_at``; every other specifier concretizes its argument, masks it to
        the specifier's size, applies the sign and renders it as text.

        :param startpos:    Index of the argument consumed by the first format specifier.
        :param args:        Callable mapping an argument index to that argument.
        :return:            The accumulated string as built by ``self._add_to_string``.
        :raises SimProcedureError: If a specifier type has no rendering rule here.
        """

        argpos = startpos
        string = None

        for component in self.components:
            # if this is just concrete data
            if isinstance(component, str):
                string = self._add_to_string(
                    string, self.parser.state.se.BVV(component))
            elif isinstance(component, claripy.ast.BV):
                string = self._add_to_string(string, component)
            else:
                # okay now for the interesting stuff
                # what type of format specifier is it?
                fmt_spec = component
                if fmt_spec.spec_type == 's':
                    str_ptr = args(argpos)
                    string = self._add_to_string(string,
                                                 self._get_str_at(str_ptr))
                # integers, for most of these we'll end up concretizing values..
                else:
                    i_val = args(argpos)
                    c_val = int(self.parser.state.se.any_int(i_val))
                    # truncate to the specifier's width, then reinterpret as two's complement if signed
                    c_val &= (1 << (fmt_spec.size * 8)) - 1
                    if fmt_spec.signed and (c_val &
                                            (1 << ((fmt_spec.size * 8) - 1))):
                        c_val -= (1 << fmt_spec.size * 8)

                    if fmt_spec.spec_type == 'd':
                        s_val = str(c_val)
                    elif fmt_spec.spec_type == 'u':
                        s_val = str(c_val)
                    elif fmt_spec.spec_type == 'c':
                        s_val = chr(c_val & 0xff)
                    elif fmt_spec.spec_type == 'x':
                        s_val = hex(c_val)[2:].rstrip('L')
                    elif fmt_spec.spec_type == 'o':
                        # Slicing oct() depends on the interpreter's prefix ("0" vs "0o") and turns
                        # the value 0 into an empty string; %-formatting has neither problem.
                        s_val = '%o' % c_val
                    elif fmt_spec.spec_type == 'p':
                        s_val = hex(c_val).rstrip('L')
                    else:
                        raise SimProcedureError(
                            "Unimplemented format specifier '%s'" %
                            fmt_spec.spec_type)

                    string = self._add_to_string(
                        string, self.parser.state.se.BVV(s_val))

                argpos += 1

        return string
Example #7
0
    def dynamic_returns(self, blocks, **kwargs) -> bool:  # pylint:disable=unused-argument
        """
        Report whether a call to this function returns.

        This base version answers True unless the ``DYNAMIC_RET`` flag is set, in which case it refuses
        to answer.

        :param blocks:  Blocks that are executed before reaching this SimProcedure.
        :return:        True if the call returns, False otherwise.
        :raises SimProcedureError: If ``DYNAMIC_RET`` is truthy.
        """

        if not self.DYNAMIC_RET:
            return True

        raise SimProcedureError(
            f"dynamic_returns() is not implemented for {self}")
Example #8
0
    def _match_spec(self, nugget):
        """
        Match the string `nugget` to a format specifier.

        Leading digits are collected as the length and a ``*`` marks the specifier as discarded. Whatever
        follows is compared against the keys of ``self._all_spec``.

        :param nugget:  String to inspect.
        :return:        A FormatSpecifier built from the first matching key of ``self._all_spec``, or None
                        when no key matches.
        :raises SimProcedureError: If the matched type cannot be parsed or bound to the architecture.
        """
        # TODO: handle positional modifiers and other similar format string tricks.
        all_spec = self._all_spec

        # iterate through nugget throwing away anything which is an int
        # TODO store this in a size variable

        original_nugget = nugget
        length_str = []
        length_spec = None
        discarded = False
        for j, c in enumerate(nugget):
            if c in ascii_digits:
                length_str.append(c)
            elif c == '*':
                discarded = True
            else:
                nugget = nugget[j:]
                length_spec = None if len(length_str) == 0 else int(
                    ''.join(length_str))
                break

        # we need the length of the format's length specifier to extract the format and nothing else
        length_spec_str_len = 0 if length_spec is None else len(length_str)
        discarded_len = 1 if discarded else 0
        # is it an actual format?
        for spec in all_spec:
            if nugget.startswith(spec):
                # this is gross coz sim_type is gross..
                nugget = nugget[:len(spec)]
                original_nugget = original_nugget[:(discarded_len +
                                                    length_spec_str_len +
                                                    len(spec))]
                nugtype = all_spec[nugget]
                try:
                    typeobj = sim_type.parse_type(nugtype).with_arch(
                        self.state.arch)
                except Exception:
                    # a bare "except:" here would also swallow KeyboardInterrupt / SystemExit
                    raise SimProcedureError(
                        "format specifier uses unknown type '%s'" %
                        repr(nugtype))
                # floor division keeps the byte size an integer under true division as well
                return FormatSpecifier(original_nugget, length_spec,
                                       typeobj.size // 8, typeobj.signed,
                                       discarded)

        return None
Example #9
0
    def static_exits(self, blocks):  # pylint: disable=unused-argument
        """
        Get new exits by performing static analysis and heuristics. This is a fast and best-effort approach to get new
        exits for scenarios where states are not available (e.g. when building a fast CFG).

        This base version reports no exits, and refuses to answer when the ``ADDS_EXITS`` flag is set.

        :param list blocks: Blocks that are executed before reaching this SimProcedure.
        :return: A list of dicts. Each dict should contain the following entries: 'address', 'jumpkind', and 'namehint'.
        :rtype: list
        :raises SimProcedureError: If ``ADDS_EXITS`` is truthy.
        """

        if not self.ADDS_EXITS:
            # This SimProcedure does not add any new exit
            return []

        raise SimProcedureError("static_exits() is not implemented for %s" % self)
Example #10
0
    def ret(self, expr=None):
        """
        Add an exit representing a return from this function.

        A given `expr` is optionally simplified, optionally swapped for a fresh unconstrained symbol that
        is constrained equal to it, and then stored in ``self.ret_expr``. The return address is taken from
        the calling convention's ``teardown_callsite`` when state arguments are in use; ``self.ret_to``
        overrides it.

        :param expr:    Optional return value expression.
        :raises SimProcedureError: If no return address is available.
        """
        self.inhibit_autoret = True

        if expr is not None:
            if o.SIMPLIFY_RETS in self.state.options:
                l.debug("... simplifying")
                l.debug("... before: %s", expr)
                expr = self.state.solver.simplify(expr)
                l.debug("... after: %s", expr)

            if self.symbolic_return:
                # hide the real value behind a named symbol of the same width
                width = len(expr)
                stand_in = self.state.solver.Unconstrained(
                    "symbolic_return_" + self.display_name,
                    width,
                    key=('symbolic_return', self.display_name))  #pylint:disable=maybe-no-member
                self.state.add_constraints(stand_in == expr)
                expr = stand_in

            self.ret_expr = expr

        target = None
        if self.use_state_arguments:
            if self.cc.args is None:
                # no declared argument list: pass one False per argument
                types_hint = [False] * self.num_args
            else:
                types_hint = None
            target = self.cc.teardown_callsite(self.state,
                                               expr,
                                               arg_types=types_hint)

        if not self.should_add_successors:
            l.debug("Returning without setting exits due to 'internal' call.")
            return

        forced_target = self.ret_to
        if forced_target is not None:
            target = forced_target

        if target is None:
            raise SimProcedureError(
                "No source for return address in ret() call!")

        self._exit_action(self.state, target)
        self.successors.add_successor(self.state, target,
                                      self.state.solver.true, 'Ijk_Ret')
Example #11
0
    def strtol_inner(s, state, region, base, signed, read_length=None):
        """
        Try every admissible sign / ``0x`` prefix and merge the per-prefix parses into one result.

        :param s: the string address/offset
        :param state: SimState
        :param region: memory, file, etc
        :param base: the base to use to interpret the number
        note: all numbers may start with +/- and base 16 may start with 0x
        :param signed: boolean, true means the result will be signed, otherwise unsigned
        :param read_length: int, the number of bytes parsed in strtol
        :return: expression, value, num_bytes
        the returned expression is a symbolic boolean indicating success, value will be set to 0 on failure
        value is the returned value (set to min/max on overflow)
        num_bytes is the number of bytes read in the string
        :raises SimProcedureError: if `base` lies outside [2, 36]
        """

        # sanity check
        if base > 36 or base < 2:
            raise SimProcedureError("base should be in the range [2,36]")

        # order matters here since we will use an if then else tree, and -0x will have precedence over -
        hex_prefixes = [b"0x", b"-0x", b"+0x"] if base == 16 else []
        candidates = hex_prefixes + [b"-", b"+", b""]

        conditions = []
        values = []
        byte_counts = []

        for candidate in candidates:
            # a prefix longer than the read limit is not tried
            if read_length and read_length < len(candidate):
                continue
            matched, parsed, consumed = strtol._load_num_with_prefix(
                candidate, s, region, state, base, signed, read_length)
            conditions.append(matched)
            values.append(parsed)
            byte_counts.append(consumed)

        cases = list(zip(conditions, values))

        # only one of the cases needed to match; the last case's value doubles as the default
        result = state.solver.ite_cases(cases[:-1], cases[-1][1])
        expression = state.solver.Or(*conditions)
        num_bytes = state.solver.ite_cases(zip(conditions, byte_counts),
                                           0)
        return expression, result, num_bytes
Example #12
0
 def run(self, *args, **kwargs):
     """
     Placeholder for the procedure's behaviour; this version only reports that it is missing.

     :raises SimProcedureError: Always, naming the class of `self`.
     """
     owner = self.__class__.__name__
     raise SimProcedureError("%s does not implement a run() method" % owner)
Example #13
0
    def execute(self, state, successors=None, arguments=None, ret_to=None):
        """
        Call this method with a SimState and a SimSuccessors to execute the procedure.

        Alternately, successors may be none if this is an inline call. In that case, you should
        provide arguments to the function.

        Missing ``addr``, ``arch``, ``project`` and ``cc`` are filled in on `self` from `state`; the run
        itself happens on a shallow copy of `self`, which is what gets returned.

        :param state:       The state to execute on.
        :param successors:  Successors container stored on the copy, or None.
        :param arguments:   Explicit arguments; when None they are fetched with ``arg()``.
        :param ret_to:      Stored on the copy as ``ret_to``.
        :return:            The shallow copy of `self` that carried out the run.
        :raises SimProcedureError: If no calling convention is known for the architecture, or a
                                   continuation is attempted without saved procedure data.
        """
        # fill out all the fun stuff we don't want to frontload
        if self.addr is None:
            self.addr = state.addr
        if self.arch is None:
            self.arch = state.arch
        if self.project is None:
            self.project = state.project
        if self.cc is None:
            if self.arch.name in DEFAULT_CC:
                self.cc = DEFAULT_CC[self.arch.name](self.arch)
            else:
                # interpolate here: exceptions do not apply logging-style lazy %-arguments
                raise SimProcedureError(
                    'There is no default calling convention for architecture %s.'
                    ' You must specify a calling convention.' % self.arch.name)

        inst = copy.copy(self)
        inst.state = state
        inst.successors = successors
        inst.ret_to = ret_to

        # check to see if this is a syscall and if we should override its return value
        override = None
        if inst.is_syscall:
            state.history.recent_syscall_count = 1
            if len(state.posix.queued_syscall_returns):
                override = state.posix.queued_syscall_returns.pop(0)

        if callable(override):
            # overrides may or may not accept the run= keyword
            try:
                r = override(state, run=inst)
            except TypeError:
                r = override(state)
            inst.use_state_arguments = True

        elif override is not None:
            r = override
            inst.use_state_arguments = True

        else:
            # get the arguments

            # handle if this is a continuation from a return
            if inst.is_continuation:
                if state.callstack.top.procedure_data is None:
                    raise SimProcedureError(
                        "Tried to return to a SimProcedure in an inapplicable stack frame!"
                    )

                saved_sp, sim_args, saved_local_vars = state.callstack.top.procedure_data
                state.regs.sp = saved_sp
                inst.arguments = sim_args
                inst.use_state_arguments = True
                for name, val in saved_local_vars:
                    setattr(inst, name, val)
            else:
                if arguments is None:
                    inst.use_state_arguments = True
                    sim_args = [inst.arg(_) for _ in xrange(inst.num_args)]
                    inst.arguments = sim_args
                else:
                    inst.use_state_arguments = False
                    sim_args = arguments[:inst.num_args]
                    inst.arguments = arguments

            # run it
            r = getattr(inst, inst.run_func)(*sim_args, **inst.kwargs)

        # return automatically unless the run already produced successors
        if inst.returns and (not inst.successors
                             or len(inst.successors.successors) == 0):
            inst.ret(r)

        return inst
Example #14
0
 def _setup_args(self, inst, state, args): #pylint:disable=unused-argument,no-self-use
     """Always raise SimProcedureError: this version rejects the Java-specific call."""
     complaint = "the java-specific _setup_args() method was invoked on a non-Java SimProcedure."
     raise SimProcedureError(complaint)
Example #15
0
 def _compute_ret_addr(self, expr): #pylint:disable=unused-argument,no-self-use
     """Always raise SimProcedureError: this version rejects the Java-specific call."""
     complaint = "the java-specific _compute_ret_addr() method was invoked on a non-Java SimProcedure."
     raise SimProcedureError(complaint)
Example #16
0
 def run(self, *args, **kwargs): # pylint: disable=unused-argument
     """
     Placeholder for the procedure's behaviour; this version only reports that it is missing.

     :raises SimProcedureError: Always, naming the class of `self`.
     """
     owner = self.__class__.__name__
     raise SimProcedureError("%s does not implement a run() method" % owner)
Example #17
0
    def replace(self, startpos, args):
        """
        Implement printf - based on the stored format specifier information, format the values from the arg getter
        function `args` into a string.

        ``str`` components are wrapped in a BVV and BV components are appended unchanged. ``%s`` loads its
        string through ``self._get_str_at``. ``%n`` stores a fresh symbolic value at the argument and adds
        nothing to the output. Every other specifier concretizes its argument, masks it to the specifier's
        size, applies the sign and renders it as text.

        :param startpos:        The index of the first argument to be used by the first element of the format string
        :param args:            A function which, given an argument index, returns the integer argument to the current function at that index
        :return:                The result formatted string
        :raises SimProcedureError: If a specifier type has no rendering rule here.
        """

        argpos = startpos
        string = None

        for component in self.components:
            # if this is just concrete data
            if isinstance(component, str):
                string = self._add_to_string(
                    string, self.parser.state.se.BVV(component))
            elif isinstance(component, claripy.ast.BV):
                string = self._add_to_string(string, component)
            else:
                # okay now for the interesting stuff
                # what type of format specifier is it?
                fmt_spec = component
                if fmt_spec.spec_type == 's':
                    str_ptr = args(argpos)
                    string = self._add_to_string(string,
                                                 self._get_str_at(str_ptr))
                # integers, for most of these we'll end up concretizing values..
                else:
                    i_val = args(argpos)
                    if fmt_spec.spec_type == 'n':
                        # the written count is not modelled; an unconstrained value is stored instead
                        self.state.memory.store(
                            i_val,
                            self.state.se.BVS('format_%n',
                                              self.state.arch.bits))
                    else:
                        c_val = int(self.parser.state.se.eval(i_val))
                        # truncate to the specifier's width, then reinterpret as two's complement if signed
                        c_val &= (1 << (fmt_spec.size * 8)) - 1
                        if fmt_spec.signed and (c_val &
                                                (1 <<
                                                 ((fmt_spec.size * 8) - 1))):
                            c_val -= (1 << fmt_spec.size * 8)

                        if fmt_spec.spec_type in ('d', 'i'):
                            s_val = str(c_val)
                        elif fmt_spec.spec_type == 'u':
                            s_val = str(c_val)
                        elif fmt_spec.spec_type == 'c':
                            s_val = chr(c_val & 0xff)
                        elif fmt_spec.spec_type == 'x':
                            s_val = hex(c_val)[2:].rstrip('L')
                        elif fmt_spec.spec_type == 'o':
                            # Slicing oct() depends on the interpreter's prefix ("0" vs "0o") and
                            # turns the value 0 into an empty string; %-formatting has neither problem.
                            s_val = '%o' % c_val
                        elif fmt_spec.spec_type == 'p':
                            s_val = hex(c_val).rstrip('L')
                        else:
                            raise SimProcedureError(
                                "Unimplemented format specifier '%s'" %
                                fmt_spec.spec_type)

                        string = self._add_to_string(
                            string, self.parser.state.se.BVV(s_val))

                argpos += 1

        return string
Example #18
0
    def execute(self, state, successors=None, arguments=None, ret_to=None):
        """
        Call this method with a SimState and a SimSuccessors to execute the procedure.

        Alternately, successors may be none if this is an inline call. In that case, you should
        provide arguments to the function.

        Missing ``addr``, ``arch``, ``project`` and ``cc`` are filled in on `self` from `state`; the run
        itself happens on a shallow copy of `self`, which is what gets returned. The 'simprocedure'
        inspect event is fired before and after the run, and a breakpoint may replace the result through
        the ``simprocedure_result`` attribute.

        :param state:       The state to execute on.
        :param successors:  Successors container stored on the copy, or None.
        :param arguments:   Explicit arguments; when None they are fetched with ``arg()``.
        :param ret_to:      Stored on the copy as ``ret_to``.
        :return:            The shallow copy of `self` that carried out the run.
        :raises SimProcedureError: If no calling convention is known for the architecture, or a
                                   continuation is attempted without saved procedure data.
        :raises SimShadowStackError: If a continuation's saved address differs from this procedure's.
        """
        # fill out all the fun stuff we don't want to frontload
        if self.addr is None and not state.regs._ip.symbolic:
            self.addr = state.addr
        if self.arch is None:
            self.arch = state.arch
        if self.project is None:
            self.project = state.project
        if self.cc is None:
            if self.arch.name in DEFAULT_CC:
                self.cc = DEFAULT_CC[self.arch.name](self.arch)
            else:
                raise SimProcedureError('There is no default calling convention for architecture %s.'
                                        ' You must specify a calling convention.' % self.arch.name)

        # everything below works on a per-call shallow copy
        inst = copy.copy(self)
        inst.state = state
        inst.successors = successors
        inst.ret_to = ret_to
        inst.inhibit_autoret = False

        # check to see if this is a syscall and if we should override its return value
        if inst.is_syscall:
            state.history.recent_syscall_count = 1

        state._inspect(
            'simprocedure',
            BP_BEFORE,
            simprocedure_name=inst.display_name,
            simprocedure_addr=self.addr,
            simprocedure=inst,
            simprocedure_result=NO_OVERRIDE
        )

        # anything other than the NO_OVERRIDE sentinel is used as the result and the run is skipped
        r = state._inspect_getattr('simprocedure_result', NO_OVERRIDE)
        if r is NO_OVERRIDE:
            # get the arguments

            # If the simprocedure is related to a Java function call the appropriate setup_args method
            # TODO: should we move this?
            if self.is_java:
                sim_args = self._setup_args(inst, state, arguments) #pylint:disable=assignment-from-no-return
                # NOTE(review): this writes to the template `self`, whereas the other branches set the
                # flag on the copy `inst` - confirm that is intended
                self.use_state_arguments = False

            # handle if this is a continuation from a return
            elif inst.is_continuation:
                if state.callstack.top.procedure_data is None:
                    raise SimProcedureError("Tried to return to a SimProcedure in an inapplicable stack frame!")

                saved_sp, sim_args, saved_local_vars, saved_lr, ideal_addr = state.callstack.top.procedure_data
                # the saved frame must belong to this very procedure address
                if ideal_addr != inst.addr:
                    raise SimShadowStackError("I can't emulate this consequence of stack smashing")
                state.regs.sp = saved_sp
                if saved_lr is not None:
                    state.regs.lr = saved_lr
                inst.arguments = sim_args
                inst.use_state_arguments = True
                # value currently held at the architecture's return-value register offset
                inst.call_ret_expr = state.registers.load(state.arch.ret_offset, state.arch.bytes, endness=state.arch.register_endness)
                for name, val in saved_local_vars:
                    setattr(inst, name, val)
            else:
                if arguments is None:
                    inst.use_state_arguments = True
                    sim_args = [ inst.arg(_) for _ in range(inst.num_args) ]
                    inst.arguments = sim_args
                else:
                    # the run function gets at most num_args of them; the full list is kept on inst
                    inst.use_state_arguments = False
                    sim_args = arguments[:inst.num_args]
                    inst.arguments = arguments

            # run it
            l.debug("Executing %s%s%s%s%s with %s, %s", *(inst._describe_me() + (sim_args, inst.kwargs)))
            r = getattr(inst, inst.run_func)(*sim_args, **inst.kwargs)

        state._inspect(
            'simprocedure',
            BP_AFTER,
            simprocedure_result=r
        )
        # the attribute is read back after the event, so the result used below may differ from the one passed in
        r = state._inspect_getattr('simprocedure_result', r)

        # return automatically unless ret() was already called (it sets inhibit_autoret)
        if inst.returns and inst.is_function and not inst.inhibit_autoret:
            inst.ret(r)

        return inst
Example #19
0
    def interpret(self, addr, startpos, args, region=None):
        """
        Interpret a format string, reading the data at `addr` in `region` into `args` starting at `startpos`.

        :param addr:        The position in `region` at which parsing starts
        :param startpos:    The index of the first argument corresponding to the first format element
        :param args:        A function which, given the index of an argument, returns the pointer to store into
        :param region:      The storage to read from; defaults to the memory of the parser's state
        :return:            A tuple of (new position, number of items parsed)
        :raises SimProcedureError: if a format specifier other than s, d, u, x or c is encountered
        """

        # TODO: we only support one format specifier in interpretation for now

        # count with a generator expression: calling len() on the result of filter() only
        # works where filter() returns a list, and raises TypeError where it returns an iterator
        format_specifier_count = sum(
            1 for x in self.components if isinstance(x, FormatSpecifier))
        if format_specifier_count > 1:
            l.warning(
                "We don't support more than one format specifiers in format strings."
            )

        state = self.parser.state
        if region is None:
            region = state.memory

        bits = state.arch.bits
        # symbolic count of conversions that could not be parsed
        failed = state.se.BVV(0, bits)
        argpos = startpos
        position = addr
        for component in self.components:
            if isinstance(component, str):
                # TODO we skip non-format-specifiers in format string interpretation for now
                # if the region doesn't match the concrete component, we need to return immediately
                continue

            fmt_spec = component
            try:
                dest = args(argpos)
            except SimProcedureArgumentError:
                # NOTE(review): the stores below are still attempted with dest=None - confirm this is intended
                dest = None

            if fmt_spec.spec_type == 's':
                # set some limits for the find
                max_str_len = state.libc.max_str_len
                max_sym_bytes = state.libc.buf_symbolic_bytes

                # has the length of the format been limited by the string itself?
                if fmt_spec.length_spec is not None:
                    max_str_len = fmt_spec.length_spec
                    max_sym_bytes = fmt_spec.length_spec

                # TODO: look for limits on other characters which scanf is sensitive to, '\x00', '\x20'
                # only the first element of the find result (the match position) is used
                ohr, _, _ = region.find(
                    position,
                    state.se.BVV('\n'),
                    max_str_len,
                    max_symbolic_bytes=max_sym_bytes)

                # if no newline is found, mm is position + max_strlen
                # If-branch will really only happen for format specifiers with a length
                mm = state.se.If(ohr == 0, position + max_str_len, ohr)
                # we're just going to concretize the length, load will do this anyways
                length = state.se.max_int(mm - position)
                src_str = region.load(position, length)

                # TODO all of these should be delimiters we search for above
                # add that the contents of the string cannot be any scanf %s string delimiters
                for delimiter in set(FormatString.SCANF_DELIMITERS) - {'\x00'}:
                    delim_bvv = state.se.BVV(delimiter)
                    for offset in range(length):
                        state.add_constraints(
                            region.load(position + offset, 1) != delim_bvv)

                # write it out to the pointer
                state.memory.store(dest, src_str)
                # store the terminating null byte
                state.memory.store(dest + length, state.se.BVV(0, 8))

                position += length

            else:

                # XXX: atoi only supports strings of one byte
                if fmt_spec.spec_type in ['d', 'u', 'x']:
                    base = 16 if fmt_spec.spec_type == 'x' else 10
                    status, i, num_bytes = self.parser._sim_atoi_inner(
                        position,
                        region,
                        base=base,
                        read_length=fmt_spec.length_spec)
                    # increase failed count if we were unable to parse it
                    failed = state.se.If(status, failed, failed + 1)
                    position += num_bytes
                elif fmt_spec.spec_type == 'c':
                    # a single byte, widened to the architecture word size
                    i = region.load(position, 1)
                    i = i.zero_extend(bits - 8)
                    position += 1
                else:
                    raise SimProcedureError(
                        "unsupported format spec '%s' in interpret" %
                        fmt_spec.spec_type)

                # keep only the low fmt_spec.size bytes of the parsed value before storing it
                i = state.se.Extract(fmt_spec.size * 8 - 1, 0, i)
                state.memory.store(
                    dest,
                    i,
                    size=fmt_spec.size,
                    endness=state.arch.memory_endness)

            argpos += 1

        # we return (new position, number of items parsed)
        # new position is used for interpreting from a file, so we can increase file position
        return (position, ((argpos - startpos) - failed))
Example #20
0
    def replace(self, va_arg):
        """
        Implement printf - based on the stored format specifier information, format the values from the arg getter
        function `va_arg` into a string.

        Concrete byte components and bitvector components are appended as they are. A `%s` specifier appends the
        string found at the pointer argument. Every other specifier takes one argument, concretizes it and appends
        its textual form.

        :param va_arg:          A function which takes a type and returns the next argument of that type
        :return:                The result formatted string
        :raises SimProcedureError: if a format specifier is not implemented
        """

        string = None

        for component in self.components:
            # if this is just concrete data
            if isinstance(component, bytes):
                string = self._add_to_string(
                    string, self.parser.state.solver.BVV(component))
                continue
            if isinstance(component, str):
                raise Exception("this branch should be impossible?")
            if isinstance(component, claripy.ast.BV):  # pylint:disable=isinstance-second-argument-not-valid-type
                string = self._add_to_string(string, component)
                continue

            # okay now for the interesting stuff
            # what type of format specifier is it?
            fmt_spec = component
            if fmt_spec.spec_type == b's':
                # with ".*" the length is itself an argument and is fetched before the pointer
                if fmt_spec.length_spec == b".*":
                    str_length = va_arg('size_t')
                else:
                    str_length = None
                str_ptr = va_arg('char*')
                string = self._add_to_string(
                    string, self._get_str_at(str_ptr, max_length=str_length))
                continue

            # integers, for most of these we'll end up concretizing values..
            # ummmmmmm this is a cheap translation but I think it should work
            i_val = va_arg('void*')
            c_val = int(self.parser.state.solver.eval(i_val))
            # truncate to the width of the specifier, then reinterpret as signed if needed
            size_bits = fmt_spec.size * 8
            c_val &= (1 << size_bits) - 1
            if fmt_spec.signed and (c_val & (1 << (size_bits - 1))):
                c_val -= (1 << size_bits)

            spec_type = fmt_spec.spec_type
            if spec_type in (b'd', b'i', b'u'):
                s_val = str(c_val)
            elif spec_type == b'c':
                s_val = chr(c_val & 0xff)
            elif spec_type == b'x':
                # drop the "0x" prefix
                s_val = hex(c_val)[2:]
            elif spec_type == b'o':
                # drop the "0o" prefix
                s_val = oct(c_val)[2:]
            elif spec_type == b'p':
                s_val = hex(c_val)
            else:
                raise SimProcedureError(
                    "Unimplemented format specifier '%s'" %
                    fmt_spec.spec_type)

            if isinstance(fmt_spec.length_spec, int):
                s_val = s_val.rjust(fmt_spec.length_spec, fmt_spec.pad_chr)

            # latin-1 maps code points 0-255 to exactly one byte each, so a %c value >= 0x80
            # stays a single byte; the default UTF-8 encoding would expand it to two bytes.
            # Everything else produced above is ASCII, for which both encodings agree.
            string = self._add_to_string(
                string,
                self.parser.state.solver.BVV(s_val.encode('latin-1')))

        return string
Example #21
0
    def interpret(self, startpos, args, addr=None, simfd=None):
        """
        implement scanf - extract formatted data from memory or a file according to the stored format
        specifiers and store them into the pointers extracted from `args`.

        Two strategies are used. When `simfd` is backed by `SimPackets`, the input is read component by
        component and constrained to match the format. Otherwise the data is parsed out of a region
        (the storage behind `simfd`, or the state's memory) starting at `addr`.

        :param startpos:    The index of the first argument corresponding to the first format element
        :param args:        A function which, given the index of an argument to the function, returns that argument
        :param addr:        The address in the memory to extract data from, or...
        :param simfd:       A file descriptor to use for reading data from
        :return:            The number of arguments parsed
        :raises SimProcedureError: in the region-based path, if a specifier other than s, d, i, u, x or c is found
        """
        # --- packet-stream path: read each component and constrain it to match the format ---
        if simfd is not None and isinstance(simfd.read_storage, SimPackets):
            argnum = startpos
            for component in self.components:
                if type(component) is bytes:
                    # literal text in the format: the input must contain exactly these bytes
                    sdata, _ = simfd.read_data(len(component),
                                               short_reads=False)
                    self.state.solver.add(sdata == component)
                elif isinstance(component, claripy.Bits):
                    # bitvector component: len() is used as a bit count here, hence the division by 8
                    sdata, _ = simfd.read_data(len(component) // 8,
                                               short_reads=False)
                    self.state.solver.add(sdata == component)
                elif component.spec_type == b's':
                    # read up to the specifier's length, or the libc symbolic-buffer limit if it has none
                    if component.length_spec is None:
                        sdata, slen = simfd.read_data(
                            self.state.libc.buf_symbolic_bytes)
                    else:
                        sdata, slen = simfd.read_data(component.length_spec)
                    # no byte of the string may be one of the scanf delimiters
                    for byte in sdata.chop(8):
                        self.state.solver.add(
                            claripy.And(*[
                                byte != char for char in self.SCANF_DELIMITERS
                            ]))
                    self.state.memory.store(args(argnum), sdata, size=slen)
                    # null-terminate right after the slen bytes that were stored
                    self.state.memory.store(
                        args(argnum) + slen, claripy.BVV(0, 8))
                    argnum += 1
                elif component.spec_type == b'c':
                    sdata, _ = simfd.read_data(1, short_reads=False)
                    self.state.memory.store(args(argnum), sdata)
                    argnum += 1
                else:
                    # every other specifier is treated as an integer conversion
                    bits = component.size * 8
                    if component.spec_type == b'x':
                        base = 16
                    elif component.spec_type == b'o':
                        base = 8
                    else:
                        base = 10

                    # here's the variable representing the result of the parsing
                    target_variable = self.state.solver.BVS(
                        'scanf_' + component.string.decode(),
                        bits,
                        key=('api', 'scanf', argnum - startpos,
                             component.string))
                    negative = claripy.SLT(target_variable, 0)

                    # how many digits does it take to represent this variable fully?
                    max_digits = int(math.ceil(math.log(2**bits, base)))

                    # how many digits does the format specify?
                    spec_digits = component.length_spec

                    # how many bits can we specify as input?
                    available_bits = float(
                        'inf'
                    ) if spec_digits is None else spec_digits * math.log(
                        base, 2)
                    not_enough_bits = available_bits < bits

                    # how many digits will we model this input as?
                    digits = max_digits if spec_digits is None else spec_digits

                    # constrain target variable range explicitly if it can't take on all possible values
                    # (the lower bound has one digit fewer because a negative value spends one on the sign)
                    if not_enough_bits:
                        self.state.solver.add(
                            self.state.solver.And(
                                self.state.solver.SLE(target_variable,
                                                      (base**digits) - 1),
                                self.state.solver.SGE(
                                    target_variable,
                                    -(base**(digits - 1) - 1))))

                    # perform the parsing in reverse - constrain the input digits to be the string version of the input
                    # this only works because we're reading from a packet stream and therefore nobody has the ability
                    # to add other constraints to this data!
                    # this makes z3's job EXTREMELY easy
                    sdata, _ = simfd.read_data(digits, short_reads=False)
                    # walk the input from the last byte, so that i is the power of `base` for that digit
                    for i, digit in enumerate(reversed(sdata.chop(8))):
                        digit_value = (target_variable // (base**i)) % base
                        digit_ascii = digit_value + ord('0')
                        if base > 10:
                            # digit values of 10 and above are written as lowercase letters
                            digit_ascii = claripy.If(
                                digit_value >= 10,
                                digit_value + (-10 + ord('a')), digit_ascii)

                        # if there aren't enough bits, we can increase the range by accounting for the possibility that
                        # the first digit is a minus sign
                        if not_enough_bits:
                            if i == digits - 1:
                                # i == digits - 1 is the first byte of the input
                                neg_digit_ascii = ord('-')
                            else:
                                neg_digit_value = (-target_variable //
                                                   (base**i)) % base
                                neg_digit_ascii = neg_digit_value + ord('0')
                                if base > 10:
                                    neg_digit_ascii = claripy.If(
                                        neg_digit_value >= 10,
                                        neg_digit_value + (-10 + ord('a')),
                                        neg_digit_ascii)

                            digit_ascii = claripy.If(negative, neg_digit_ascii,
                                                     digit_ascii)

                        # only the low byte of the computed ASCII value is compared with the input byte
                        self.state.solver.add(digit == digit_ascii[7:0])

                    self.state.memory.store(
                        args(argnum),
                        target_variable,
                        endness=self.state.arch.memory_endness)
                    argnum += 1

            return argnum - startpos

        # --- region-based path: parse the data out of a file's storage or out of memory ---

        # TODO: we only support one format specifier in interpretation for now

        format_specifier_count = sum(1 for x in self.components
                                     if isinstance(x, FormatSpecifier))
        if format_specifier_count > 1:
            l.warning(
                "We don't support more than one format specifiers in format strings."
            )

        if simfd is not None:
            # reading from a file: parse from its backing storage, starting at its current read position
            region = simfd.read_storage
            addr = simfd._pos if hasattr(
                simfd, '_pos') else simfd._read_pos  # XXX THIS IS BAD
        else:
            region = self.parser.state.memory

        bits = self.parser.state.arch.bits
        # symbolic count of conversions that could not be parsed
        failed = self.parser.state.solver.BVV(0, bits)
        argpos = startpos
        position = addr
        for component in self.components:
            if isinstance(component, bytes):
                # TODO we skip non-format-specifiers in format string interpretation for now
                # if the region doesn't match the concrete component, we need to return immediately
                pass
            else:
                fmt_spec = component
                try:
                    dest = args(argpos)
                except SimProcedureArgumentError:
                    # NOTE(review): the stores below are still attempted with dest=None - confirm this is intended
                    dest = None
                if fmt_spec.spec_type == b's':
                    # set some limits for the find
                    max_str_len = self.parser.state.libc.max_str_len
                    max_sym_bytes = self.parser.state.libc.buf_symbolic_bytes

                    # has the length of the format been limited by the string itself?
                    if fmt_spec.length_spec is not None:
                        max_str_len = fmt_spec.length_spec
                        max_sym_bytes = fmt_spec.length_spec

                    # TODO: look for limits on other characters which scanf is sensitive to, '\x00', '\x20'
                    # only ohr is used below; ohc and ohi are ignored
                    ohr, ohc, ohi = region.find(
                        position,
                        self.parser.state.solver.BVV(b'\n'),
                        max_str_len,
                        max_symbolic_bytes=max_sym_bytes)

                    # if no newline is found, mm is position + max_strlen
                    # If-branch will really only happen for format specifiers with a length
                    mm = self.parser.state.solver.If(ohr == 0,
                                                     position + max_str_len,
                                                     ohr)
                    # we're just going to concretize the length, load will do this anyways
                    length = self.parser.state.solver.max_int(mm - position)
                    src_str = region.load(position, length)

                    # TODO all of these should be delimiters we search for above
                    # add that the contents of the string cannot be any scanf %s string delimiters
                    for delimiter in set(FormatString.SCANF_DELIMITERS):
                        delim_bvv = self.parser.state.solver.BVV(delimiter)
                        for i in range(length):
                            self.parser.state.add_constraints(
                                region.load(position + i, 1) != delim_bvv)

                    # write it out to the pointer
                    self.parser.state.memory.store(dest, src_str)
                    # store the terminating null byte
                    self.parser.state.memory.store(
                        dest + length, self.parser.state.solver.BVV(0, 8))

                    position += length

                else:

                    # XXX: atoi only supports strings of one byte
                    if fmt_spec.spec_type in [b'd', b'i', b'u', b'x']:
                        base = 16 if fmt_spec.spec_type == b'x' else 10
                        status, i, num_bytes = self.parser._sim_atoi_inner(
                            position,
                            region,
                            base=base,
                            read_length=fmt_spec.length_spec)
                        # increase failed count if we were unable to parse it
                        failed = self.parser.state.solver.If(
                            status, failed, failed + 1)
                        position += num_bytes
                    elif fmt_spec.spec_type == b'c':
                        # a single byte, widened to the architecture word size
                        i = region.load(position, 1)
                        i = i.zero_extend(bits - 8)
                        position += 1
                    else:
                        raise SimProcedureError(
                            "unsupported format spec '%s' in interpret" %
                            fmt_spec.spec_type)

                    # keep only the low fmt_spec.size bytes of the parsed value before storing it
                    i = self.parser.state.solver.Extract(
                        fmt_spec.size * 8 - 1, 0, i)
                    self.parser.state.memory.store(
                        dest,
                        i,
                        size=fmt_spec.size,
                        endness=self.parser.state.arch.memory_endness)

                argpos += 1

        if simfd is not None:
            # read the parsed span through the file descriptor itself and require that all of it was read
            _, realsize = simfd.read_data(position - addr)
            self.state.solver.add(realsize == position - addr)

        # arguments visited minus the (symbolic) number of failed conversions
        return (argpos - startpos) - failed
Example #22
0
    def execute(self, state, successors=None, arguments=None, ret_to=None):
        """
        Run this procedure against a SimState and return the instance that was used for the run.

        The procedure object itself acts as a template: missing `addr`, `arch`, `project` and `cc` are filled
        in on it from `state`, then a shallow copy is made and bound to `state` for this execution.

        `successors` may be None for an inline call, in which case `arguments` should be provided.

        :param state:       The SimState to execute on
        :param successors:  The SimSuccessors to attach to the run, or None
        :param arguments:   Explicit arguments; when None they are fetched with `inst.arg`
        :param ret_to:      Stored on the run instance as `ret_to`
        :return:            The per-run copy of this procedure
        :raises SimProcedureError: if no calling convention is set and the architecture has no default, or if a
                                   continuation is resumed in a stack frame without procedure data
        """
        # fill out all the fun stuff we don't want to frontload
        if self.addr is None and not state.regs.ip.symbolic:
            self.addr = state.addr
        if self.arch is None:
            self.arch = state.arch
        if self.project is None:
            self.project = state.project
        if self.cc is None:
            if self.arch.name not in DEFAULT_CC:
                raise SimProcedureError('There is no default calling convention for architecture %s.'
                                        ' You must specify a calling convention.' % self.arch.name)
            self.cc = DEFAULT_CC[self.arch.name](self.arch)

        # everything specific to this run lives on a shallow copy
        inst = copy.copy(self)
        inst.state = state
        inst.successors = successors
        inst.ret_to = ret_to
        inst.inhibit_autoret = False

        # a syscall may have a queued return value (or callable) that replaces the real run
        override = None
        if inst.is_syscall:
            state.history.recent_syscall_count = 1
            queued = state.posix.queued_syscall_returns
            if len(queued) > 0:
                override = queued.pop(0)

        if override is not None:
            if callable(override):
                # try the two-argument form first, then fall back to passing the state only
                try:
                    r = override(state, run=inst)
                except TypeError:
                    r = override(state)
            else:
                r = override
            inst.use_state_arguments = True

        else:
            # work out the arguments for the run function

            if self.is_java:
                # Java function calls go through their own argument setup
                # TODO: should we move this?
                sim_args = self._setup_args(inst, state, arguments)
                # NOTE(review): this writes to the template `self`, not to `inst` - confirm that is intended
                self.use_state_arguments = False

            elif inst.is_continuation:
                # resuming after a call made by this procedure: restore what was saved in the stack frame
                if state.callstack.top.procedure_data is None:
                    raise SimProcedureError("Tried to return to a SimProcedure in an inapplicable stack frame!")

                saved_sp, sim_args, saved_local_vars, saved_lr = state.callstack.top.procedure_data
                state.regs.sp = saved_sp
                if saved_lr is not None:
                    state.regs.lr = saved_lr
                inst.arguments = sim_args
                inst.use_state_arguments = True
                inst.call_ret_expr = state.registers.load(
                    state.arch.ret_offset,
                    state.arch.bytes,
                    endness=state.arch.register_endness,
                )
                for attr_name, attr_value in saved_local_vars:
                    setattr(inst, attr_name, attr_value)

            elif arguments is None:
                # no explicit arguments: fetch each one through inst.arg
                inst.use_state_arguments = True
                sim_args = [inst.arg(index) for index in range(inst.num_args)]
                inst.arguments = sim_args

            else:
                # explicit arguments: only the first num_args are passed to the run function
                inst.use_state_arguments = False
                sim_args = arguments[:inst.num_args]
                inst.arguments = arguments

            # run it
            log_args = inst._describe_me() + (sim_args, inst.kwargs)
            l.debug("Executing %s%s%s%s%s with %s, %s", *log_args)
            run_function = getattr(inst, inst.run_func)
            r = run_function(*sim_args, **inst.kwargs)

        if inst.returns and inst.is_function and not inst.inhibit_autoret:
            inst.ret(r)

        return inst