def _match_spec(self, nugget):
    """
    Match the start of the byte string `nugget` to a format specifier.

    A leading ``.*``, ``0`` or ``.`` is recognised first, then a run of ASCII
    digits is read as the numeric length, and finally the remaining text is
    compared against every key of ``self._all_spec``.

    :param nugget:  Byte string expected to begin with a format specifier,
                    optionally preceded by a length/precision.
    :return:        A FormatSpecifier for the first entry of ``self._all_spec``
                    that `nugget` starts with, or None if nothing matches.
    :raises SimProcedureError: If the matched type cannot be bound to an architecture.
    """
    # TODO: handle positional modifiers and other similar format string tricks.
    all_spec = self._all_spec

    # iterate through nugget throwing away anything which is an int
    # TODO store this in a size variable
    original_nugget = nugget
    length_str = []
    length_spec = None
    # Number of non-digit prefix bytes (".*" or ".") stripped from `nugget`.
    # They must be counted when slicing `original_nugget` below, otherwise the
    # returned specifier string loses its conversion character (".5d" -> ".5").
    prefix_len = 0
    pad_chr = " "

    if nugget.startswith(b".*"):
        # ".*": precision is specified as an argument
        nugget = nugget[2:]
        length_spec = b".*"
        prefix_len = 2
    elif nugget.startswith(b"0"):
        # the "0" stays in `nugget` and is consumed as a digit by the loop below
        pad_chr = "0"
    elif nugget.startswith(b"."):
        pad_chr = "0"
        nugget = nugget[1:]
        prefix_len = 1

    for j, c in enumerate(nugget):
        if c in ascii_digits:
            length_str.append(c)
        else:
            nugget = nugget[j:]
            if length_spec is None:
                length_spec = None if len(length_str) == 0 else int(bytes(length_str))
            break

    # we need the length of the format's length specifier to extract the format and nothing else
    length_spec_str_len = prefix_len + len(length_str)

    # is it an actual format?
    for spec in all_spec:
        if nugget.startswith(spec):
            # this is gross coz sim_type is gross..
            nugget = nugget[:len(spec)]
            original_nugget = original_nugget[:(length_spec_str_len + len(spec))]
            nugtype: 'SimType' = all_spec[nugget]
            try:
                typeobj = nugtype.with_arch(
                    self.state.arch if self.state is not None else self.project.arch)
            except Exception as exc:
                raise SimProcedureError(
                    "format specifier uses unknown type '%s'" % repr(nugtype)) from exc
            return FormatSpecifier(original_nugget, length_spec, pad_chr,
                                   typeobj.size // 8, typeobj.signed)

    return None
def ret(self, expr=None):
    """
    Add an exit representing a return from this function.

    The (optionally simplified / replaced-by-unconstrained) return expression
    is recorded in ``self.ret_expr``. The return address comes from
    ``_compute_ret_addr`` for Soot addresses, from the calling convention's
    ``teardown_callsite`` when state arguments are in use, and is overridden by
    ``self.ret_to`` when that is set.

    :param expr: The value to return, or None for no return value.
    :raises SimProcedureError: If successors should be added but no return address is available.
    """
    self.inhibit_autoret = True

    if expr is not None:
        if o.SIMPLIFY_RETS in self.state.options:
            l.debug("... simplifying")
            l.debug("... before: %s", expr)
            expr = self.state.solver.simplify(expr)
            l.debug("... after: %s", expr)

        if self.symbolic_return:
            # swap the concrete result for a fresh variable constrained to equal it
            width = len(expr)
            stand_in = self.state.solver.Unconstrained(
                "symbolic_return_" + self.display_name,
                width,
                key=('symbolic_return', self.display_name),
            )  # pylint:disable=maybe-no-member
            self.state.add_constraints(stand_in == expr)
            expr = stand_in

    self.ret_expr = expr

    # TODO: I had to put this check here because I don't understand why self.use_state_arguments gets reset to true
    # when calling the function ret. at the calling point the attribute is set to False
    if isinstance(self.addr, SootAddressDescriptor):
        target = self._compute_ret_addr(expr)  # pylint:disable=assignment-from-no-return
    elif self.use_state_arguments:
        declared_args = self.cc.args
        if declared_args is None:
            # fall back to using self.num_args
            float_flags = [False] * self.num_args
        else:
            float_flags = [
                isinstance(arg, (SimTypeFloat, SimTypeDouble)) for arg in declared_args
            ]
        target = self.cc.teardown_callsite(self.state, expr, arg_types=float_flags)
    else:
        target = None

    if not self.should_add_successors:
        l.debug("Returning without setting exits due to 'internal' call.")
        return

    if self.ret_to is not None:
        target = self.ret_to

    if target is None:
        raise SimProcedureError("No source for return address in ret() call!")

    self._prepare_ret_state()
    self._exit_action(self.state, target)
    self.successors.add_successor(self.state, target, self.state.solver.true, 'Ijk_Ret')
def replace(self, startpos, args):
    """
    Implement printf - based on the stored format specifier information, format the values from the arg getter
    function `args` into a string.

    :param startpos:    The index of the first argument to be used by the first element of the format string
    :param args:        A function which, given an argument index, returns the integer argument to the current
                        function at that index
    :return:            The result formatted string
    :raises SimProcedureError: If a format specifier is not implemented.
    """
    argpos = startpos
    string = None

    for component in self.components:
        # if this is just concrete data
        if isinstance(component, bytes):
            string = self._add_to_string(string, self.parser.state.solver.BVV(component))
        elif isinstance(component, str):
            raise Exception("this branch should be impossible?")
        elif isinstance(component, claripy.ast.BV):
            string = self._add_to_string(string, component)
        else:
            # okay now for the interesting stuff
            # what type of format specifier is it?
            fmt_spec = component
            if fmt_spec.spec_type == b's':
                if fmt_spec.length_spec == b".*":
                    # the precision is itself an argument and precedes the string pointer
                    str_length = args(argpos)
                    argpos += 1
                else:
                    str_length = None
                str_ptr = args(argpos)
                string = self._add_to_string(string, self._get_str_at(str_ptr, max_length=str_length))
            # integers, for most of these we'll end up concretizing values..
            else:
                i_val = args(argpos)
                c_val = int(self.parser.state.solver.eval(i_val))
                # truncate to the specifier's width, then reinterpret as signed if needed
                c_val &= (1 << (fmt_spec.size * 8)) - 1
                if fmt_spec.signed and (c_val & (1 << ((fmt_spec.size * 8) - 1))):
                    c_val -= (1 << fmt_spec.size * 8)

                if fmt_spec.spec_type in (b'd', b'i'):
                    s_val = str(c_val)
                elif fmt_spec.spec_type == b'u':
                    s_val = str(c_val)
                elif fmt_spec.spec_type == b'c':
                    s_val = chr(c_val & 0xff)
                elif fmt_spec.spec_type == b'x':
                    s_val = hex(c_val)[2:]
                elif fmt_spec.spec_type == b'o':
                    s_val = oct(c_val)[2:]
                elif fmt_spec.spec_type == b'p':
                    s_val = hex(c_val)
                else:
                    raise SimProcedureError("Unimplemented format specifier '%s'" % fmt_spec.spec_type)

                # latin-1 maps code points 0-255 to single bytes; the default UTF-8
                # would turn a %c value >= 0x80 into two bytes.
                string = self._add_to_string(
                    string, self.parser.state.solver.BVV(s_val.encode("latin-1")))

            argpos += 1

    return string
def io_file_data_for_arch(arch):
    """
    Look up the _IO_FILE data registered for an architecture.

    :param arch: An object whose ``name`` attribute is used as the key into ``_IO_FILE``.
    :return:     The ``_IO_FILE`` entry stored under ``arch.name``.
    :raises SimProcedureError: If ``_IO_FILE`` has no entry for ``arch.name``.
    """
    if arch.name in _IO_FILE:
        return _IO_FILE[arch.name]
    raise SimProcedureError("missing _IO_FILE offsets for arch: %s" % arch.name)
def _parse(self, fmt_idx):
    """
    Parse the format string referenced by one of this procedure's arguments.

    :param fmt_idx: The index of the (pointer to the) format string in the arguments list.
    :returns:       A FormatString object which can be used for replacing the format specifiers with
                    arguments or for scanning into arguments.
    :raises SimProcedureError: If the pointer is symbolic, or the string length has more than one solution.
    """
    ptr = self.arg(fmt_idx)
    if self.state.solver.symbolic(ptr):
        raise SimProcedureError("Symbolic pointer to (format) string :(")

    strlen = self._sim_strlen(ptr)
    if self.state.solver.symbolic(strlen):
        # a symbolic length is acceptable only if it has exactly one solution
        candidates = self.state.solver.eval_upto(strlen, 2)
        if len(candidates) != 1:
            raise SimProcedureError("Symbolic (format) string, game over :(")
        strlen = candidates[0]

    if self.state.solver.is_true(strlen == 0):
        return FormatString(self, [b""])

    raw = self.state.memory.load(ptr, strlen)

    # walk the loaded expression 8 bits at a time, most significant bits first
    pieces = []
    for top_bit in range(raw.size() - 1, -1, -8):
        byte_expr = raw[top_bit:top_bit - 7]
        try:
            value = self.state.solver.eval_one(byte_expr)
        except SimSolverError:
            # For symbolic chars, just keep them symbolic
            pieces.append(byte_expr)
            continue
        # Concrete chars are directly appended to the list
        pieces.append(bytes([value]))

    # make a FormatString object
    parsed = self._get_fmt(pieces)
    l.debug("Fmt: %r", parsed)
    return parsed
def replace(self, startpos, args):
    """
    Produce a new string based of the format string self with args `args` and return a new string, possibly
    symbolic.

    :param startpos:    Index of the argument consumed by the first format specifier.
    :param args:        A function which, given an argument index, returns that argument.
    :return:            The formatted string, or None if there are no components.
    :raises SimProcedureError: If a format specifier is not implemented.
    """
    argpos = startpos
    string = None

    for component in self.components:
        # if this is just concrete data
        if isinstance(component, str):
            string = self._add_to_string(string, self.parser.state.se.BVV(component))
        elif isinstance(component, claripy.ast.BV):
            string = self._add_to_string(string, component)
        else:
            # okay now for the interesting stuff
            # what type of format specifier is it?
            fmt_spec = component
            if fmt_spec.spec_type == 's':
                str_ptr = args(argpos)
                string = self._add_to_string(string, self._get_str_at(str_ptr))
            # integers, for most of these we'll end up concretizing values..
            else:
                i_val = args(argpos)
                c_val = int(self.parser.state.se.any_int(i_val))
                # truncate to the specifier's width, then reinterpret as signed if needed
                c_val &= (1 << (fmt_spec.size * 8)) - 1
                if fmt_spec.signed and (c_val & (1 << ((fmt_spec.size * 8) - 1))):
                    c_val -= (1 << fmt_spec.size * 8)

                if fmt_spec.spec_type == 'd':
                    s_val = str(c_val)
                elif fmt_spec.spec_type == 'u':
                    s_val = str(c_val)
                elif fmt_spec.spec_type == 'c':
                    s_val = chr(c_val & 0xff)
                elif fmt_spec.spec_type == 'x':
                    s_val = hex(c_val)[2:].rstrip('L')
                elif fmt_spec.spec_type == 'o':
                    # '%o' yields bare octal digits on every Python version; slicing
                    # oct() drops the only digit of 0 ('0'[1:]) or keeps the 'o' of '0o..'.
                    # NOTE(review): assumes c_val is non-negative for 'o' - confirm.
                    s_val = '%o' % c_val
                elif fmt_spec.spec_type == 'p':
                    s_val = hex(c_val).rstrip('L')
                else:
                    raise SimProcedureError("Unimplemented format specifier '%s'" % fmt_spec.spec_type)

                string = self._add_to_string(string, self.parser.state.se.BVV(s_val))

            argpos += 1

    return string
def dynamic_returns(self, blocks, **kwargs) -> bool:  # pylint:disable=unused-argument
    """
    Determines if a call to this function returns or not by performing static analysis and heuristics.

    This base implementation performs no analysis: it answers True unless the
    procedure is flagged ``DYNAMIC_RET``, in which case it refuses.

    :param blocks:  Blocks that are executed before reaching this SimProcedure.
    :return:        True if the call returns, False otherwise.
    :raises SimProcedureError: If ``self.DYNAMIC_RET`` is truthy.
    """
    if not self.DYNAMIC_RET:
        return True
    raise SimProcedureError(f"dynamic_returns() is not implemented for {self}")
def _match_spec(self, nugget):
    """
    Match the start of the string `nugget` to a format specifier.

    Leading digits are read as the length, a ``*`` marks the specifier as
    discarded, and the remaining text is compared against every key of
    ``self._all_spec``.

    :param nugget:  String expected to begin with a format specifier.
    :return:        A FormatSpecifier for the first entry of ``self._all_spec``
                    that `nugget` starts with, or None if nothing matches.
    :raises SimProcedureError: If the matched type cannot be parsed or bound to the architecture.
    """
    # TODO: handle positional modifiers and other similar format string tricks.
    all_spec = self._all_spec

    # iterate through nugget throwing away anything which is an int
    # TODO store this in a size variable
    original_nugget = nugget
    length_str = []
    length_spec = None
    discarded = False

    for j, c in enumerate(nugget):
        if c in ascii_digits:
            length_str.append(c)
        elif c == '*':
            discarded = True
        else:
            nugget = nugget[j:]
            length_spec = None if len(length_str) == 0 else int(''.join(length_str))
            break

    # we need the length of the format's length specifier to extract the format and nothing else
    length_spec_str_len = 0 if length_spec is None else len(length_str)
    discarded_len = 1 if discarded else 0

    # is it an actual format?
    for spec in all_spec:
        if nugget.startswith(spec):
            # this is gross coz sim_type is gross..
            nugget = nugget[:len(spec)]
            original_nugget = original_nugget[:(discarded_len + length_spec_str_len + len(spec))]
            nugtype = all_spec[nugget]
            try:
                typeobj = sim_type.parse_type(nugtype).with_arch(self.state.arch)
            except Exception:
                # not a bare except: KeyboardInterrupt/SystemExit must propagate
                raise SimProcedureError("format specifier uses unknown type '%s'" % repr(nugtype))
            # floor division keeps the byte size an integer under true division
            return FormatSpecifier(original_nugget, length_spec, typeobj.size // 8,
                                   typeobj.signed, discarded)

    return None
def static_exits(self, blocks):  # pylint: disable=unused-argument
    """
    Get new exits by performing static analysis and heuristics. This is a fast and best-effort approach to get new
    exits for scenarios where states are not available (e.g. when building a fast CFG).

    This base implementation reports no exits, and refuses outright for
    procedures flagged ``ADDS_EXITS``.

    :param list blocks: Blocks that are executed before reaching this SimProcedure.
    :return: A list of dicts. Each dict should contain the following entries: 'address', 'jumpkind', and 'namehint'.
    :rtype: list
    :raises SimProcedureError: If ``self.ADDS_EXITS`` is truthy.
    """
    if not self.ADDS_EXITS:
        # This SimProcedure does not add any new exit
        return []
    raise SimProcedureError("static_exits() is not implemented for %s" % self)
def ret(self, expr=None):
    """
    Add an exit representing a return from this function.

    The (optionally simplified / replaced-by-unconstrained) return expression
    is recorded in ``self.ret_expr``. When state arguments are in use the
    return address comes from the calling convention's ``teardown_callsite``;
    ``self.ret_to`` overrides it when set.

    :param expr: The value to return, or None for no return value.
    :raises SimProcedureError: If successors should be added but no return address is available.
    """
    self.inhibit_autoret = True

    if expr is not None:
        if o.SIMPLIFY_RETS in self.state.options:
            l.debug("... simplifying")
            l.debug("... before: %s", expr)
            expr = self.state.solver.simplify(expr)
            l.debug("... after: %s", expr)

        if self.symbolic_return:
            # swap the concrete result for a fresh variable constrained to equal it
            width = len(expr)
            stand_in = self.state.solver.Unconstrained(
                "symbolic_return_" + self.display_name,
                width,
                key=('symbolic_return', self.display_name),
            )  # pylint:disable=maybe-no-member
            self.state.add_constraints(stand_in == expr)
            expr = stand_in

    self.ret_expr = expr

    target = None
    if self.use_state_arguments:
        # only supply explicit (all non-float) argument types when the cc has none declared
        if self.cc.args is None:
            fallback_types = [False] * self.num_args
        else:
            fallback_types = None
        target = self.cc.teardown_callsite(self.state, expr, arg_types=fallback_types)

    if not self.should_add_successors:
        l.debug("Returning without setting exits due to 'internal' call.")
        return

    if self.ret_to is not None:
        target = self.ret_to

    if target is None:
        raise SimProcedureError("No source for return address in ret() call!")

    self._exit_action(self.state, target)
    self.successors.add_successor(self.state, target, self.state.solver.true, 'Ijk_Ret')
def strtol_inner(s, state, region, base, signed, read_length=None):
    """
    Try every accepted sign/radix prefix and combine the per-prefix parses.

    :param s: the string address/offset
    :param state: SimState
    :param region: memory, file, etc
    :param base: the base to use to interpret the number
                 note: all numbers may start with +/- and base 16 may start with 0x
    :param signed: boolean, true means the result will be signed, otherwise unsigned
    :param read_length: int, the number of bytes parsed in strtol
    :return: expression, value, num_bytes
             the returned expression is a symbolic boolean indicating success, value will be set to 0 on failure
             value is the returned value (set to min/max on overflow)
             num_bytes is the number of bytes read in the string
    :raises SimProcedureError: If `base` is outside [2, 36].
    """
    # sanity check
    if base < 2 or base > 36:
        raise SimProcedureError("base should be in the range [2,36]")

    # order matters here since we will use an if then else tree, and -0x will have precedence over -
    candidates = [b"-", b"+", b""]
    if base == 16:
        candidates = [b"0x", b"-0x", b"+0x"] + candidates

    cases = []
    conditions = []
    possible_num_bytes = []
    for prefix in candidates:
        # a prefix longer than the read limit can never match
        if read_length and read_length < len(prefix):
            continue
        matched, parsed, consumed = strtol._load_num_with_prefix(
            prefix, s, region, state, base, signed, read_length)
        conditions.append(matched)
        cases.append((matched, parsed))
        possible_num_bytes.append(consumed)

    # only one of the cases needed to match
    *leading_cases, (_, default_value) = cases
    result = state.solver.ite_cases(leading_cases, default_value)
    expression = state.solver.Or(*conditions)
    num_bytes = state.solver.ite_cases(zip(conditions, possible_num_bytes), 0)
    return expression, result, num_bytes
def run(self, *args, **kwargs):
    """
    Implement the actual procedure here!

    This default always fails, naming the class that lacks a run() method.

    :raises SimProcedureError: Always.
    """
    class_name = self.__class__.__name__
    raise SimProcedureError("%s does not implement a run() method" % class_name)
def execute(self, state, successors=None, arguments=None, ret_to=None):
    """
    Call this method with a SimState and a SimSuccessors to execute the procedure.

    Alternately, successors may be none if this is an inline call. In that case, you should
    provide arguments to the function.

    :param state:       The SimState to run on.
    :param successors:  The SimSuccessors to populate, or None.
    :param arguments:   Explicit arguments; when None they are fetched with ``inst.arg``.
    :param ret_to:      Stored on the running copy as ``ret_to``.
    :return:            The copy of this procedure that was actually run.
    :raises SimProcedureError: If no calling convention is available, or a continuation has no saved
                               procedure data.
    """
    # fill out all the fun stuff we don't want to frontload
    if self.addr is None:
        self.addr = state.addr
    if self.arch is None:
        self.arch = state.arch
    if self.project is None:
        self.project = state.project
    if self.cc is None:
        if self.arch.name in DEFAULT_CC:
            self.cc = DEFAULT_CC[self.arch.name](self.arch)
        else:
            # interpolate with %: exceptions do not format extra arguments the way logging does
            raise SimProcedureError('There is no default calling convention for architecture %s.'
                                    ' You must specify a calling convention.' % self.arch.name)

    # run on a shallow copy so per-execution attributes don't leak into this template
    inst = copy.copy(self)
    inst.state = state
    inst.successors = successors
    inst.ret_to = ret_to

    # check to see if this is a syscall and if we should override its return value
    override = None
    if inst.is_syscall:
        state.history.recent_syscall_count = 1
        if len(state.posix.queued_syscall_returns):
            override = state.posix.queued_syscall_returns.pop(0)

    if callable(override):
        try:
            r = override(state, run=inst)
        except TypeError:
            # the override does not accept the `run` keyword
            r = override(state)
        inst.use_state_arguments = True

    elif override is not None:
        r = override
        inst.use_state_arguments = True

    else:
        # get the arguments

        # handle if this is a continuation from a return
        if inst.is_continuation:
            if state.callstack.top.procedure_data is None:
                raise SimProcedureError(
                    "Tried to return to a SimProcedure in an inapplicable stack frame!"
                )

            saved_sp, sim_args, saved_local_vars = state.callstack.top.procedure_data
            state.regs.sp = saved_sp
            inst.arguments = sim_args
            inst.use_state_arguments = True
            for name, val in saved_local_vars:
                setattr(inst, name, val)
        else:
            if arguments is None:
                inst.use_state_arguments = True
                sim_args = [inst.arg(_) for _ in range(inst.num_args)]
                inst.arguments = sim_args
            else:
                inst.use_state_arguments = False
                sim_args = arguments[:inst.num_args]
                inst.arguments = arguments

        # run it
        r = getattr(inst, inst.run_func)(*sim_args, **inst.kwargs)

    # return automatically only if the run produced no successors itself
    if inst.returns and (not inst.successors or len(inst.successors.successors) == 0):
        inst.ret(r)

    return inst
def _setup_args(self, inst, state, args):  #pylint:disable=unused-argument,no-self-use
    """
    Java-specific argument setup hook; this default implementation always fails.

    :raises SimProcedureError: Always.
    """
    message = "the java-specific _setup_args() method was invoked on a non-Java SimProcedure."
    raise SimProcedureError(message)
def _compute_ret_addr(self, expr):  #pylint:disable=unused-argument,no-self-use
    """
    Java-specific return-address hook; this default implementation always fails.

    :raises SimProcedureError: Always.
    """
    message = "the java-specific _compute_ret_addr() method was invoked on a non-Java SimProcedure."
    raise SimProcedureError(message)
def run(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Implement the actual procedure here!

    This default always fails, naming the class that lacks a run() method.

    :raises SimProcedureError: Always.
    """
    class_name = self.__class__.__name__
    raise SimProcedureError("%s does not implement a run() method" % class_name)
def replace(self, startpos, args):
    """
    Implement printf - based on the stored format specifier information, format the values from the arg getter
    function `args` into a string.

    :param startpos:    The index of the first argument to be used by the first element of the format string
    :param args:        A function which, given an argument index, returns the integer argument to the current
                        function at that index
    :return:            The result formatted string
    :raises SimProcedureError: If a format specifier is not implemented.
    """
    argpos = startpos
    string = None

    for component in self.components:
        # if this is just concrete data
        if isinstance(component, str):
            string = self._add_to_string(string, self.parser.state.se.BVV(component))
        elif isinstance(component, claripy.ast.BV):
            string = self._add_to_string(string, component)
        else:
            # okay now for the interesting stuff
            # what type of format specifier is it?
            fmt_spec = component
            if fmt_spec.spec_type == 's':
                str_ptr = args(argpos)
                string = self._add_to_string(string, self._get_str_at(str_ptr))
            # integers, for most of these we'll end up concretizing values..
            else:
                i_val = args(argpos)
                if fmt_spec.spec_type == 'n':
                    # %n produces no output; a fresh symbolic value is stored at the pointer
                    self.state.memory.store(
                        i_val, self.state.se.BVS('format_%n', self.state.arch.bits))
                else:
                    c_val = int(self.parser.state.se.eval(i_val))
                    # truncate to the specifier's width, then reinterpret as signed if needed
                    c_val &= (1 << (fmt_spec.size * 8)) - 1
                    if fmt_spec.signed and (c_val & (1 << ((fmt_spec.size * 8) - 1))):
                        c_val -= (1 << fmt_spec.size * 8)

                    if fmt_spec.spec_type in ('d', 'i'):
                        s_val = str(c_val)
                    elif fmt_spec.spec_type == 'u':
                        s_val = str(c_val)
                    elif fmt_spec.spec_type == 'c':
                        s_val = chr(c_val & 0xff)
                    elif fmt_spec.spec_type == 'x':
                        s_val = hex(c_val)[2:].rstrip('L')
                    elif fmt_spec.spec_type == 'o':
                        # '%o' yields bare octal digits on every Python version; slicing
                        # oct() drops the only digit of 0 ('0'[1:]) or keeps the 'o' of '0o..'.
                        # NOTE(review): assumes c_val is non-negative for 'o' - confirm.
                        s_val = '%o' % c_val
                    elif fmt_spec.spec_type == 'p':
                        s_val = hex(c_val).rstrip('L')
                    else:
                        raise SimProcedureError(
                            "Unimplemented format specifier '%s'" % fmt_spec.spec_type)

                    string = self._add_to_string(string, self.parser.state.se.BVV(s_val))

            argpos += 1

    return string
def execute(self, state, successors=None, arguments=None, ret_to=None):
    """
    Call this method with a SimState and a SimSuccessors to execute the procedure.

    Alternately, successors may be none if this is an inline call. In that case, you should
    provide arguments to the function.

    :param state:       The SimState to run on.
    :param successors:  The SimSuccessors to populate, or None.
    :param arguments:   Explicit arguments; when None they are fetched with ``inst.arg``.
    :param ret_to:      Stored on the running copy as ``ret_to``.
    :return:            The copy of this procedure that was actually run.
    :raises SimProcedureError: If no calling convention is available, or a continuation has no saved
                               procedure data.
    :raises SimShadowStackError: If a continuation's saved address differs from this procedure's address.
    """
    # fill out all the fun stuff we don't want to frontload
    if self.addr is None and not state.regs._ip.symbolic:
        self.addr = state.addr
    if self.arch is None:
        self.arch = state.arch
    if self.project is None:
        self.project = state.project
    if self.cc is None:
        if self.arch.name in DEFAULT_CC:
            self.cc = DEFAULT_CC[self.arch.name](self.arch)
        else:
            raise SimProcedureError('There is no default calling convention for architecture %s.'
                                    ' You must specify a calling convention.' % self.arch.name)

    # run on a shallow copy so per-execution attributes don't leak into this template
    inst = copy.copy(self)
    inst.state = state
    inst.successors = successors
    inst.ret_to = ret_to
    inst.inhibit_autoret = False

    # check to see if this is a syscall and if we should override its return value
    if inst.is_syscall:
        state.history.recent_syscall_count = 1

    # a BP_BEFORE breakpoint may replace the result, in which case the procedure body is skipped
    state._inspect(
        'simprocedure',
        BP_BEFORE,
        simprocedure_name=inst.display_name,
        simprocedure_addr=self.addr,
        simprocedure=inst,
        simprocedure_result=NO_OVERRIDE
    )

    r = state._inspect_getattr('simprocedure_result', NO_OVERRIDE)
    if r is NO_OVERRIDE:
        # get the arguments

        # If the simprocedure is related to a Java function call the appropriate setup_args methos
        # TODO: should we move this?
        if self.is_java:
            sim_args = self._setup_args(inst, state, arguments)  #pylint:disable=assignment-from-no-return
            # Set the flag on the running copy: `inst` was copied from `self` above, so
            # assigning to `self` here would leave the instance that later calls ret()
            # with the stale value.
            inst.use_state_arguments = False

        # handle if this is a continuation from a return
        elif inst.is_continuation:
            if state.callstack.top.procedure_data is None:
                raise SimProcedureError("Tried to return to a SimProcedure in an inapplicable stack frame!")

            saved_sp, sim_args, saved_local_vars, saved_lr, ideal_addr = state.callstack.top.procedure_data
            if ideal_addr != inst.addr:
                raise SimShadowStackError("I can't emulate this consequence of stack smashing")
            state.regs.sp = saved_sp
            if saved_lr is not None:
                state.regs.lr = saved_lr
            inst.arguments = sim_args
            inst.use_state_arguments = True
            inst.call_ret_expr = state.registers.load(state.arch.ret_offset, state.arch.bytes,
                                                      endness=state.arch.register_endness)
            for name, val in saved_local_vars:
                setattr(inst, name, val)
        else:
            if arguments is None:
                inst.use_state_arguments = True
                sim_args = [inst.arg(_) for _ in range(inst.num_args)]
                inst.arguments = sim_args
            else:
                inst.use_state_arguments = False
                sim_args = arguments[:inst.num_args]
                inst.arguments = arguments

        # run it
        l.debug("Executing %s%s%s%s%s with %s, %s", *(inst._describe_me() + (sim_args, inst.kwargs)))
        r = getattr(inst, inst.run_func)(*sim_args, **inst.kwargs)

    # a BP_AFTER breakpoint may replace the result before it is returned
    state._inspect(
        'simprocedure',
        BP_AFTER,
        simprocedure_result=r
    )
    r = state._inspect_getattr('simprocedure_result', r)

    if inst.returns and inst.is_function and not inst.inhibit_autoret:
        inst.ret(r)

    return inst
def interpret(self, addr, startpos, args, region=None):
    """
    Interpret a format string, reading the data at `addr` in `region` into `args` starting at `startpos`.

    :param addr:        Position in `region` at which reading starts.
    :param startpos:    Index of the argument that receives the first parsed item.
    :param args:        A function which, given an argument index, returns the destination pointer.
    :param region:      The storage to read from; defaults to the state's memory.
    :return:            A tuple ``(new position, number of items parsed)``.
    :raises SimProcedureError: If a format specifier is not supported.
    """
    # TODO: we only support one format specifier in interpretation for now
    # counted with a generator so it does not rely on filter() returning a list
    format_specifier_count = sum(1 for x in self.components if isinstance(x, FormatSpecifier))
    if format_specifier_count > 1:
        l.warning("We don't support more than one format specifiers in format strings.")

    if region is None:
        region = self.parser.state.memory

    bits = self.parser.state.arch.bits
    failed = self.parser.state.se.BVV(0, bits)
    argpos = startpos
    position = addr
    for component in self.components:
        if isinstance(component, str):
            # TODO we skip non-format-specifiers in format string interpretation for now
            # if the region doesn't match the concrete component, we need to return immediately
            pass
        else:
            fmt_spec = component
            try:
                dest = args(argpos)
            except SimProcedureArgumentError:
                dest = None
            if fmt_spec.spec_type == 's':
                # set some limits for the find
                max_str_len = self.parser.state.libc.max_str_len
                max_sym_bytes = self.parser.state.libc.buf_symbolic_bytes

                # has the length of the format been limited by the string itself?
                if fmt_spec.length_spec is not None:
                    max_str_len = fmt_spec.length_spec
                    max_sym_bytes = fmt_spec.length_spec

                # TODO: look for limits on other characters which scanf is sensitive to, '\x00', '\x20'
                ohr, _, _ = region.find(position, self.parser.state.se.BVV('\n'), max_str_len,
                                        max_symbolic_bytes=max_sym_bytes)

                # if no newline is found, mm is position + max_strlen
                # If-branch will really only happen for format specifiers with a length
                mm = self.parser.state.se.If(ohr == 0, position + max_str_len, ohr)
                # we're just going to concretize the length, load will do this anyways
                length = self.parser.state.se.max_int(mm - position)
                src_str = region.load(position, length)

                # TODO all of these should be delimiters we search for above
                # add that the contents of the string cannot be any scanf %s string delimiters
                for delimiter in set(FormatString.SCANF_DELIMITERS) - {'\x00'}:
                    delim_bvv = self.parser.state.se.BVV(delimiter)
                    for i in range(length):
                        self.parser.state.add_constraints(region.load(position + i, 1) != delim_bvv)

                # write it out to the pointer
                self.parser.state.memory.store(dest, src_str)
                # store the terminating null byte
                self.parser.state.memory.store(dest + length, self.parser.state.se.BVV(0, 8))

                position += length

            else:
                # XXX: atoi only supports strings of one byte
                if fmt_spec.spec_type in ['d', 'u', 'x']:
                    base = 16 if fmt_spec.spec_type == 'x' else 10
                    status, i, num_bytes = self.parser._sim_atoi_inner(
                        position, region, base=base, read_length=fmt_spec.length_spec)
                    # increase failed count if we were unable to parse it
                    failed = self.parser.state.se.If(status, failed, failed + 1)
                    position += num_bytes
                elif fmt_spec.spec_type == 'c':
                    i = region.load(position, 1)
                    i = i.zero_extend(bits - 8)
                    position += 1
                else:
                    raise SimProcedureError("unsupported format spec '%s' in interpret" % fmt_spec.spec_type)

                # keep only the low bytes that fit the specifier's size
                i = self.parser.state.se.Extract(fmt_spec.size * 8 - 1, 0, i)
                self.parser.state.memory.store(dest, i, size=fmt_spec.size,
                                               endness=self.parser.state.arch.memory_endness)

            argpos += 1

    # we return (new position, number of items parsed)
    # new position is used for interpreting from a file, so we can increase file position
    return (position, ((argpos - startpos) - failed))
def replace(self, va_arg):
    """
    Implement printf - based on the stored format specifier information, format the values from the arg getter
    function `args` into a string.

    :param va_arg:          A function which takes a type and returns the next argument of that type
    :return:                The result formatted string
    :raises SimProcedureError: If a format specifier is not implemented.
    """
    string = None

    for component in self.components:
        # if this is just concrete data
        if isinstance(component, bytes):
            string = self._add_to_string(string, self.parser.state.solver.BVV(component))
        elif isinstance(component, str):
            raise Exception("this branch should be impossible?")
        elif isinstance(component, claripy.ast.BV):  # pylint:disable=isinstance-second-argument-not-valid-type
            string = self._add_to_string(string, component)
        else:
            # okay now for the interesting stuff
            # what type of format specifier is it?
            fmt_spec = component
            if fmt_spec.spec_type == b's':
                if fmt_spec.length_spec == b".*":
                    # the precision is itself an argument and precedes the string pointer
                    str_length = va_arg('size_t')
                else:
                    str_length = None
                str_ptr = va_arg('char*')
                string = self._add_to_string(string, self._get_str_at(str_ptr, max_length=str_length))
            # integers, for most of these we'll end up concretizing values..
            else:
                # ummmmmmm this is a cheap translation but I think it should work
                i_val = va_arg('void*')
                c_val = int(self.parser.state.solver.eval(i_val))
                # truncate to the specifier's width, then reinterpret as signed if needed
                c_val &= (1 << (fmt_spec.size * 8)) - 1
                if fmt_spec.signed and (c_val & (1 << ((fmt_spec.size * 8) - 1))):
                    c_val -= (1 << fmt_spec.size * 8)

                if fmt_spec.spec_type in (b'd', b'i'):
                    s_val = str(c_val)
                elif fmt_spec.spec_type == b'u':
                    s_val = str(c_val)
                elif fmt_spec.spec_type == b'c':
                    s_val = chr(c_val & 0xff)
                elif fmt_spec.spec_type == b'x':
                    s_val = hex(c_val)[2:]
                elif fmt_spec.spec_type == b'o':
                    s_val = oct(c_val)[2:]
                elif fmt_spec.spec_type == b'p':
                    s_val = hex(c_val)
                else:
                    raise SimProcedureError("Unimplemented format specifier '%s'" % fmt_spec.spec_type)

                if isinstance(fmt_spec.length_spec, int):
                    s_val = s_val.rjust(fmt_spec.length_spec, fmt_spec.pad_chr)

                # latin-1 maps code points 0-255 to single bytes; the default UTF-8
                # would turn a %c value >= 0x80 into two bytes.
                string = self._add_to_string(
                    string, self.parser.state.solver.BVV(s_val.encode("latin-1")))

    return string
def interpret(self, startpos, args, addr=None, simfd=None):
    """
    implement scanf - extract formatted data from memory or a file according to the stored format specifiers and
    store them into the pointers extracted from `args`.

    Two strategies are used. When `simfd` is backed by a SimPackets storage, data is read
    from the descriptor piece by piece and constrained to match each component. Otherwise
    the data is located in a region (the descriptor's storage or state memory) and parsed
    from a position.

    :param startpos:    The index of the first argument corresponding to the first format element
    :param args:        A function which, given the index of an argument to the function, returns that argument
    :param addr:        The address in the memory to extract data from, or...
    :param simfd:       A file descriptor to use for reading data from
    :return:            The number of arguments parsed
    :raises SimProcedureError: If a format specifier is not supported by the region-based path.
    """
    if simfd is not None and isinstance(simfd.read_storage, SimPackets):
        argnum = startpos
        for component in self.components:
            if type(component) is bytes:
                # literal text: the input must equal it exactly
                sdata, _ = simfd.read_data(len(component), short_reads=False)
                self.state.solver.add(sdata == component)
            elif isinstance(component, claripy.Bits):
                # symbolic literal: read as many bytes as the expression is wide
                sdata, _ = simfd.read_data(len(component) // 8, short_reads=False)
                self.state.solver.add(sdata == component)
            elif component.spec_type == b's':
                if component.length_spec is None:
                    sdata, slen = simfd.read_data(self.state.libc.buf_symbolic_bytes)
                else:
                    sdata, slen = simfd.read_data(component.length_spec)
                # no byte of the string may be one of the scanf delimiters
                for byte in sdata.chop(8):
                    self.state.solver.add(claripy.And(*[byte != char for char in self.SCANF_DELIMITERS]))
                self.state.memory.store(args(argnum), sdata, size=slen)
                # terminating null byte
                self.state.memory.store(args(argnum) + slen, claripy.BVV(0, 8))
                argnum += 1
            elif component.spec_type == b'c':
                sdata, _ = simfd.read_data(1, short_reads=False)
                self.state.memory.store(args(argnum), sdata)
                argnum += 1
            else:
                bits = component.size * 8
                if component.spec_type == b'x':
                    base = 16
                elif component.spec_type == b'o':
                    base = 8
                else:
                    base = 10

                # here's the variable representing the result of the parsing
                target_variable = self.state.solver.BVS('scanf_' + component.string.decode(), bits,
                                                        key=('api', 'scanf', argnum - startpos, component.string))
                negative = claripy.SLT(target_variable, 0)

                # how many digits does it take to represent this variable fully?
                max_digits = int(math.ceil(math.log(2**bits, base)))

                # how many digits does the format specify?
                spec_digits = component.length_spec

                # how many bits can we specify as input?
                available_bits = float('inf') if spec_digits is None else spec_digits * math.log(base, 2)
                not_enough_bits = available_bits < bits

                # how many digits will we model this input as?
                digits = max_digits if spec_digits is None else spec_digits

                # constrain target variable range explicitly if it can't take on all possible values
                if not_enough_bits:
                    self.state.solver.add(self.state.solver.And(
                        self.state.solver.SLE(target_variable, (base**digits) - 1),
                        self.state.solver.SGE(target_variable, -(base**(digits - 1) - 1))))

                # perform the parsing in reverse - constrain the input digits to be the string version of the input
                # this only works because we're reading from a packet stream and therefore nobody has the ability
                # to add other constraints to this data!
                # this makes z3's job EXTREMELY easy
                sdata, _ = simfd.read_data(digits, short_reads=False)
                # i counts digit positions starting from the last byte read
                for i, digit in enumerate(reversed(sdata.chop(8))):
                    digit_value = (target_variable // (base**i)) % base
                    digit_ascii = digit_value + ord('0')
                    if base > 10:
                        # values 10 and up are written as lowercase letters starting at 'a'
                        digit_ascii = claripy.If(digit_value >= 10, digit_value + (-10 + ord('a')), digit_ascii)

                    # if there aren't enough bits, we can increase the range by accounting for the possibility that
                    # the first digit is a minus sign
                    if not_enough_bits:
                        if i == digits - 1:
                            neg_digit_ascii = ord('-')
                        else:
                            neg_digit_value = (-target_variable // (base**i)) % base
                            neg_digit_ascii = neg_digit_value + ord('0')
                            if base > 10:
                                neg_digit_ascii = claripy.If(neg_digit_value >= 10, neg_digit_value + (-10 + ord('a')), neg_digit_ascii)

                        digit_ascii = claripy.If(negative, neg_digit_ascii, digit_ascii)

                    # only the low 8 bits of the computed character are compared with the input byte
                    self.state.solver.add(digit == digit_ascii[7:0])

                self.state.memory.store(args(argnum), target_variable, endness=self.state.arch.memory_endness)
                argnum += 1

        return argnum - startpos

    # TODO: we only support one format specifier in interpretation for now

    format_specifier_count = sum(1 for x in self.components if isinstance(x, FormatSpecifier))
    if format_specifier_count > 1:
        l.warning("We don't support more than one format specifiers in format strings.")

    if simfd is not None:
        region = simfd.read_storage
        addr = simfd._pos if hasattr(simfd, '_pos') else simfd._read_pos # XXX THIS IS BAD
    else:
        region = self.parser.state.memory

    bits = self.parser.state.arch.bits
    # symbolic count of specifiers that could not be parsed; subtracted from the result
    failed = self.parser.state.solver.BVV(0, bits)
    argpos = startpos
    position = addr
    for component in self.components:
        if isinstance(component, bytes):
            # TODO we skip non-format-specifiers in format string interpretation for now
            # if the region doesn't match the concrete component, we need to return immediately
            pass
        else:
            # NOTE(review): any component that is not bytes is treated as a format specifier
            # here, unlike the SimPackets path which also handles claripy.Bits - confirm intended.
            fmt_spec = component
            try:
                dest = args(argpos)
            except SimProcedureArgumentError:
                dest = None
            if fmt_spec.spec_type == b's':
                # set some limits for the find
                max_str_len = self.parser.state.libc.max_str_len
                max_sym_bytes = self.parser.state.libc.buf_symbolic_bytes

                # has the length of the format been limited by the string itself?
                if fmt_spec.length_spec is not None:
                    max_str_len = fmt_spec.length_spec
                    max_sym_bytes = fmt_spec.length_spec

                # TODO: look for limits on other characters which scanf is sensitive to, '\x00', '\x20'
                ohr, ohc, ohi = region.find(position, self.parser.state.solver.BVV(b'\n'), max_str_len, max_symbolic_bytes=max_sym_bytes)

                # if no newline is found, mm is position + max_strlen
                # If-branch will really only happen for format specifiers with a length
                mm = self.parser.state.solver.If(ohr == 0, position + max_str_len, ohr)
                # we're just going to concretize the length, load will do this anyways
                length = self.parser.state.solver.max_int(mm - position)
                src_str = region.load(position, length)

                # TODO all of these should be delimiters we search for above
                # add that the contents of the string cannot be any scanf %s string delimiters
                for delimiter in set(FormatString.SCANF_DELIMITERS):
                    delim_bvv = self.parser.state.solver.BVV(delimiter)
                    for i in range(length):
                        self.parser.state.add_constraints(region.load(position + i, 1) != delim_bvv)

                # write it out to the pointer
                self.parser.state.memory.store(dest, src_str)
                # store the terminating null byte
                self.parser.state.memory.store(dest + length, self.parser.state.solver.BVV(0, 8))

                position += length

            else:

                # XXX: atoi only supports strings of one byte
                if fmt_spec.spec_type in [b'd', b'i', b'u', b'x']:
                    base = 16 if fmt_spec.spec_type == b'x' else 10
                    status, i, num_bytes = self.parser._sim_atoi_inner(position, region, base=base, read_length=fmt_spec.length_spec)
                    # increase failed count if we were unable to parse it
                    failed = self.parser.state.solver.If(status, failed, failed + 1)
                    position += num_bytes
                elif fmt_spec.spec_type == b'c':
                    i = region.load(position, 1)
                    i = i.zero_extend(bits - 8)
                    position += 1
                else:
                    raise SimProcedureError("unsupported format spec '%s' in interpret" % fmt_spec.spec_type)

                # keep only the low bytes that fit the specifier's size
                i = self.parser.state.solver.Extract(fmt_spec.size * 8 - 1, 0, i)
                self.parser.state.memory.store(dest, i, size=fmt_spec.size, endness=self.parser.state.arch.memory_endness)

            argpos += 1

    if simfd is not None:
        # read the parsed span from the descriptor and require that the whole span was read
        _, realsize = simfd.read_data(position - addr)
        self.state.solver.add(realsize == position - addr)

    return (argpos - startpos) - failed
def execute(self, state, successors=None, arguments=None, ret_to=None):
    """
    Call this method with a SimState and a SimSuccessors to execute the procedure.

    Alternately, successors may be none if this is an inline call. In that case, you should
    provide arguments to the function.

    This object acts as a template: missing ``addr``/``arch``/``project``/``cc`` are filled in
    on it from ``state``, then a shallow copy is made and all per-execution data (state,
    successors, arguments, flags) is stored on that copy. The copy is what actually runs.

    :param state:       The state to execute the procedure on. It is modified in place.
    :param successors:  The successors object to add exits to, or None for an inline call.
    :param arguments:   Explicit arguments to use instead of reading them through the calling
                        convention, or None to read them from the state.
    :param ret_to:      An address to return to, overriding the one ret() would otherwise use.
    :return:            The per-execution copy of this procedure that was run.
    :raises SimProcedureError: If no calling convention is set and the architecture has no
                        default one, or if a continuation is resumed in a stack frame that
                        carries no saved procedure data.
    """
    # fill out all the fun stuff we don't want to frontload
    if self.addr is None and not state.regs.ip.symbolic:
        self.addr = state.addr
    if self.arch is None:
        self.arch = state.arch
    if self.project is None:
        self.project = state.project
    if self.cc is None:
        if self.arch.name in DEFAULT_CC:
            self.cc = DEFAULT_CC[self.arch.name](self.arch)
        else:
            raise SimProcedureError('There is no default calling convention for architecture %s.'
                                    ' You must specify a calling convention.' % self.arch.name)

    # everything below operates on a per-execution copy so the template stays reusable
    inst = copy.copy(self)
    inst.state = state
    inst.successors = successors
    inst.ret_to = ret_to
    inst.inhibit_autoret = False

    # check to see if this is a syscall and if we should override its return value
    override = None
    if inst.is_syscall:
        state.history.recent_syscall_count = 1
        if state.posix.queued_syscall_returns:
            override = state.posix.queued_syscall_returns.pop(0)

    if callable(override):
        # overrides may or may not accept the `run` keyword; fall back to the short form
        try:
            r = override(state, run=inst)
        except TypeError:
            r = override(state)
        inst.use_state_arguments = True

    elif override is not None:
        # a plain queued value is used directly as the return value
        r = override
        inst.use_state_arguments = True

    else:
        # get the arguments

        # If the simprocedure is related to a Java function call the appropriate setup_args method
        # TODO: should we move this?
        if self.is_java:
            sim_args = self._setup_args(inst, state, arguments)
            # The flag must live on the executing copy: ret() reads it from the instance it
            # runs on. Writing it to the template left the current execution with a stale
            # value and silently changed every later execution.
            inst.use_state_arguments = False

        # handle if this is a continuation from a return
        elif inst.is_continuation:
            if state.callstack.top.procedure_data is None:
                raise SimProcedureError("Tried to return to a SimProcedure in an inapplicable stack frame!")

            saved_sp, sim_args, saved_local_vars, saved_lr = state.callstack.top.procedure_data
            state.regs.sp = saved_sp
            if saved_lr is not None:
                state.regs.lr = saved_lr
            inst.arguments = sim_args
            inst.use_state_arguments = True
            # the value returned by the callee we are continuing from
            inst.call_ret_expr = state.registers.load(state.arch.ret_offset,
                                                      state.arch.bytes,
                                                      endness=state.arch.register_endness)
            # restore the local variables saved alongside the frame
            for name, val in saved_local_vars:
                setattr(inst, name, val)

        else:
            if arguments is None:
                # no explicit arguments: read them through inst.arg()
                inst.use_state_arguments = True
                sim_args = [inst.arg(i) for i in range(inst.num_args)]
                inst.arguments = sim_args
            else:
                # explicit arguments: pass at most num_args of them to the run function,
                # but keep the full list on the instance
                inst.use_state_arguments = False
                sim_args = arguments[:inst.num_args]
                inst.arguments = arguments

        # run it
        l.debug("Executing %s%s%s%s%s with %s, %s", *(inst._describe_me() + (sim_args, inst.kwargs)))
        r = getattr(inst, inst.run_func)(*sim_args, **inst.kwargs)

    # return automatically unless the run function already set up its own exit
    if inst.returns and inst.is_function and not inst.inhibit_autoret:
        inst.ret(r)

    return inst