def do_discover_caller_names(bv, func):
    """Rename callers of ``func`` after the method name they pass to it.

    Asks the user which parameter of ``func`` carries the method name, then
    uses ``discover_names`` to map each name to the function that passes it
    and defines an auto symbol with that name on the function.

    Functions that already carry the discovered name are skipped, as are
    functions whose current symbol is not an auto symbol (i.e. the user has
    already named them).

    :param bv: the binary view the command was invoked on (not used here).
    :param func: the function whose callers should be renamed.
    """
    param_name = interaction.get_text_line_input(
        "Please enter the name of the parameter that contains the method name",
        "Parameter name")
    if param_name is None:
        # The prompt was dismissed without input; nothing to do.
        return

    # The docs say the call above returns a Py3 str, but it has been
    # observed to return a bytes object under Py3, so accept both.
    if isinstance(param_name, bytes):
        param_name = param_name.decode("utf-8")

    func_params = [
        param for param in func.parameter_vars if param.name == param_name
    ]
    force = False

    if len(func_params) != 1:
        log_error("Unable to determine method name argument")
        return

    for name, target in discover_names(func, func_params).items():
        # Skip if named correctly
        if target.symbol.name == name:
            continue

        # Skip if we're not forcing and the user has named this already
        if not target.symbol.auto and not force:
            log_debug("Skipped %r due to no auto symbol" % name)
            continue

        log_info("Renaming %r to %r" % (target, name))
        target.view.define_auto_symbol(
            Symbol(target.symbol.type, target.symbol.address,
                   short_name=name))
def _define_classes(view: BinaryView, class_t: Type):
    """Create classes from ``__objc_data`` and label ``__objc_classrefs``.

    Every ``class_t.width``-sized slot of the ``__objc_data`` section is
    handed to ``Class.from_address``; the resulting object is only logged
    here. Every pointer-sized slot of ``__objc_classrefs`` is then typed as
    a pointer to ``class_t`` and, when the pointed-to address is a key of
    ``view.session_data['ClassList']``, given a
    ``_OBJC_CLASS_$_<name>@GOT`` data symbol.

    :raises KeyError: if either section is missing from the view.
    """
    data_section = view.sections.get('__objc_data')
    if data_section is None:
        raise KeyError('This binary has no __objc_data section')

    for class_addr in range(data_section.start, data_section.end,
                            class_t.width):
        current_class = Class.from_address(class_addr, view)
        log_debug(f"Created {current_class}")

    refs_section = view.sections.get('__objc_classrefs')
    if refs_section is None:
        raise KeyError('This binary has no __objc_classrefs section')

    for ref_addr in range(refs_section.start, refs_section.end,
                          view.address_size):
        view.define_user_data_var(ref_addr, Type.pointer(view.arch, class_t))

        raw_pointer = view.read(ref_addr, view.address_size)
        if view.endianness is Endianness.LittleEndian:
            byte_order = "little"
        else:
            byte_order = "big"
        target_addr = int.from_bytes(raw_pointer, byte_order)

        # A null reference never resolves to a known class.
        if not target_addr:
            continue

        known_class = view.session_data['ClassList'].get(target_addr)
        if known_class is None:
            continue

        log_debug(f"{ref_addr:x} points to {known_class!r}")
        view.define_user_symbol(
            Symbol(SymbolType.DataSymbol, ref_addr,
                   f"_OBJC_CLASS_$_{known_class.vtable.name}@GOT"))
def __eq__(self, other):
    """Return True when ``other`` has the same type, ``_type`` and ``start``.

    Anything that is not an instance of ``type(self)`` compares unequal.
    """
    log_debug("__eq__")
    if isinstance(other, type(self)):
        if self._type == other._type and self.start == other.start:
            return True
    return False
def visit_MLIL_CONST(self, expr):
    """Map a constant that addresses an Objective-C class symbol to a type.

    Looks up the symbol at ``expr.constant``. If its name has the form
    ``_OBJC_CLASS_$_<name>`` / ``_OBJC_METACLASS_$_<name>`` (optionally
    suffixed ``@GOT``), returns a pointer to the named type ``<name>``,
    defining an empty structure under that name first when the view has no
    such type yet.

    :returns: the pointer type, or ``None`` when there is no symbol at the
        constant or the symbol is not an Objective-C class symbol.
    """
    view = expr.function.source_function.view

    class_symbol = view.get_symbol_at(expr.constant)
    if class_symbol is None:
        return

    log_debug(class_symbol.name)

    class_match = re.match(
        r'_OBJC_(META)?CLASS_\$_(?P<classname>[_A-Za-z0-9=/]+)(@GOT)?',
        class_symbol.name
    )
    if class_match is None:
        # Some other symbol lives at this constant; previously this fell
        # through to ``None.group(...)`` and raised AttributeError.
        return

    class_name = class_match.group('classname')

    class_type = view.types.get(class_name)
    if class_type is None:
        # Unknown class: register an empty placeholder structure so the
        # named type reference below has something to refer to.
        view.define_user_type(class_name, Type.structure_type(Structure()))
        class_type = view.types.get(class_name)

    return Type.pointer(
        view.arch,
        Type.named_type_from_type(class_name, class_type)
    )
def init(self):
    """Initialise the view for EVM bytecode.

    Sets the architecture/platform, carves the raw file into read-only data
    segments (one per swarm hash reported by ``find_swarm_hashes``) and
    readable/executable segments (everything else), builds a CFG with
    ``evm_cfg_builder``, stores it as default function session data,
    registers a ``VsaNotification``, and defines a symbol and a function for
    every CFG function. Linear sweep is disabled for this view.

    :returns: ``True``.
    """
    self.arch = Architecture['EVM']
    self.platform = Architecture['EVM'].standalone_platform
    self.max_function_size_for_analysis = 0

    file_size = len(self.raw)

    # Find swarm hashes and make them data
    evm_bytes = self.raw.read(0, file_size)

    # code is everything that isn't a swarm hash
    code = IntervalSet([Interval(0, file_size)])

    log_debug('Finding swarm hashes')
    swarm_hashes = self.find_swarm_hashes(evm_bytes)
    for start, sz in swarm_hashes:
        self.add_auto_segment(
            start, sz, start, sz,
            (SegmentFlag.SegmentContainsData |
             SegmentFlag.SegmentDenyExecute |
             SegmentFlag.SegmentReadable |
             SegmentFlag.SegmentDenyWrite))
        code -= IntervalSet([Interval(start, start + sz)])

    for interval in code:
        # Members that are bare ints carry no bounds to map; skip them.
        if isinstance(interval, int):
            continue
        # add_auto_segment takes (start, length, ...) as in the swarm-hash
        # call above, so pass the span's size rather than its end address.
        length = interval.upper_bound - interval.lower_bound
        self.add_auto_segment(
            interval.lower_bound, length,
            interval.lower_bound, length,
            (SegmentFlag.SegmentReadable | SegmentFlag.SegmentExecutable))

    log_debug('Building CFG with evm_cfg_builder')
    cfg = CFG(evm_bytes, remove_metadata=False)
    log_debug('Finished building CFG with evm_cfg_builder')
    Function.set_default_session_data('cfg', cfg)

    log_debug("registering VsaNotification")
    self.register_notification(VsaNotification())

    log_debug("specifiying entry point and functions")
    self.add_entry_point(0)

    for function in cfg.functions:
        # Every function except the one at address 0 is placed one byte
        # past the CFG's reported start address.
        function_start = (function._start_addr + 1
                          if function._start_addr != 0 else 0)
        self.define_auto_symbol(
            Symbol(SymbolType.FunctionSymbol, function_start,
                   function.name))
        self.add_function(function_start)

    # disable linear sweep
    Settings().set_bool('analysis.linearSweep.autorun', False,
                        view=self, scope=SettingsScope.SettingsUserScope)

    return True
def _parse_function_type(type_string: str, self_name: str, view: BinaryView, is_class=False) -> Type:
    """Build a function type from an encoded method type string.

    The string is consumed left to right: a return type (one character, or
    a ``{...}`` structure handled by ``_parse_structure``), a run of digits,
    then repeated argument encodings each followed by a run of digits.
    Structure arguments are added as pointers to the parsed structure.

    The ``self`` argument is then replaced by a pointer to the named type
    ``self_name`` (or to ``class_t`` when ``is_class`` is true). When the
    return type is a named type reference, the return value becomes a
    pointer passed as a leading ``ret_value`` parameter and ``self`` moves
    to the second slot.

    :returns: the value of ``Type.function(ret_type, args)``.
    """
    log_debug(f'_parse_function_type {type_string}')

    def _self_parameter():
        # Built on demand inside whichever branch needs it.
        if is_class:
            receiver = Type.named_type_from_type(
                'class_t', view.get_type_by_name('class_t'))
        else:
            receiver = Type.named_type_from_type(
                self_name, view.types[self_name])
        return FunctionParameter(Type.pointer(view.arch, receiver), 'self')

    leading = type_string[0]

    # Handle structures defined in the function types
    if leading == '{':
        ret_type, type_string = _parse_structure(type_string[1:], view)
    else:
        ret_type = _lookup_type(leading, view)
        type_string = type_string[1:]

    # The digits after the return type are parsed but not used afterwards.
    frame_digits = ''.join(takewhile(str.isdigit, type_string))
    type_string = type_string[len(frame_digits):]
    stack_size = int(frame_digits) if frame_digits else None

    args = []
    while type_string:
        if type_string[0] == '{':
            struct_type, type_string = _parse_structure(
                type_string[1:], view)
            args.append(Type.pointer(view.arch, struct_type))
        else:
            encoding = ''.join(
                takewhile(lambda ch: not ch.isdigit(), type_string))
            type_string = type_string[len(encoding):]
            args.append(_lookup_type(encoding, view))

        # Drop the digits that follow each argument encoding.
        offset_digits = ''.join(takewhile(str.isdigit, type_string))
        type_string = type_string[len(offset_digits):]

    # we know that the first parameter is the 'self' parameter if it's not
    # an objc_msgSend_stret or objc_msgSendSuper_stret. Otherwise it's the
    # second one.
    if ret_type.type_class == TypeClass.NamedTypeReferenceClass:
        log_debug(f'return value is {ret_type}')
        ret_type = Type.pointer(view.arch, ret_type)
        args.insert(0, FunctionParameter(ret_type, 'ret_value'))

        if len(args) < 2:
            args.append(None)

        args[1] = _self_parameter()
    else:
        args[0] = _self_parameter()

    return Type.function(ret_type, args)
def get_cfstring_token(self, cfstring_address):
    """Build a string token (``@"..."``) for the CFString at an address.

    Reads the ``buffer`` pointer and ``length`` fields of the ``CFString``
    structure at ``cfstring_address``. If the view reports an ASCII string
    at the buffer, its value is used; otherwise ``length * 2`` bytes are
    read from the buffer and decoded as UTF-16 in the view's byte order.

    :returns: an ``InstructionTextToken`` of type ``StringToken`` whose
        value is ``cfstring_address``, or ``None`` if the view has no
        ``CFString`` type.
    """
    CFString = self.view.get_type_by_name('CFString')
    if CFString is None:
        return

    little_endian = self.view.endianness == Endianness.LittleEndian
    byte_order = "little" if little_endian else "big"

    buffer_ptr = int.from_bytes(
        self.view.read(
            cfstring_address + CFString.structure['buffer'].offset,
            self.view.address_size),
        byte_order)

    size = int.from_bytes(
        self.view.read(
            cfstring_address + CFString.structure['length'].offset,
            self.view.address_size),
        byte_order)

    log_debug(f"buffer_ptr is {buffer_ptr}")
    buffer = self.view.get_ascii_string_at(buffer_ptr, 0)
    log_debug(f"buffer is {buffer}")

    if buffer is not None:
        buffer = buffer.value
    else:
        # Two bytes are read per character, so decode them instead of
        # letting the f-string below render a bytes repr (b'...').
        raw = self.view.read(buffer_ptr, size * 2)
        buffer = raw.decode(
            "utf-16-le" if little_endian else "utf-16-be",
            errors="replace")

    return InstructionTextToken(InstructionTextTokenType.StringToken,
                                f'@"{buffer}"', cfstring_address)
def visit_LLIL_REG_SSA(self, expr):
    """Fold an SSA register read whose value is a known constant.

    :returns: the result of ``analyze_constant_folding(expr)`` when the
        expression's value is a constant or constant pointer, else ``None``.
    """
    log_debug("visit_LLIL_REG_SSA")

    foldable_kinds = (
        RegisterValueType.ConstantPointerValue,
        RegisterValueType.ConstantValue,
    )
    if expr.value.type not in foldable_kinds:
        return None

    return self.analyze_constant_folding(expr)
def visit(self, expression):
    """Dispatch ``expression`` to ``visit_<ClassName>`` on this visitor.

    The handler name is derived from the expression's class name.

    :returns: whatever the handler returns, or ``None`` (after a debug log)
        when this visitor defines no handler for that class.
    """
    method_name = f"visit_{expression.__class__.__name__}"

    if not hasattr(self, method_name):
        # method_name already carries the "visit_" prefix; the message used
        # to prepend it a second time.
        log_debug(f"{method_name} missing")
        return None

    return getattr(self, method_name)(expression)
def set_indirect_branches_clicked(self):
    """Apply the table's branches to the indirect jump and accept.

    Passes ``self.table_model.branches`` to
    ``self.func.set_user_indirect_branches`` for ``self.indirect_jmp_addr``
    and then calls ``self.accept()``.
    """
    jump_address = self.indirect_jmp_addr
    branches = self.table_model.branches

    message = "Setting 0x%x's indirect branches to: %s" % (jump_address,
                                                           branches)
    log_debug(message)

    self.func.set_user_indirect_branches(jump_address, branches)
    self.accept()
def __lt__(self, other):
    """Order two nodes by reaching conditions, then by start address.

    ``self`` sorts first when the AST records a reaching condition from
    ``self.start`` to ``other.start`` but none in the opposite direction;
    otherwise the comparison falls back to ``self.start < other.start``.
    """
    conditions = self._ast.reaching_conditions

    forward = conditions.get((self.start, other.start))
    if forward is not None and conditions.get(
            (other.start, self.start)) is None:
        result = True
    elif self.start < other.start:
        result = True
    else:
        result = False

    log_debug(f'{self} < {other} == {result}')
    return result
def visit(self, expression):
    """Dispatch ``expression`` to ``visit_<operation name>``.

    :returns: the handler's return value, or ``None`` (after logging the
        operation) when this visitor has no handler for the operation.
    """
    handler_name = "visit_" + expression.operation.name

    if not hasattr(self, handler_name):
        log_debug(f"{repr(expression.operation)}")
        return None

    handler = getattr(self, handler_name)
    return handler(expression)
def find_swarm_hashes(self, data):
    """Locate every swarm-hash marker in ``data``.

    Searches for each occurrence of the byte sequence ``\\xa1ebzzr0``.

    :param data: the bytes to scan.
    :returns: a list of ``(offset, 43)`` tuples, one per marker, where
        ``offset`` is the marker's absolute position in ``data``.
    """
    marker = b'\xa1ebzzr0'
    rv = []

    offset = data.find(marker)
    while offset != -1:
        log_debug("Adding r-- segment at: {:#x}".format(offset))
        rv.append((offset, 43))
        # Resume from just past this hit while keeping offsets absolute.
        # Searching a slice yields slice-relative positions, which gave
        # wrong offsets and could loop forever with two or more markers.
        offset = data.find(marker, offset + 1)

    return rv
def discover_names(func, func_params):
    """Map method-name strings to the single function that passes each.

    For every caller of ``func``, collects the value passed for the chosen
    parameter at each MLIL call to ``func``. A caller is only considered
    when all of its calls pass one and the same determined value; that value
    is treated as the address of a string holding the method name.

    :param func: the function whose callers are inspected.
    :param func_params: list whose first element is the parameter variable
        of ``func`` that carries the method name.
    :returns: dict of method name -> calling function, containing only
        names used by exactly one caller.
    """
    param = func_params[0]
    paramIndex = func.parameter_vars.vars.index(param)

    identified_functions = {}
    for caller in set(func.callers):
        logged_names = set()

        # Ensure that we only see one method used in this function
        for mlil_inst in caller.mlil.instructions:
            # Calls only
            if mlil_inst.operation != MediumLevelILOperation.MLIL_CALL:
                continue

            # Ensure that we're only acting on our calls
            # FIXME: There must be a better way to find the callee
            if not hasattr(mlil_inst.operands[1], 'constant'):
                continue
            called_func = func.view.get_function_at(
                mlil_inst.operands[1].constant)
            if called_func != func:
                continue

            call_site_param = caller.get_parameter_at(
                mlil_inst.address, func.function_type, paramIndex)

            # FIXME: There must be a better way again
            if str(call_site_param) == "<undetermined>":
                call_site_param = None
            logged_names.add(call_site_param)

        if len(logged_names) != 1 or None in logged_names:
            log_warn(
                "Unable to determine method name for function %r: Identified method names: %r"
                % (caller, logged_names))
            continue

        logged_name_addr = next(iter(logged_names)).value
        name_string = func.view.get_string_at(logged_name_addr)
        if name_string is None:
            # No string is known at that address; skip this caller instead
            # of failing on ``None.value``.
            log_warn(
                "Unable to read method name string at %r for function %r"
                % (logged_name_addr, caller))
            continue

        identified_functions.setdefault(name_string.value, set()).add(caller)

    # Eliminate names with multiple callers
    unique_functions = {}
    for name, callers in identified_functions.items():
        if len(callers) != 1:
            log_debug("Eliminating name %r with callers %r" %
                      (name, callers))
            continue
        unique_functions[name] = next(iter(callers))

    return unique_functions
def _parse_structure(type_string: str, view: BinaryView) -> Type:
    """Parse an encoded structure and register it as a user type.

    ``type_string`` must start just after the opening ``{``: the text up to
    the first ``=`` is the structure name, followed by field encodings up to
    the matching ``}``. Supported fields are nested structures, arrays
    written ``[<count><type>]``, and single-character types known to
    ``_lookup_type``.

    Note that, despite the annotation, the return value is a 2-tuple.

    :returns: ``(named type reference to the structure, unparsed remainder
        of type_string)``.
    :raises NotImplementedError: on a field encoding that is not handled.
    """
    type_name = ''.join(
        takewhile(lambda i: i != '=', type_string)
    )
    type_string = type_string[len(type_name) + 1:]

    fields = []
    while type_string:
        head = type_string[0]

        if head == '{':
            field_type, type_string = _parse_structure(type_string[1:], view)
            fields.append(field_type)

        elif head == '}':
            type_string = type_string[1:]
            break

        elif head == '[':
            body = type_string[1:]
            array_size = ''.join(takewhile(str.isdigit, body))
            # The element type starts after the count digits. Taking it
            # from the start of ``body`` folded the digits into the type
            # name and made the slice below skip too far.
            array_type = ''.join(
                takewhile(lambda i: i != ']', body[len(array_size):])
            )
            # '[' + count + element type + ']'
            type_string = type_string[
                len(array_size) + len(array_type) + 2:]

            fields.append(
                Type.array(_lookup_type(array_type, view), int(array_size))
            )

        elif head == ']':
            # Stray closing bracket: drop it and keep going.
            type_string = type_string[1:]
            continue

        else:
            field_type = _lookup_type(head, view)
            if not field_type:
                log_debug(
                    f"Not sure what is going on with this type: {type_string!r}")
                raise NotImplementedError(f"{type_string!r}")
            fields.append(field_type)
            type_string = type_string[1:]

    parsed_struct = Structure()
    for field in fields:
        parsed_struct.append(field)

    log_debug(f"Created {type_name}={parsed_struct}")
    view.define_user_type(type_name, Type.structure_type(parsed_struct))

    return (
        Type.named_type_from_type(
            type_name,
            view.types.get(type_name)
        ),
        type_string
    )
def visit_MLIL_IF(self, expr):
    """Handle an MLIL ``if``.

    When the instruction's address has more than one low-level IL exit
    (e.g. a ``stosb``-like instruction), returns the largest exit index
    plus one. Otherwise defers to ``analyze_unconditional_jump``.
    """
    log_debug("visit_MLIL_IF")

    # is this a stosb or something similar? If so,
    # find the largest exit index and start there.
    exit_indices = self.function.get_low_level_il_exits_at(expr.address)
    if len(exit_indices) <= 1:
        return self.analyze_unconditional_jump(expr)

    return max(exit_indices) + 1
def __gt__(self, other):
    """Order two nodes by reaching conditions, then by start address.

    With no reaching condition recorded from ``other.start`` to
    ``self.start`` the base result is False. With one recorded and none in
    the opposite direction it is True. With both recorded it is
    ``self.start > other.start``.

    A false base result is overridden when both nodes share a start address
    and ``self.type`` is ``'cond'``.
    """
    conditions = self._ast.reaching_conditions

    if conditions.get((other.start, self.start)) is None:
        result = False
    elif conditions.get((self.start, other.start)) is None:
        result = True
    elif self.start > other.start:
        result = True
    else:
        result = False

    log_debug(f'{self} > {other} == {result}')

    if result:
        return result
    return self.start == other.start and self.type == 'cond'
def define_methods(view):
    """Run the method-definition passes over ``view``.

    Waits for analysis, resolves the ``_objc_getClass``,
    ``_class_addMethod`` and ``_class_replaceMethod`` symbols, runs
    ``parse_get_class`` / ``parse_added_methods`` on whichever are present,
    then propagates types and adds cross references.

    NOTE(review): the second ``update_analysis_and_wait`` is treated as
    unconditional here; the flattened source does not show whether it sat
    inside the ``objc_getClass`` branch — confirm.
    """
    log_debug('define_methods')

    view.update_analysis_and_wait()

    def _resolve(symbol_name):
        # A lookup may yield a list of symbols; in that case keep the first
        # imported-function symbol (or None if there is none).
        found = view.symbols.get(symbol_name)
        if not isinstance(found, list):
            return found
        for candidate in found:
            if candidate.type == SymbolType.ImportedFunctionSymbol:
                return candidate
        return None

    objc_getClass = _resolve('_objc_getClass')
    class_addMethod = _resolve('_class_addMethod')
    class_replaceMethod = _resolve('_class_replaceMethod')

    if objc_getClass is not None:
        parse_get_class(view, objc_getClass.address)

    view.update_analysis_and_wait()

    for method_symbol in (class_addMethod, class_replaceMethod):
        if method_symbol is not None:
            parse_added_methods(view, method_symbol.address)

    _propagate_types(view)
    _propagate_stret_types(view)
    _add_xrefs(view)
def visit_MLIL_OR(self, expr):
    """NOP an OR whose right operand is the constant zero.

    :returns: the result of ``queue_prev_block(expr)`` when the instruction
        was converted to a NOP, else ``None``.
    """
    log_debug("visit_MLIL_OR")

    constant_kinds = (
        RegisterValueType.ConstantPointerValue,
        RegisterValueType.ConstantValue,
    )

    # If it's something like `ecx | 0` then we can NOP it
    # and nothing of value is lost
    is_or_with_zero = (expr.right.value.type in constant_kinds
                       and expr.right.value.value == 0)
    if not is_or_with_zero:
        return None

    self.convert_to_nop(expr.address)
    return self.queue_prev_block(expr)
def visit_MLIL_XOR(self, expr):
    """NOP an XOR whose left operand has no known value.

    :returns: the result of ``queue_prev_block(expr)`` when the instruction
        was converted to a NOP, else ``None``.
    """
    log_debug("visit_MLIL_XOR")

    unknown_kinds = (
        RegisterValueType.UndeterminedValue,
        RegisterValueType.EntryValue,
    )

    # If it's something like `ecx ^ const` and ecx isn't a known
    # value, then just erase it. It's not needed at all.
    if expr.left.value.type not in unknown_kinds:
        return None

    self.convert_to_nop(expr.address)
    return self.queue_prev_block(expr)
def flatten_sequence(self):
    """Splice the children of nested ``"seq"`` nodes into this node.

    Each direct child whose ``type`` is ``"seq"`` is replaced by its own
    ``nodes``; other children are kept as they are. Only one level is
    flattened. Does nothing when ``self._nodes`` is empty.
    """
    log_debug("flatten_sequence")

    if len(self._nodes) == 0:
        return

    merged = []
    for child in self.nodes:
        merged.extend(child.nodes if child.type == "seq" else [child])

    self._nodes = merged
def define_cfstrings_plugin(view: BinaryView):
    """Type every entry of the ``__cfstring`` section and its string data.

    Ensures a ``CFString`` type exists (parsing ``_cfstring_definition``
    when it does not), then for each ``CFString``-sized slot of the
    ``__cfstring`` section:

    * defines the slot as a ``CFString`` data variable,
    * defines each data reference to it as a pointer to ``CFString``,
    * defines the pointed-to buffer as a character array, using the
      ``wchar`` type when the buffer lives in ``__ustring`` and ``char``
      otherwise.

    Returns early when the view has no ``__cfstring`` section.
    """
    log_debug("define_cfstrings_plugin")

    from_bytes = _get_from_bytes(view)

    cfstring_type = view.get_type_by_name('CFString')
    if cfstring_type is None:
        cfstring_type = view.platform.parse_types_from_source(
            _cfstring_definition).types['CFString']
        view.define_user_type('CFString', cfstring_type)

    wchar_type = view.platform.parse_types_from_source(
        _wchar_definition).types['wchar']

    cfstring = Type.named_type_from_type('CFString', cfstring_type)

    __cfstring = view.get_section_by_name('__cfstring')
    if __cfstring is None:
        return

    buffer = cfstring_type.structure['buffer']
    length = cfstring_type.structure['length']

    for addr in range(__cfstring.start, __cfstring.end, cfstring_type.width):
        view.define_user_data_var(addr, cfstring)

        for xref in view.get_data_refs(addr):
            view.define_user_data_var(xref, Type.pointer(view.arch, cfstring))

        string_pointer = from_bytes(
            view.read(addr + buffer.offset, buffer.type.width))

        # The array is one element longer than the stored length
        # (presumably room for a terminator — TODO confirm).
        string_length = from_bytes(
            view.read(addr + length.offset, length.type.width)) + 1

        string_section = view.get_sections_at(string_pointer)
        if not string_section:
            # This entry's buffer is not inside any section; skip it but
            # keep processing the rest (this used to abort the whole pass).
            continue

        if string_section[0].name == '__ustring':
            char_type = wchar_type
        else:
            char_type = Type.char()

        view.define_user_data_var(string_pointer,
                                  Type.array(char_type, string_length))
def visit_MLIL_ADDRESS_OF(self, expr):
    """Model ``&variable`` as a symbolic bit-vector named ``&<var>``.

    The variable's own name is used when it has one. Otherwise the name is
    ``var_<hex of abs(storage)>`` for stack variables, or whatever the
    architecture returns for the storage index.

    :returns: a ``BitVec`` of ``expr.size * 8`` bits, or the view's address
        size in bits when ``expr.size`` is falsy.
    """
    source = expr.src

    if source.name:
        var_name = source.name
    elif source.source_type == VariableSourceType.StackVariableSourceType:
        var_name = f'var_{abs(source.storage):x}'
    else:
        var_name = expr.function.arch.get_reg_by_index(source.storage)

    log_debug(f'var_name: {repr(var_name)}')

    if expr.size:
        bit_width = expr.size * 8
    else:
        bit_width = expr.function.source_function.view.address_size * 8

    return BitVec(f"&{var_name}", bit_width)
def queue_prev_block(self, expr):
    """Queue the address at which analysis should resume after ``expr``.

    If the basic block containing ``expr`` does not have exactly one
    incoming edge, ``expr.address`` itself is queued. Otherwise the single
    predecessor is taken and, while that block begins with a jump/goto,
    has exactly one incoming edge and is not among the dominators of its
    own predecessor, the walk steps back one more block. The address of the
    first instruction of the block reached is queued.

    :returns: ``True`` once an address has been queued, or ``None`` when
        ``expr`` is neither an MLIL nor an LLIL instruction.
    """
    log_debug("queue_prev_block")

    if isinstance(expr, MediumLevelILInstruction):
        block_type = MediumLevelILBasicBlock
    elif isinstance(expr, LowLevelILInstruction):
        block_type = LowLevelILBasicBlock
    else:
        return None

    owning_block: block_type = next(
        block for block in expr.function.basic_blocks
        if block.start <= expr.instr_index < block.end)

    entering = owning_block.incoming_edges
    log_debug(f"current_bb has {len(entering)} incoming edges")

    if len(entering) != 1:
        log_debug("Incoming Edges was not 1, just continuing")
        self.target_queue.put(expr.address)
        return True

    jump_like = (
        LowLevelILOperation.LLIL_JUMP_TO,
        MediumLevelILOperation.MLIL_JUMP_TO,
        MediumLevelILOperation.MLIL_GOTO,
        LowLevelILOperation.LLIL_GOTO,
    )

    predecessor = entering[0].source
    while predecessor[0].operation in jump_like:
        edges = predecessor.incoming_edges
        if len(edges) != 1:
            log_debug("Incoming edges was not 1, stopping here")
            break

        log_debug(f"{edges}")

        parent = edges[0].source
        if predecessor in parent.dominators:
            break
        predecessor = parent

    resume_at = predecessor.il_function[predecessor.start].address
    self.target_queue.put(resume_at)
    return True
def define_classes_plugin(view):
    """Define the types, classes, protocols and categories for ``view``.

    Runs ``define_types_plugin``, resets the ``ClassList``, ``ClassNames``,
    ``ClassROList`` and ``Protocols`` entries of ``view.session_data`` to
    empty dicts, then runs the class, protocol and category passes. Logs an
    error and stops if the ``class_t`` type is not defined.
    """
    log_debug("define_classes_plugin")

    define_types_plugin(view)

    view.session_data['ClassList'] = {}
    view.session_data['ClassNames'] = {}
    view.session_data['ClassROList'] = {}
    view.session_data['Protocols'] = {}

    # Check the looked-up type itself: the missing-type case has to be
    # caught before it is wrapped in a named type reference.
    class_t_type = view.types.get('class_t')
    if class_t_type is None:
        log_error("class_t is not defined!")
        return

    class_t = Type.named_type_from_type('class_t', class_t_type)
    if class_t is None:
        log_error("class_t is not defined!")
        return

    _define_classes(view, class_t)
    _define_protocols(view)
    _define_categories(view)
def heuristic_look_for_vul_function_ptr_calls(self, mlil_instr, var_origins):
    """
    Flag indirect calls near a short ``%s`` format string for manual review.

    Looks for things like:
    ```C
    printf("%s", input);
    this->debug_func(input);
    ```
    We don't know that `this->debug_func` receives a format string (and so
    our analysis fails). This simple heuristic finds these cases and we can
    then check by hand.

    The single origin in ``var_origins`` must be a ``VarOriginConst`` at a
    read-only address whose string contains ``%s`` and is at most five
    characters long. The address of every MLIL call through a variable in
    the instruction's basic block and its direct successors is added to
    ``self.heuristic_vul_function_ptr_calls``.
    """
    if len(var_origins) != 1:
        return

    origin = var_origins[0]
    if not (isinstance(origin, VarOriginConst)
            and self.is_addr_read_only(origin.const)):
        return

    fmt = origin.get_string(self.bv)
    if not fmt:
        return

    # ====================
    # Restrict based on the string content

    if "%" not in fmt:
        return

    # Ensure the string contains %s and that is no larger than 5 chars
    # This tries to include things like '%s\n' and '%s.\n'
    if not (len(fmt) <= 5 and "%s" in fmt):
        return

    # ====================
    # Look for calls using registers in the current and next few basic blocks
    # If we find one it might be a vulnerable call
    # @@TODO: We could check if the orig of one of its params is the same as the param after the fmt string in the original ref

    current_block = mlil_instr.il_basic_block
    candidate_blocks = [current_block]
    candidate_blocks.extend(
        edge.target for edge in current_block.outgoing_edges)

    for block in candidate_blocks:
        for instr in block:
            if instr.operation != MLILOperation.MLIL_CALL:
                continue
            if instr.dest.operation != MLILOperation.MLIL_VAR:
                continue
            log_debug(f"Heuristic finding {hex(instr.address)}")
            self.heuristic_vul_function_ptr_calls.add(instr.address)
def _add_xrefs(view: BinaryView):
    """Add user cross references from selector uses to method functions.

    For every function, each data reference to its start is read as a
    ``method_t`` entry. The entry's ``name`` pointer is read, and for each
    code reference to that pointer whose mapped MLIL is a ``SET_VAR``, the
    first later use of the assigned SSA variable in the same basic block is
    given a user xref to the function.

    Does nothing when the view has no ``method_t`` type.
    """
    log_debug('_add_xrefs')

    method_t = view.types.get('method_t')
    if method_t is None:
        return

    method_t_struct = method_t.structure
    method_t_name = method_t_struct['name']

    byte_order = (
        "little" if view.endianness == Endianness.LittleEndian else "big")

    for function in view.functions:
        data_refs = view.get_data_refs(function.start)
        log_debug(f'{function.name}: {data_refs}')

        method_t_list = [view.get_data_var_at(ref) for ref in data_refs]
        log_debug(f'{function.name}: {method_t_list}')

        for method in method_t_list:
            if method is None:
                # No data variable is defined at this reference.
                continue

            name_ptr = int.from_bytes(
                view.read(method.address + method_t_name.offset,
                          view.address_size),
                byte_order
            )

            for xref in view.get_code_refs(name_ptr):
                xref_llil = xref.function.get_low_level_il_at(xref.address)
                xref_mlil = xref_llil.mmlil if xref_llil is not None else None

                if xref_mlil is None:
                    # One unusable reference should not end the pass for
                    # every remaining function; skip just this one.
                    log_debug(f'{xref.address:x}')
                    continue

                if xref_mlil.operation != MediumLevelILOperation.MLIL_SET_VAR:
                    continue

                call_mlil = next(
                    (use
                     for use in xref_mlil.function.get_ssa_var_uses(
                         xref_mlil.ssa_form.dest)
                     if (use.instr_index > xref_mlil.instr_index and
                         use.il_basic_block == xref_mlil.il_basic_block)),
                    None
                )

                if call_mlil is not None:
                    xref.function.set_user_xref(call_mlil.address,
                                                function.start)
def visit_MLIL_SUB(self, expr): log_debug("visit_MLIL_SUB") # This is a top level MLIL_SUB, which means it's probably a cmp instruction if expr.function[ expr.instr_index].operation == MediumLevelILOperation.MLIL_SUB: self.convert_to_nop(expr.address) return self.queue_prev_block(expr) if expr.left.value.type in ( RegisterValueType.UndeterminedValue, RegisterValueType.EntryValue, ): # Make sure we're not accidentally NOPing a push/pop # due to the stack being in a bad state due to a weird # loop if (expr.left.operation != MediumLevelILOperation.MLIL_VAR and expr.left.src.index != self.view.arch.get_reg_index("esp")): self.convert_to_nop(expr.address) return self.queue_prev_block(expr) sub_value = expr.value if sub_value.type in ( RegisterValueType.ConstantPointerValue, RegisterValueType.ConstantValue, ): log_debug(f"sub value is {sub_value.value:x}") return self.analyze_constant_folding(expr.left) else: log_debug("sub value is not a constant ptr") return
def __lt__(self, other):
    """Order nodes so that the ``["default"]`` case sorts last.

    A node whose ``_value`` is ``["default"]`` is never less than another
    node, and every other node is less than it. All remaining comparisons
    are delegated to the parent class.
    """
    log_debug(f'{self} < {other}')

    if self._value == ["default"]:
        log_debug('False')
        return False

    if other._value == ['default']:
        return True

    # Evaluate the parent comparison once and reuse it for both the log
    # line and the return value (it used to be computed twice).
    result = super().__lt__(other)
    log_debug(f'{result}')
    return result
def visit_LLIL_SUB(self, expr):
    """Fold the left operand of an LLIL subtraction with a constant value.

    :returns: the result of ``analyze_constant_folding(expr.left)`` when
        the subtraction's value is a constant or constant pointer, else
        ``None``.
    """
    log_debug("visit_LLIL_SUB")

    sub_value = expr.value
    constant_kinds = (
        RegisterValueType.ConstantPointerValue,
        RegisterValueType.ConstantValue,
    )

    if sub_value.type not in constant_kinds:
        log_debug("sub value is not constant ptr")
        return None

    log_debug(f"sub value is {sub_value.value:x}")
    return self.analyze_constant_folding(expr.left)