def slice_exp(exp, left, right):
    """Cut the byte window ``[left, right)`` out of a memory expression.

    Only expressions shaped like ``("mem", ("range", pos, length))`` are
    handled here; every other expression yields ``None``.

    Args:
        exp: the expression to slice.
        left: offset, in bytes, of the first byte to keep.
        right: offset, in bytes, just past the last byte to keep.

    Returns:
        ``("mem", ("range", pos + left, right - left))`` when the window is
        known to fit inside the original range, otherwise ``None``.
    """
    size = sub_op(right, left)

    logger.debug(f"slicing {exp}, offset {left} bytes, until {right} bytes")

    # e.g. mem[32 len 10], 2, 4 == mem[34,2]
    m = match(exp, ("mem", ("range", ":rleft", ":rlen")))
    if not m:
        return None

    base, available = m.rleft, m.rlen
    # Refuse to slice unless the window provably stays within the range.
    if not safe_le_op(add_op(left, size), available):
        return None

    return ("mem", ("range", add_op(base, left), size))
def splits_len(split_list):
    """Add up the lengths of the ranges that lead each entry of *split_list*.

    Every entry is read as ``entry[0][2]``: its first item is expected to be a
    ``("range", pos, length)`` triple, and the lengths are accumulated with
    ``add_op`` so they may be symbolic.

    Returns:
        The accumulated length, or ``0`` for an empty *split_list*.
    """
    total = 0
    for entry in split_list:
        total = add_op(total, entry[0][2])
    return total
def apply_mask_to_range(memloc, size, offset):
    """Narrow a memory range to the part selected by a byte-aligned mask.

    Args:
        memloc: a ``("range", pos, length)`` triple.
        size: mask size; ``to_bytes`` must split it into whole bytes.
        offset: mask offset; ``to_bytes`` must split it into whole bytes.

    Returns:
        ``("range", pos + (length - (size_bytes + offset_bytes)), size_bytes)``.

    Raises:
        AssertionError: if *memloc* is not a range, if the mask is not
            byte-aligned, or if the mask is not known to fit in the range.
    """
    tag, start, length = memloc
    assert tag == "range"

    # to_bytes yields a (whole bytes, leftover bits) pair
    size_bytes, size_bits = to_bytes(size)
    offset_bytes, offset_bits = to_bytes(offset)

    # for now only masks without leftover bits are supported
    assert offset_bits == size_bits == 0, (offset_bits, size_bits)

    masked_span = add_op(size_bytes, offset_bytes)

    # otherwise we need to learn to handle that
    assert safe_le_op(masked_span, length) is True, (
        size_bytes,
        offset_bytes,
        length,
    )

    new_start = add_op(start, sub_op(length, masked_span))
    return ("range", new_start, size_bytes)
def range_contains(outer, inner):
    """Check whether the *outer* range *fully* contains the *inner* range.

    Both arguments are ``("range", begin, length)`` triples.

    Returns:
        ``True`` when *outer* begins no later than *inner* and ends no
        earlier, ``False`` when either comparison fails, and ``None`` when a
        comparison raises ``CannotCompare``.
    """
    tag, outer_begin, outer_len = outer
    assert tag == "range"
    tag, inner_begin, inner_len = inner
    assert tag == "range"

    outer_end = add_op(outer_begin, outer_len)
    inner_end = add_op(inner_begin, inner_len)

    try:
        if le_op(outer_begin, inner_begin) and le_op(inner_end, outer_end):
            return True
        return False
    except CannotCompare:
        return None
def range_overlaps(range1, range2):
    """Report whether two ``("range", begin, length)`` triples may overlap.

    Returns:
        ``False`` only when the range that begins first is known to end at or
        before the start of the other one, ``True`` otherwise, and ``None``
        when a comparison raises ``CannotCompare``.
    """
    tag, a_begin, a_len = range1
    assert tag == "range"
    tag, b_begin, b_len = range2
    assert tag == "range"

    a_end = add_op(a_begin, a_len)
    b_end = add_op(b_begin, b_len)

    try:
        # Pick the range that begins first; only its end and the other
        # range's beginning matter for the overlap test.
        if lt_op(b_begin, a_begin):
            first_end, second_begin = b_end, a_begin
        else:
            first_end, second_begin = a_end, b_begin

        return le_op(first_end, second_begin) is not True
    except CannotCompare:
        return None
def memloc_overwrite(memloc, split):
    """Return the parts of *memloc* that survive a write to *split*.

    Only bytes that are *for sure* overwritten are removed, e.g.::

        (('range', 64, 32), ('range', 70, 10))
            -> [('range', 64, 6), ('range', 80, 16)]
        (('range', 64, 32), ('range', 70, 'unknown'))
            -> [('range', 64, 32)]   # 'unknown' can be 0

    Args:
        memloc: ``("range", pos, length)`` of the memory being kept.
        split: ``("range", pos, length)`` of the memory being written.

    Returns:
        A list with zero, one or two ranges.
    """
    tag, m_left, m_len = memloc
    assert tag == "range"
    tag, s_left, s_len = split
    assert tag == "range"

    m_right = add_op(m_left, m_len)
    s_right = add_op(s_left, s_len)

    # split starts at/after the end of memory, or ends at/before its start:
    # no overlap, nothing is overwritten
    if safe_le_op(m_right, s_left) is True or safe_le_op(s_right, m_left) is True:
        return [memloc]

    head_len = sub_op(s_left, m_left)
    tail_len = sub_op(m_right, s_right)

    head_sign = safe_ge_zero(head_len)
    tail_sign = safe_ge_zero(tail_len)
    if head_sign is None or tail_sign is None:
        # we can't compare some numbers, conservatively return whole range
        return [memloc]

    survivors = []
    if head_sign is True and head_len != 0:
        survivors.append(("range", m_left, head_len))
    if tail_sign is True and tail_len != 0:
        survivors.append(("range", s_right, tail_len))
    return survivors
def _fill_mem(exp, split, split_val):
    """Substitute a known memory write into a memory read expression.

    Args:
        exp: a ``("mem", ("range", pos, length))`` expression being read.
        split: the ``("range", pos, length)`` that was written.
        split_val: the value that was written to *split*.

    Returns:
        *split_val* when *exp* reads exactly *split*; a
        ``("data", part, ...)`` tuple combining the untouched memory with the
        written value when the overlap can be worked out; otherwise *exp*
        unchanged.
    """
    if exp == ("mem", split):
        return split_val

    tag, memloc = exp
    assert tag == "mem"
    tag, m_left, m_len = memloc
    assert tag == "range"
    tag, s_left, s_len = split
    assert tag == "range"

    m_right = add_op(m_left, m_len)
    s_right = add_op(s_left, s_len)

    logger.debug(f"orig memloc: {m_left} len {m_len} right {m_right}")
    logger.debug(f"split memloc: {s_left} len {s_len} right {s_right}")

    # Memory ends at or before the start of the split, or we can't compare.
    if safe_le_op(m_right, s_left) is not False:
        logger.debug("split before memory or can't compare - not replacing")
        return exp

    # The split ends at or before the start of memory, or we can't compare.
    if safe_le_op(s_right, m_left) is not False:
        logger.debug("split after memory or can't compare - not replacing")
        return exp

    left = safe_max_op(s_left, m_left)
    right = safe_min_op(s_right, m_right)

    logger.debug(f"split begins at {left} ends at {right}")

    # if we can't figure out which one is smaller/larger, we're not replacing
    if left is None or right is None:
        return exp

    # 'max' op tends to mess up with all the algebra stuff, so we're replacing
    # it with a variable 'MAX' for the time being
    _, memloc_max = replace_max_with_MAX(memloc)
    _, split_max = replace_max_with_MAX(split)

    if split_max != memloc_max:
        logger.warning("different maxes")
        return exp

    # by now we know:
    # - the split overlaps memory for sure
    # - we know the boundaries of split
    # - so we now return data (before_split, split_val, after_split)

    res_left = slice_exp(exp, 0, sub_op(left, m_left))
    if res_left is None:
        return exp
    logger.debug(f"value left untouched on left: {res_left}")

    res_right = slice_exp(exp, sub_op(right, m_left), sub_op(m_right, m_left))
    if res_right is None:
        return exp
    logger.debug(f"value right untouched on right: {res_right}")

    pieces = []

    left_is_positive = safe_gt_zero(sizeof(res_left))
    if left_is_positive is True:
        logger.debug("size of left untouched > 0, adding to output")
        pieces.append(res_left)
    elif left_is_positive is None:
        logger.debug("we don't know if left size > 0, aborting")
        return exp

    # Boundaries of the overlap, relative to the start of the split.
    center_in_start = sub_op(left, s_left)
    center_in_len = sub_op(right, s_left)
    logger.debug(
        f"inserted value offset {center_in_start}, length {center_in_len}")
    logger.debug(f"cutting this out of {split_val}")

    res_center = slice_exp(split_val, center_in_start, center_in_len)
    logger.debug(f"inserted value after slicing: {res_center}")
    if res_center is None:
        return exp

    if safe_ge_zero(sizeof(res_center)) is True:
        pieces.append(res_center)
    else:
        # this shouldn't happen considering the above checks?
        assert False, sizeof(res_center)

    right_is_nonneg = safe_ge_zero(sizeof(res_right))
    if right_is_nonneg is True:
        if sizeof(res_right) != 0:
            pieces.append(res_right)
    elif right_is_nonneg is None:
        return exp

    assert None not in pieces

    return ("data", ) + tuple(pieces)
def splits_mem(memloc, split, memval, split_val=None):
    """Split a known memory value around a write to an overlapping range.

    Args:
        memloc: ``("range", pos, length)`` of the memory whose value is known.
        split: ``("range", pos, length)`` of the memory being overwritten.
        memval: the value currently held at *memloc*.
        split_val: the value written to *split*; when given, the part of it
            that lands inside *memloc* is included in the result.

    Returns:
        A list of ``(range, value)`` pairs: the untouched left part, the
        overwritten centre (only when *split_val* is given) and the untouched
        right part, each included only when its length is known to be
        positive. ``[(memloc, memval)]`` is returned when the ranges are known
        not to overlap, and ``[]`` when the start of the overlap cannot be
        placed inside *memloc* with certainty.
    """
    # returns memory values we can be confident of, after overwriting the split part of memory
    op, m_left, m_len = memloc
    assert op == "range"
    op, s_left, s_len = split
    assert op == "range"

    m_right = add_op(m_left, m_len)
    s_right = add_op(s_left, s_len)

    logger.debug(f"applying split [{s_left} (len {s_len}) {s_right}]")
    logger.debug(f" to [{m_left} (len {m_len}) {m_right}]")

    # A split length that is not known to be >= 0 is replaced by the
    # placeholder "undefined", and the split's end is recomputed from it.
    if not safe_ge_zero(s_len):
        s_len = "undefined"
        s_right = add_op(s_left, s_len)

    if safe_le_op(m_right, s_left) is True:  # split after memory - no overlap
        return [(memloc, memval)]

    if safe_le_op(s_right, m_left) is True:  # split before memory - no overlap
        return [(memloc, memval)]

    # Bounds of the overlap between the split and the memory location.
    left = safe_max_op(s_left, m_left)
    right = safe_min_op(s_right, m_right)

    logger.debug(f"split overwrites memory from {left} to {right}")

    # left/right relative to beginning of memory location
    in_left = sub_op(left, m_left)
    in_right = sub_op(right, m_left)

    logger.debug(f"that is, relative to memloc {in_left} to {in_right}")

    # NOTE(review): `left is None` is only tested here, after `left` has
    # already been passed to sub_op above — confirm sub_op tolerates None.
    if safe_le_op(in_left, m_len) is not True or left is None:
        logger.debug(
            f"we are not sure that m_len: {m_len} is bigger than beginning of split, returning []"
        )
        return []

    # Parsed as `assert (in_left == 0 if safe_le_op(right, m_left) else True)`:
    # the equality is only enforced when the overlap ends at/before m_left.
    assert in_left == 0 if safe_le_op(right, m_left) else True

    # Values of the memory that stay untouched on either side of the overlap.
    val_left = slice_exp(memval, 0, in_left) if left is not None else None
    val_right = (slice_exp(memval, in_right, sub_op(m_right, m_left))
                 if right is not None else None)

    res = []

    left_len = sub_op(left, m_left)  # sizeof(val_left)
    right_len = sub_op(m_right, right)

    if safe_ge_zero(
            left_len) is True and left_len != 0 and val_left is not None:
        res.append((("range", m_left, left_len), val_left))

    if split_val is not None:
        center_left = safe_max_op(m_left, s_left)
        center_right = safe_min_op(m_right, s_right)
        center_len = sub_op(center_right, center_left)

        if is_array(opcode(split_val)):  # in ARRAY_OPCODES:
            # mem[a len b] = calldata[x len b]
            # log mem[c len d]
            # -> calldata[x+ c - a, center_len]
            # split_val[1:] must hold exactly (offset, length); arr_len is
            # unpacked but not used afterwards.
            arr_offset, arr_len = split_val[1:]
            center_offset = add_op(arr_offset, sub_op(center_left, s_left))
            center_val = (opcode(split_val), center_offset, center_len)
        else:
            # For other values the offset is the distance from the end of the
            # overlap to the end of the split, converted to bits below.
            center_offset = sub_op(s_right, center_right)
            center_val = mask_op(
                split_val,
                size=mul_op(center_len, 8),
                offset=mul_op(center_offset, 8),
                shr=mul_op(center_offset, 8),
            )

        center_range = ("range", center_left, center_len)
        if safe_ge_zero(center_len) and center_len != 0:
            res.append((center_range, center_val))

    if safe_ge_zero(
            right_len) is True and right_len != 0 and val_right is not None:
        res.append((("range", right, right_len), val_right))

    return res
mem_len = m.mem_idx[2] ret_rows.append((bits(mem_len), 0, row)) continue if m := match(row, ("storage", ":size", ":off", ":idx")): ret_rows.append((m.size, 0, row)) continue if opcode(row) != "mask_shl": return [(256, 0, value)] assert opcode(row) == "mask_shl" _, size, offset, shl, value = row stor_size = size stor_offset = add_op(offset, shl) shl = sub_op(shl, stor_offset) if type(value) == int: value = apply_mask(value, size, offset, shl) elif (m := match(value, ("mem", ":idx"))) and add_op(offset, shl) == 0: new_memloc = apply_mask_to_range(m.idx, size, offset) value = ("mem", new_memloc) else: value = mask_op(value, size=size, offset=offset, shl=shl) ret_rows.append(( stor_size, stor_offset, value,
def eval_bool(exp, known_true=True, symbolic=True):
    """Try to decide the truth value of *exp*, optionally using a known fact.

    Args:
        exp: the expression to decide; an int, a bool or an opcode tuple such
            as ``("lt", a, b)``.
        known_true: an expression assumed to hold while deciding *exp*.
        symbolic: when false, *exp* is only evaluated and the result is used
            if it is an int; no symbolic comparison is attempted.

    Returns:
        ``True`` or ``False`` when the value can be decided, ``None`` when it
        cannot.

    NOTE(review): ``eval`` below is presumably this module's own expression
    evaluator shadowing the builtin — confirm before feeding it raw strings.
    """
    if exp == known_true:
        return True

    if is_zero(exp) == known_true:
        return False

    if exp == is_zero(known_true):
        return False

    if type(exp) == int:
        return exp > 0

    if exp in (True, False):
        # A literal boolean evaluates to itself; a literal False must not be
        # reported as true.
        return bool(exp)

    if opcode(exp) == "bool":
        return eval_bool(exp[1], known_true=known_true, symbolic=symbolic)

    if opcode(exp) == "iszero":
        e = eval_bool(exp[1], known_true=known_true, symbolic=symbolic)
        if e is not None:
            return not e
        # undecided operand: fall through to the generic handling below

    if opcode(exp) == "or":
        # Start from a real boolean so that an empty 'or' yields False
        # rather than the integer 0.
        res = False
        for e in exp[1:]:
            ev = eval_bool(e, known_true=known_true, symbolic=symbolic)
            if ev is None:
                return None
            res = res or ev
        return res

    #'ge', 'gt', 'eq' - tbd
    if opcode(exp) in ["le", "lt"] and opcode(exp) == opcode(known_true):
        if exp[1] == known_true[1]:
            # ('le', x, sth) while ('le', x, sth2) is known to be true
            if eval_bool((opcode(exp), known_true[2], exp[2])) is True:
                return True

    if not symbolic:
        r = eval(exp)

        if type(r) == int:
            return r != 0

        return None

    if opcode(exp) == "le":
        left = eval(exp[1])
        right = eval(exp[2])

        if left == right:
            return True

        if type(left) == int and type(right) == int:
            return left <= right

        try:
            return algebra.le_op(left, right)
        except Exception:
            # best effort: a failed comparison means "unknown"
            return None

    if opcode(exp) == "lt":
        left = eval(exp[1])
        right = eval(exp[2])

        if left == right:
            return False

        if type(left) == int and type(right) == int:
            return left < right

        try:
            return algebra.lt_op(left, right)
        except Exception:
            # best effort: a failed comparison means "unknown"
            return None

    if opcode(exp) == "gt":
        left = eval(exp[1])
        right = eval(exp[2])

        if type(left) == int and type(right) == int:
            return left > right

        if left == right:
            return False

        try:  # a > b iff b < a iff b+1 <= a
            le = algebra.lt_op(algebra.add_op(left, 1), right, 1)
            logger.debug("le %s %s %s", le, left, right)

            if le == True:
                return False
            if le == False:
                return True
            if le is None:
                return None
        except Exception:
            # best effort: fall through and end up returning None
            pass

    if opcode(exp) == "ge":
        left = eval(exp[1])
        right = eval(exp[2])

        if type(left) == int and type(right) == int:
            return left >= right

        if left == right:
            return True

        try:
            lt = algebra.lt_op(left, right)

            if lt == True:
                return False
            if lt == False:
                return True
            if lt is None:
                return None
        except Exception:
            # best effort: fall through and end up returning None
            pass

    if opcode(exp) == "eq":
        left = eval(exp[1])
        right = eval(exp[2])

        if left == right:
            return True

        if algebra.sub_op(left, right) == 0:
            return True

    return None
def handle_call(self, op, trace):
    """Symbolically execute a ``call`` or ``staticcall`` instruction.

    Pops the instruction's operands from ``self.stack``, emits the matching
    trace entries through *trace* and pushes a symbolic success/result value.

    Args:
        op: ``"call"`` or ``"staticcall"``.
        trace: callable receiving each trace entry produced by the call.

    Raises:
        AssertionError: if *op* is neither ``"call"`` nor ``"staticcall"``.
    """
    stack = self.stack

    gas = stack.pop()
    addr = stack.pop()
    if op == "call":
        wei = stack.pop()
    else:
        assert op == "staticcall"
        wei = 0  # staticcall has no value operand on the stack

    arg_start = stack.pop()
    arg_len = stack.pop()
    ret_start = stack.pop()
    ret_len = stack.pop()

    if addr == 4:  # Identity
        m = mem_load(arg_start, arg_len)
        trace(("setmem", ("range", ret_start, arg_len), m))
        stack.append("memcopy.success")
        return

    if type(addr) == int and addr in precompiled:
        args = mem_load(arg_start, arg_len)
        var_name = precompiled_var_names[addr]
        trace(("precompiled", var_name, precompiled[addr], args))
        trace(("setmem", ("range", ret_start, ret_len), ("var", var_name)))
        stack.append("{}.result".format(precompiled[addr]))
        return

    assert op in ("call", "staticcall")

    # The first four bytes of the arguments are reported separately from
    # the remaining ones.
    if arg_len == 0:
        fname, fparams = None, None
    elif arg_len == 4:
        fname, fparams = mem_load(arg_start, 4), None
    else:
        fname = mem_load(arg_start, 4)
        fparams = mem_load(add_op(arg_start, 4), sub_op(arg_len, 4))

    trace((op, gas, addr, wei, fname, fparams))
    # trace(('comment', mem_load(arg_start, arg_len)))

    self.call_len = ret_len
    stack.append("ext_call.success")

    # Record the returned data unless the return length is known not to be
    # positive; only the comparison itself is guarded, so a failure inside
    # trace() can no longer lead to the entry being emitted twice.
    try:
        may_return_data = lt_op(0, ret_len)
    except CannotCompare:
        may_return_data = True

    if may_return_data:
        return_data = ("ext_call.return_data", 0, ret_len)
        trace(("setmem", ("range", ret_start, ret_len), return_data))
def apply_stack(self, ret, line):
    """Symbolically execute one instruction against ``self.stack``.

    Args:
        ret: list collecting the trace entries produced by the instruction.
        line: the instruction; ``line[0]`` is its location (pushed by ``pc``
            and shown in verbose output), ``line[1]`` the opcode name and
            ``line[2]``, when present, its parameter.

    Raises:
        AssertionError: if the opcode is a control-flow one that must be
            handled by the caller, or if the stack height changed by a
            different amount than ``opcode_dict.stack_diffs`` expects.
    """

    def trace(exp, *format_args):
        # Logging is best effort: formatting may fail on text with braces.
        try:
            logger.debug("Trace: %s", str(exp).format(*format_args))
        except Exception:
            pass

        if type(exp) == str:
            ret.append(exp.format(*format_args))
        else:
            ret.append(exp)

    word_mask = 2 ** 256 - 1  # EVM words are 256 bits wide

    stack = self.stack

    op = line[1]

    previous_len = stack.len()

    if "--verbose" in sys.argv or "--explain" in sys.argv:
        trace(C.asm(" " + str(stack)))
        trace("")

        if "push" not in op and "dup" not in op and "swap" not in op:
            trace("[{}] {}", line[0], C.asm(op))
        else:
            if type(line[2]) == str:
                trace("[{}] {} {}", line[0], C.asm(op), C.asm(" ”" + line[2] + "”"))
            elif line[2] > 0x1000000000:
                trace("[{}] {} {}", line[0], C.asm(op), C.asm(hex(line[2])))
            else:
                trace("[{}] {} {}", line[0], C.asm(op), C.asm(str(line[2])))

    param = 0
    if len(line) > 2:
        param = line[2]

    if op in [
        "exp",
        "and",
        "eq",
        "div",
        "lt",
        "gt",
        "slt",
        "sgt",
        "mod",
        "xor",
        "signextend",
        "smod",
        "sdiv",
    ]:
        stack.append(arithmetic.eval((op, stack.pop(), stack.pop(),)))

    elif op[:4] == "push":
        stack.append(param)

    elif op == "pop":
        stack.pop()

    elif op == "dup":
        stack.dup(param)

    elif op == "mul":
        stack.append(mul_op(stack.pop(), stack.pop()))

    elif op == "or":
        stack.append(or_op(stack.pop(), stack.pop()))

    elif op == "add":
        stack.append(add_op(stack.pop(), stack.pop()))

    elif op == "sub":
        left = stack.pop()
        right = stack.pop()

        if type(left) == int and type(right) == int:
            stack.append(arithmetic.sub(left, right))
        else:
            stack.append(sub_op(left, right))

    elif op in ["mulmod", "addmod"]:
        # Keep the real opcode name so that addmod is not reported as mulmod.
        stack.append((op, stack.pop(), stack.pop(), stack.pop()))

    elif op == "shl":
        off = stack.pop()
        exp = stack.pop()
        if all_concrete(off, exp):
            # EIP-145: the result wraps to 256 bits and a shift by 256 or
            # more gives zero (also avoids building a gigantic integer).
            if off >= 256:
                stack.append(0)
            else:
                stack.append((exp << off) & word_mask)
        else:
            stack.append(mask_op(exp, shl=off))

    elif op == "shr":
        off = stack.pop()
        exp = stack.pop()
        if all_concrete(off, exp):
            stack.append(exp >> off)
        else:
            stack.append(mask_op(exp, offset=minus_op(off), shr=off))

    elif op == "sar":
        off = stack.pop()
        exp = stack.pop()
        if all_concrete(off, exp):
            sign = exp & (1 << 255)
            if off >= 256:
                if sign:
                    stack.append(word_mask)
                else:
                    stack.append(0)
            else:
                shifted = exp >> off
                if sign:
                    # Fill the vacated top bits with ones, staying within
                    # 256 bits.
                    shifted |= (word_mask << (256 - off)) & word_mask
                stack.append(shifted)
        else:
            # FIXME: This won't give the right result...
            stack.append(mask_op(exp, offset=minus_op(off), shr=off))

    elif op in ["not", "iszero"]:
        stack.append((op, stack.pop()))

    elif op == "sha3":
        p = stack.pop()
        n = stack.pop()
        res = mem_load(p, n)

        self.counter += 1
        vname = f"_{self.counter}"
        vval = (
            "sha3",
            res,
        )
        trace(("setvar", vname, vval))
        stack.append(("var", vname))

    elif op == "calldataload":
        stack.append(("cd", stack.pop(),))

    elif op == "byte":
        # BYTE(i, x): i is on top of the stack and selects the i-th byte of
        # x counting from the most significant one.
        idx = stack.pop()
        val = stack.pop()
        if type(idx) == int and idx >= 32:
            stack.append(0)  # out-of-range index reads as zero
        else:
            off = sub_op(248, mul_op(idx, 8))
            stack.append(mask_op(val, 8, off, shr=off))

    elif op == "selfbalance":
        stack.append(("balance", "address",))

    elif op == "balance":
        addr = stack.pop()
        if opcode(addr) == "mask_shl" and addr[:4] == ("mask_shl", 160, 0, 0):
            # drop a 160-bit mask wrapped around the address
            stack.append(("balance", addr[4],))
        else:
            stack.append(("balance", addr,))

    elif op == "swap":
        stack.swap(param)

    elif op[:3] == "log":
        p = stack.pop()
        s = stack.pop()
        # the digit after "log" is the number of topics to pop
        topics = [stack.pop() for _ in range(int(op[3]))]

        trace(("log", mem_load(p, s),) + tuple(topics))

    elif op == "sload":
        sloc = stack.pop()
        stack.append(("storage", 256, 0, sloc))

    elif op == "sstore":
        sloc = stack.pop()
        val = stack.pop()
        trace(("store", 256, 0, sloc, val))

    elif op == "mload":
        memloc = stack.pop()

        self.counter += 1
        vname = f"_{self.counter}"
        trace(("setvar", vname, ("mem", ("range", memloc, 32))))
        stack.append(("var", vname))

    elif op == "mstore":
        memloc = stack.pop()
        val = stack.pop()
        trace(("setmem", ("range", memloc, 32), val,))

    elif op == "mstore8":
        memloc = stack.pop()
        val = stack.pop()
        # MSTORE8 writes a single byte: the low 8 bits of the value. Range
        # lengths are in bytes (mload/mstore use 32).
        trace(("setmem", ("range", memloc, 1), mask_op(val, size=8, offset=0),))

    elif op == "extcodecopy":
        addr = stack.pop()
        mem_pos = stack.pop()
        code_pos = stack.pop()
        data_len = stack.pop()

        trace(
            (
                "setmem",
                ("range", mem_pos, data_len),
                ("extcodecopy", addr, ("range", code_pos, data_len)),
            )
        )

    elif op == "codecopy":
        mem_pos = stack.pop()
        call_pos = stack.pop()
        data_len = stack.pop()

        if (type(call_pos), type(data_len)) == (
            int,
            int,
        ) and call_pos + data_len < len(self.loader.binary):
            res = 0
            # NOTE(review): the range starts at call_pos - 1, which looks
            # like an off-by-one (call_pos == 0 reads binary[-1]) unless
            # loader.binary is shifted by one — confirm before changing.
            for i in range(call_pos - 1, call_pos + data_len - 1):
                res = res << 8
                res += self.loader.binary[
                    i
                ]  # this breaks with out of range for some contracts
                # may be because we're usually getting compiled code binary
                # and not runtime binary
            trace(
                ("setmem", ("range", mem_pos, data_len), res)
            )  # ('bytes', data_len, res)))

        else:
            trace(
                (
                    "setmem",
                    ("range", mem_pos, data_len),
                    ("code.data", call_pos, data_len,),
                )
            )

    elif op == "codesize":
        stack.append(len(self.loader.binary))

    elif op == "calldatacopy":
        mem_pos = stack.pop()
        call_pos = stack.pop()
        data_len = stack.pop()

        if data_len != 0:
            call_data = ("call.data", call_pos, data_len)
            # call_data = mask_op(('call.data', bits(add_op(data_len, call_pos))), size=bits(data_len), shl=bits(call_pos))
            trace(("setmem", ("range", mem_pos, data_len), call_data))

    elif op == "returndatacopy":
        mem_pos = stack.pop()
        ret_pos = stack.pop()
        data_len = stack.pop()

        if data_len != 0:
            return_data = ("ext_call.return_data", ret_pos, data_len)
            # return_data = mask_op(('ext_call.return_data', bits(add_op(data_len, ret_pos))), size=bits(data_len), shl=bits(ret_pos))
            trace(("setmem", ("range", mem_pos, data_len), return_data))

    elif op == "call":
        self.handle_call(op, trace)

    elif op == "staticcall":
        self.handle_call(op, trace)

    elif op == "delegatecall":
        gas = stack.pop()
        addr = stack.pop()

        arg_start = stack.pop()
        arg_len = stack.pop()
        ret_start = stack.pop()
        ret_len = stack.pop()

        call_trace = (
            "delegatecall",
            gas,
            addr,
        )  # arg_start, arg_len, ret_start, ret_len)

        if arg_len == 0:
            fname = None
            fparams = None

        elif arg_len == 4:
            fname = mem_load(arg_start, 4)
            fparams = 0

        else:
            fname = mem_load(arg_start, 4)
            fparams = mem_load(add_op(arg_start, 4), sub_op(arg_len, 4))

        call_trace += (fname, fparams)

        trace(call_trace)

        self.call_len = ret_len
        stack.append("delegate.return_code")

        if 0 != ret_len:
            return_data = ("delegate.return_data", 0, ret_len)

            trace(("setmem", ("range", ret_start, ret_len), return_data))

    elif op == "callcode":
        gas = stack.pop()
        addr = stack.pop()
        value = stack.pop()

        arg_start = stack.pop()
        arg_len = stack.pop()
        ret_start = stack.pop()
        ret_len = stack.pop()

        call_trace = (
            "callcode",
            gas,
            addr,
            value,
        )

        if arg_len == 0:
            fname = None
            fparams = None

        elif arg_len == 4:
            fname = mem_load(arg_start, 4)
            fparams = 0

        else:
            fname = mem_load(arg_start, 4)
            fparams = mem_load(add_op(arg_start, 4), sub_op(arg_len, 4))

        call_trace += (fname, fparams)

        trace(call_trace)

        self.call_len = ret_len
        stack.append("callcode.return_code")

        if 0 != ret_len:
            return_data = ("callcode.return_data", 0, ret_len)

            trace(("setmem", ("range", ret_start, ret_len), return_data))

    elif op == "create":
        wei, mem_start, mem_len = stack.pop(), stack.pop(), stack.pop()

        call_trace = ("create", wei)

        code = mem_load(mem_start, mem_len)
        call_trace += (code,)

        trace(call_trace)

        stack.append("create.new_address")

    elif op == "create2":
        wei, mem_start, mem_len, salt = (
            stack.pop(),
            stack.pop(),
            stack.pop(),
            stack.pop(),
        )

        call_trace = ("create2", wei, ("mem", ("range", mem_start, mem_len)), salt)

        trace(call_trace)

        stack.append("create2.new_address")

    elif op == "pc":
        stack.append(line[0])

    elif op == "msize":
        self.counter += 1
        vname = f"_{self.counter}"
        trace(("setvar", vname, "msize"))
        stack.append(("var", vname))

    elif op in ("extcodesize", "extcodehash", "blockhash"):
        stack.append((op, stack.pop(),))

    elif op in [
        "callvalue",
        "caller",
        "address",
        "number",
        "gas",
        "origin",
        "timestamp",
        "chainid",
        "difficulty",
        "gasprice",
        "coinbase",
        "gaslimit",
        "calldatasize",
        "returndatasize",
    ]:
        stack.append(op)

    else:
        # TODO: Maybe raise an error directly?
        assert op not in [
            "jump",
            "jumpi",
            "revert",
            "return",
            "stop",
            "jumpdest",
            "UNKNOWN",
        ]

    # Sanity check: the stack height must have changed by exactly the amount
    # the opcode table prescribes for this opcode.
    if stack.len() - previous_len != opcode_dict.stack_diffs[op]:
        logger.error("line: %s", line)
        logger.error("stack: %s", stack)
        logger.error(
            "expected %s, got %s stack diff",
            opcode_dict.stack_diffs[op],
            stack.len() - previous_len,
        )
        assert False, f"opcode {op} not processed correctly"

    stack.cleanup()