def and_op(*args):
    """Combine two or more operands into a single AND.

    Everything after the first operand is first reduced (recursively) to one
    right-hand side.  If both the first operand and that right-hand side are
    plain ints the result is their bitwise ``&``.  Otherwise the result is a
    flat ``('and', ...)`` tuple: an operand that is itself an 'and'
    expression contributes its own operands instead of being nested.

    Asserts that at least two operands were passed.
    """
    assert len(args) > 1

    head, tail = args[0], args[1:]
    # collapse the remaining operands into one right-hand side
    rest = tail[0] if len(tail) == 1 else and_op(*tail)

    if type(head) is int and type(rest) is int:
        return head & rest

    operands = tuple()
    for term in (head, rest):
        # splice nested 'and' operands in, keep anything else as one operand
        operands += term[1:] if opcode(term) == 'and' else (term, )

    return ('and', ) + operands
def add_to_arr(exp):
    """Turn an addition that involves a 'loc' operand into an 'array' tuple.

    Returns ``('array', other, loc)`` when one side of the 'add' has opcode
    'loc'; falls off the end (implicitly returning None) otherwise.
    """
    # NOTE(review): `exp ~ (...)` with `:name` captures is not standard Python;
    # presumably a project-specific pattern-matching dialect that binds `left`
    # and `right` on a successful match - confirm how this file is processed.
    if exp ~ ('add', :left, :right):
        # make sure that, if either side is the 'loc', it ends up in `right`
        if opcode(left) == 'loc':
            right, left = left, right
        if opcode(right) == 'loc':
            return ('array', left, right)
def split_or(value):
    """Normalise the terms of an 'or' / 'mask_shl' expression row by row.

    Anything that is neither an 'or' nor a 'mask_shl' is returned right away
    as the single row ``[(256, 0, value)]``.  A lone 'mask_shl' is wrapped
    into a one-term 'or' so both cases share the loop below.

    NOTE(review): the visible body stops after the row rewrites - `ret_rows`
    is never filled or returned and `orig_value` is never read here - so the
    rest of this function appears to be missing from this view.
    """
    orig_value = value
    if opcode(value) not in ('or', 'mask_shl'):
        return [(256,0,value)]
    if opcode(value) == 'mask_shl':
        value = ('or', value)
    opcode_, *terms = value
    assert opcode_ == 'or'
    ret_rows = []
    for row in terms:
        # bare terms are rewritten as explicit 'mask_shl' rows with a fixed size
        if m := match(row, ('bool', ':arg')):
            row = ('mask_shl', 8, 0, 0, ('bool', m.arg)) # does weird things if size == 1, in loops.activateSafeMode
        if row == 'caller':
            row = ('mask_shl', 160, 0, 0, 'caller') # does weird things if size == 1, in loops.activateSafeMode
        if row == 'block.timestamp':
            # NOTE(review): the replacement row carries 'caller', not
            # 'block.timestamp' - looks like a copy-paste slip; confirm.
            row = ('mask_shl', 64, 0, 0, 'caller') # does weird things if size == 1, in loops.activateSafeMode
        # multiplying by 1 is a no-op, keep just the value
        if m := match(row, ('mul', 1, ':val')):
            row = m.val
def cleanup(self):
    """Fold trivially constant comparisons on ``self.stack`` in place.

    Tuple entries are rewritten as follows:

    * ``('lt', int, int)``             -> ``('bool', 1)`` or ``('bool', 0)``
    * ``('iszero', int)``              -> ``('bool', 1)`` if the int is 0, else ``('bool', 0)``
    * ``('iszero', ('bool', int))``    -> ``('bool', 1 - int)``
    * ``('iszero', ('iszero', x))``    -> ``x`` when x is itself a comparison
      or iszero, otherwise ``('bool', x)``

    Non-tuple entries and every other tuple are left untouched.
    """
    stack = self.stack
    comparison_ops = ('iszero', 'eq', 'lt', 'gt', 'slt', 'sgt')

    for pos, item in enumerate(stack):
        if type(item) != tuple:
            continue

        head = item[0]

        if head == 'lt' and type(item[1]) == int and type(item[2]) == int:
            stack[pos] = ('bool', 1 if item[1] < item[2] else 0)

        elif head == 'iszero' and type(item[1]) == int:
            stack[pos] = ('bool', 1 if item[1] == 0 else 0)

        elif head == 'iszero' and opcode(item[1]) == 'bool' and type(item[1][1]) == int:
            stack[pos] = ('bool', 1 - item[1][1])

        elif head == 'iszero' and opcode(item[1]) == 'iszero':
            # double negation: drop both iszero's, keeping the value boolean
            inner = item[1][1]
            stack[pos] = inner if opcode(inner) in comparison_ops else ('bool', inner)
def ends_exec(path):
    """Report whether every way through ``path`` is known to end execution.

    Only the last line of ``path`` is examined: earlier lines may end
    execution as well, but at least one route leading up to the last line
    did not - otherwise that line would not be visible at all.

    Returns True when the last line is a terminating opcode, or an "or" whose
    two alternatives both end execution.  Returns False otherwise, including
    for an empty path (previously an IndexError).
    """
    if len(path) == 0:
        # no last line to inspect, so nothing shows that execution stops here
        return False

    line = path[-1]
    kind = opcode(line)

    if kind in (
        "return",
        "stop",
        "selfdestruct",
        "invalid",
        "assert_fail",
        "revert",
        "continue",
        "undefined",
    ):
        return True

    if kind == "or":
        assert len(line) == 3
        return ends_exec(line[1]) and ends_exec(line[2])

    # 'while True' ends execution, but all the other loops most likely
    # don't. if we miss some cases nothing bad will happen - just slightly
    # less readable code. every other opcode is assumed to fall through too.
    return False
def extract_variables(exp):
    """Return the set of variable-like sub-expressions found in ``exp``.

    * ints contribute nothing;
    * expressions whose opcode is one of the listed leaf opcodes, or whose
      opcode satisfies ``is_array``, are returned whole;
    * any other non-tuple value (named symbols such as 'callvalue',
      strings mentioning 'data', and so on) is returned as itself;
    * for every other tuple the operands (everything after the opcode)
      are searched recursively and the results merged.
    """
    if type(exp) == int:
        return set()

    kind = opcode(exp)
    leaf_opcodes = (
        'var', 'mem', 'cd', 'storage',
        'call.data', 'sha3', 'calldatasize',
    )
    if kind in leaf_opcodes or is_array(kind):
        return {exp}

    if type(exp) != tuple:
        # every remaining atom counts as a variable in its own right
        return {exp}

    return set().union(*(extract_variables(child) for child in exp[1:]))
def prettify(exp, rem_bool=False, parentheses=True, top_level=False, add_color=False):
    """Render an expression as human-readable text.

    NOTE(review): only the opening part of this function is visible here -
    `parentheses`, `top_level` and `col` are not used in the portion shown,
    and there is no final fallback return - so the remainder appears to be
    cut off.
    """
    # shorthands that carry the colour setting along
    col = partial(colorize, add_color=add_color)
    pret = partial(prettify, add_color=add_color, parentheses=False)
    if rem_bool:
        exp = simplify_bool(exp)
        # still a 'bool' after one simplification pass: go around again
        if opcode(exp) == 'bool':
            return prettify(exp, rem_bool=rem_bool, parentheses=parentheses, top_level=top_level, add_color=add_color)
    # whole multiples of a day / an hour are shown as a product
    # NOTE(review): in the day branch the first factor is exp//3600, and
    # (exp//3600) * 24 * 3600 is 24 times the original value; presumably
    # exp // (24 * 3600) was intended - confirm.
    if type(exp) == int and exp % (24 * 3600) == 0 and exp > 24 * 3600:
        exp = ('mul', exp//3600, 24, 3600)
    if type(exp) == int and exp % 3600 == 0 and exp > 3600:
        exp = ('mul', exp//3600, 3600)
        # also tried return col('seconds(', COLOR_GRAY) + '1 hour' + col(')', COLOR_GRAY)
        # but seemed less intuitive, e.g. 0xf64B584972FE6055a770477670208d737Fff282f calcMaxWithdraw
        # and 3600 every programmer should know, by heart, means 1 hour :)
        #
        # also, not tackling single minutes because too often they are not time related
    if type(exp) in (int, float):
        return pretty_num(exp, add_color)
    # calls to precompiled contracts are shown as name(argument)
    if opcode(exp) in precompiled.values():
        return f'{exp[0]}({pret(exp[1])})'
    # NOTE(review): `~` with `int:num` / `str:s` captures is not standard
    # Python; presumably a project-specific pattern-matching dialect - confirm.
    if exp ~ ('arr', int:num, ('mask_shl', _, _, _, str:s)) \
            and len(s) == num+2:
        return s
def unmake_fands(exp):
    """Rename a top-level "for"/"fand" opcode back to "or"/"and".

    Only the opcode slot is changed; the operands are carried over as they
    are.  Any other expression is returned unchanged.
    """
    kind = opcode(exp)
    if kind == "for":
        restored = "or"
    elif kind == "fand":
        restored = "and"
    else:
        return exp
    return (restored,) + exp[1:]
def format_exp(exp):
    """Format an expression as a display string.

    * strings are wrapped in double quotes;
    * ints above a million that are not a multiple of a million are shown
      in hex, all other ints in decimal;
    * anything that is not a list goes through ``str``;
    * lists are rendered recursively between gray brackets with gray commas;
      when the list's opcode is not itself a list it is emitted verbatim in
      first position instead of being formatted.
    """
    kind = type(exp)

    if kind is str:
        return f'"{exp}"'

    if kind is int:
        million = 10 ** 6
        if exp > million and exp % million != 0:
            return hex(exp)
        return str(exp)

    if kind is not list:
        return str(exp)

    if not exp:
        return COLOR_GRAY + "[]" + ENDC

    head = opcode(exp)
    if type(head) == list:
        parts = [format_exp(item) for item in exp]
    else:
        parts = [head] + [format_exp(item) for item in exp[1:]]

    separator = f"{COLOR_GRAY}, {ENDC}"
    opening = COLOR_GRAY + "[" + ENDC
    closing = COLOR_GRAY + "]" + ENDC
    return opening + separator.join(parts) + closing
def as_paths(trace, path = None):
    """Expand a trace into the tuple of linear paths it can take.

    ``path`` is the prefix accumulated so far (None or empty to start).
    Lines are appended to the prefix one by one; an 'if' forks into the paths
    of both branches (each prefixed with the condition it took), and a 'LOOP'
    continues into the loop body after recording a ('LOOP', ...) marker.
    Both stop processing of the current trace.

    Returns a tuple of lists, one list per path.  Asserts ``trace`` is a list.
    """
    assert type(trace) == list

    prefix = path or tuple()

    for line in replace_f(trace, make_fands):
        kind = opcode(line)

        if kind == 'if':
            # assumes 'ifs' end trace
            cond, if_true, if_false = line[1], line[2], line[3]
            taken = as_paths(if_true, prefix + (cond, ))
            not_taken = as_paths(if_false, prefix + (is_zero(cond), ))
            return taken + not_taken

        if kind == 'LOOP':
            prefix += (('LOOP', line[2]), )
            return as_paths(line[1], prefix)

        prefix += (line, )

    return (list(prefix), )
def replace_names_in_assoc(names, storages_assoc): for pattern, name in names.items(): if opcode(pattern) == "bool": continue if opcode(pattern) == "struct": stor_id = pattern else: stor_id = storages_assoc[pattern] if m := match(stor_id, ("stor", ":size", ":off", ("loc", ":num"))): size, off, num = m.size, m.off, m.num # if we found a simple getter for a storage number, # we need to check first if a given location is only accessed # this way. otherwise it may be a function like getLength, that # returns the array length, and we don't want to use it as a storage name if all( match(pattern, ("stor", Any, Any, ("loc", Any))) for pattern in storages_assoc if get_loc(pattern) == num): used_locs.add(stor_id) for src, pattern in storages_assoc.items(): if pattern == stor_id: storages_assoc[src] = ("stor", size, off, ("name", name, num)) elif ((m := match(stor_id, ("stor", Any, Any, ("map", Any, ":loc")))) or (m := match(stor_id, ("stor", Any, Any, ("array", Any, ":loc")))) or (m := match(stor_id, ("struct", ":loc")))):
def continue_loops(self, root):
    """Resolve every single-line 'loop' trace found under ``root``.

    For each node whose trace is exactly one 'loop' line:

    * if the loop destination is already a label, the node jumps back to it
      with a 'goto' carrying one 'setvar' per entry of the label's begin
      vars; when there are no such entries the stacks are folded and the
      node is labelled instead;
    * otherwise the node is labelled with the vars and new stack carried by
      the 'loop' line.
    """
    def is_lone_loop(n):
        return n.trace is not None and len(n.trace) == 1 and opcode(n.trace[0]) == 'loop'

    for node in find_nodes(root, is_lone_loop):
        assert node.trace is not None
        assert len(node.trace) == 1
        assert opcode(node.trace[0]) == 'loop'

        loop_dest, stack, new_stack, loop_vars = node.trace[0][1:]

        if not loop_dest.is_label():
            node.trace = None
            node.set_label(loop_dest, tuple(loop_vars), new_stack)
            continue

        old_stack = loop_dest.stack
        begin_vars = loop_dest.label.begin_vars

        assignments = [
            ('setvar', var_idx, stack[stack_pos])
            for _, var_idx, _val, stack_pos in begin_vars
        ]

        if assignments:
            node.trace = [('goto', loop_dest, tuple(assignments))]
        else:
            folded, var_list = fold_stacks(old_stack, stack, loop_dest.label.depth)
            node.trace = None
            node.set_label(loop_dest, tuple(var_list), folded)
def deserialize(trace):
    """Rebuild a trace from its serialised, list-based form.

    Each line becomes a tuple again.  The nested traces inside "while" and
    "if" lines are deserialised recursively, while conditions and setvars go
    through ``tuplify``; every other line is passed to ``tuplify`` whole.

    Returns a new list; asserts that a "while" line's loop id is a str.
    """
    def restore(line):
        as_tuple = tuple(line)
        kind = opcode(as_tuple)

        if kind == "while":
            _, cond, body, lid, setvars = as_tuple
            cond = tuplify(cond)
            setvars = tuplify(setvars)
            assert type(lid) == str
            return ("while", cond, deserialize(body), lid, setvars)

        if kind == "if":
            _, cond, if_true, if_false = as_tuple
            cond = tuplify(cond)
            return ("if", cond, deserialize(if_true), deserialize(if_false))

        return tuplify(line)

    return [restore(line) for line in trace]
def lt_op(left, right):  # left < right
    """Evaluate or simplify the comparison ``left < right``.

    Two plain ints are compared directly and the bool result returned.

    NOTE(review): the visible body ends after `left` is rewritten, with no
    further use of it and no return for the symbolic case - the rest of this
    function appears to be missing from this view.
    """
    if type(left) == int and type(right) == int:
        return left < right
    # int + max(...) on the left: push the addition into every term of the max
    if (m := match(left, ("add", ":int:num", ":max"))) and opcode(m.max) == "max":
        terms = m.max[1:]
        left = ("max",) + tuple(add_op(t, m.num) for t in terms)
def make_fands(exp):
    """Rename a top-level "or"/"and" opcode to "for"/"fand".

    Only the opcode slot changes; operands are carried over untouched and
    any other expression is returned as is.
    """
    # see `ferlan.getOrderDataClaim` for why it's necessary
    kind = opcode(exp)
    if kind == "or":
        renamed = "for"
    elif kind == "and":
        renamed = "fand"
    else:
        return exp
    return (renamed,) + exp[1:]
def add_to_arr(exp):
    """Turn an addition that involves a "loc" operand into an "array" tuple.

    Returns ``("array", other, loc)`` when ``exp`` is a two-operand "add"
    and one of the operands has opcode "loc"; returns None otherwise.
    """
    m = match(exp, ("add", ":left", ":right"))
    if not m:
        return None

    base, loc = m.left, m.right
    if opcode(base) == "loc":
        # the location came first - move it to the second slot
        base, loc = loc, base

    if opcode(loc) != "loc":
        return None
    return ("array", base, loc)
def max_to_add(exp):
    """Rewrite a "max" expression as an "add" with the shared part pulled out.

    * Anything that is not a "max" is returned unchanged.
    * If some operand is neither an "add" nor an int, the expression is only
      passed through ``simplify_max``.
    * If an int operand is present, a constant ``m`` is factored out and the
      result is ``("add", m, ("max", ...))``.
    * Otherwise a constant plus any terms shared by all operands are
      factored out: ``("add", <shared...>, <max of the remainders>)``.
    """
    if opcode(exp) != "max":
        return exp
    # from here on `exp` holds only the operands of the max
    exp = exp[1:]
    for e in exp:
        if opcode(e) != "add" and type(e) != int:
            return simplify_max(("max",) + exp)
    for e in exp:
        if type(e) == int:
            # smallest constant across operands: the int itself, the first
            # operand of a tuple when that is an int, otherwise 0
            m = min(
                x
                if type(x) == int
                else (
                    x[1]
                    if type(x) == tuple and len(x) > 1 and type(x[1]) == int
                    else 0
                )
                for x in exp
            )  # used to be x[1] but 0x0000136DAE58AFCF1EDd2071973d4a7a6fbe98A5 didn't work
            res = ("max", e - m)
            # every operand that differs from the int has m subtracted
            for e2 in exp:
                if e2 != e:
                    res += (sub_op(e2, m),)
            return ("add", m, res)
    # no int operand: look at the first operand of every "add";
    # as soon as one of them is not an int no constant is factored out
    m = 10 ** 20
    for e in exp:
        if type(e[1]) != int:
            m = 0
            break
        else:
            m = min(m, e[1])
    # elements of the first operand that also occur among the operands of
    # every other one
    common = []
    first = exp[0]
    for f in first:
        if all(f in e[1:] for e in exp[1:]):
            common.append(f)
    if len(common) > 0:
        a = add_op(m, *common)
    else:
        a = m
    # subtract the shared part from every operand
    res = []
    for e in exp:
        res.append(sub_op(e, a))
    # splice the shared part into the resulting "add"
    if type(a) == int:
        prefix = (a,)
    else:
        prefix = a[1:]
    return ("add",) + prefix + (simplify_max(("max",) + tuple(res)),)
def fold_ands(exp):
    """Flatten nested "and" expressions into a single "and" tuple.

    Operands that are themselves "and" expressions are folded recursively and
    their operands spliced in place; all other operands are kept in order.
    Asserts that ``exp`` is an "and" expression.
    """
    assert opcode(exp) == "and"

    flat = []
    for term in exp[1:]:
        if opcode(term) == "and":
            flat.extend(fold_ands(term)[1:])
        else:
            flat.append(term)

    return ("and", *flat)
def fold_ands(exp):
    """Return ``exp`` as one flat 'and' tuple with no nested 'and' operands.

    Nested 'and' operands are flattened recursively and replaced by their own
    operands; the order of operands is preserved.  Asserts that ``exp`` has
    opcode 'and'.
    """
    assert opcode(exp) == 'and'

    operands = tuple()
    for term in exp[1:]:
        # a nested 'and' contributes its (already flattened) operands
        operands += fold_ands(term)[1:] if opcode(term) == 'and' else (term, )

    return ('and', ) + operands
def replace_names_in_assoc(names, storages_assoc): for pattern, name in names.items(): if opcode(pattern) == 'bool': continue if opcode(pattern) == 'struct': stor_id = pattern else: stor_id = storages_assoc[pattern] if stor_id ~ ('stor', :size, :off, ('loc', :num)):
def simplify_max(exp):
    """Flatten one level of nested "max" operands.

    A "max" operand that is itself a "max" is replaced by its own operands
    (not recursively); other operands are kept in order.  Expressions that
    are not a "max" are returned unchanged.
    """
    if opcode(exp) != "max":
        return exp

    flat = ("max",)
    for term in exp[1:]:
        flat += term[1:] if opcode(term) == "max" else (term,)
    return flat
def add_op(*args):
    """Add any number of operands, folding numeric ones together.

    No operands give 0 and a single operand is returned as is.  If every
    operand is an int or float their plain sum is returned.

    NOTE(review): the visible body ends inside the loop that merges symbolic
    terms; nothing is appended to `symbolic` and nothing is returned for the
    symbolic case, so the rest of this function appears to be missing from
    this view.
    """
    if len(args) == 1:
        return args[0]
    elif len(args) == 0:
        return 0
    assert len(args) > 1
    assert 'mul' not in args # some old bug, it's ok for ['mul'..] to be in args, but not 'mul' directly
    # speed optimisation
    real = 0
    for r in args:
        if type(r) in (int, float):
            real += r
        else:
            break
    else:
        # the loop never hit a non-number: everything was numeric
        return real
    # / speed
    res = flatten_adds(list(args))
    # wrap every term that is not a 'mul' via mul_op(1, term)
    for idx, r in enumerate(res):
        if opcode(r) != 'mul':
            res[idx] = mul_op(1, r)
    real = 0
    symbolic = []
    for r in res:
        assert opcode(r) != 'add'
        if type(r) in [int,float]:
            real += r
            continue
        assert opcode(r) == 'mul'
        # look at all the previously found symbolic expressions
        # perhaps you can add to the previous one - if so, do it
        # else, add this as a new symbolic exp
        for idx, rr in enumerate(symbolic):
            tried = try_add(r, rr) or try_add(rr, r)
            if tried is not None and tried != 0:
                # NOTE(review): `~` with `int:size` / `:val` captures is not
                # standard Python; presumably a project-specific
                # pattern-matching dialect - confirm.
                if tried ~ ('mul', ...):
                    symbolic[idx] = tried
                elif tried ~ ('mask_shl', int:size, 0, 256-size, :val):
                    # a mask_shl of this shape is stored as a multiplication
                    symbolic[idx] = ('mul', 2**(256-size), val)
def simplify_max(exp):
    """Flatten one level of nested 'max' operands.

    Every operand of a 'max' that is itself a 'max' is replaced by its own
    operands (one level only); the remaining operands stay in order.
    Anything that is not a 'max' expression is returned unchanged.
    """
    if opcode(exp) != 'max':
        return exp

    # each operand contributes either its own operands or just itself
    pieces = (e[1:] if opcode(e) == 'max' else (e, ) for e in exp[1:])
    return sum(pieces, ('max', ))
def simplify_bool(exp):
    """Strip a redundant boolean wrapper from ``exp``.

    * ``("bool", x)`` becomes ``x``;
    * ``("iszero", x)`` is simplified from the inside out: a double negation
      collapses to the inner value, anything else is negated via ``is_zero``;
    * every other expression is returned unchanged.
    """
    kind = opcode(exp)

    if kind == "bool":
        return exp[1]

    if kind != "iszero":
        return exp

    inner = simplify_bool(exp[1])
    if opcode(inner) == "iszero":
        return inner[1]

    # this had a bug and it went on unnoticed. does this check ever get executed?
    return is_zero(inner)
def replace_max_with_MAX(exp):
    """Swap the 'max' part of an expression for the placeholder 'MAX'.

    Returns ``(expression, replaced)``.  For anything that is not a 'max'
    the pair is ``(exp, None)``.  Otherwise the expression is run through
    ``max_to_add``, its last top-level 'max' term (or the whole expression
    when there is none) is replaced by 'MAX', and the result is simplified.
    """
    if opcode(exp) != 'max':
        return exp, None

    summed = max_to_add(exp)

    # later 'max' terms win; with none present the whole thing is replaced
    target = summed
    for term in summed:
        if opcode(term) == 'max':
            target = term

    return simplify(replace(summed, target, 'MAX')), target
def flatten_adds(exp):
    """Expand nested "add" terms in a list of terms until none remain.

    Each "add" term is replaced by its operands, repeatedly, so that "add"s
    nested at any depth are expanded.  When there is nothing to expand the
    input list itself is returned; otherwise a new list is built.  Asserts
    that every "add" has at least two operands.
    """
    terms = exp
    while any(opcode(term) == "add" for term in terms):
        expanded = []
        for term in terms:
            if opcode(term) == "add":
                assert len(term[1:]) > 1
                expanded.extend(term[1:])
            else:
                expanded.append(term)
        terms = expanded
    return terms
def replace_max_with_MAX(exp):
    """Replace the "max" part of an expression with the placeholder "MAX".

    Returns a pair ``(expression, replaced)``: ``(exp, None)`` when ``exp``
    is not a "max"; otherwise the simplified expression with its last
    top-level "max" term (or, failing that, the whole ``max_to_add`` result)
    swapped for "MAX", together with the part that was swapped out.
    """
    if opcode(exp) != "max":
        return exp, None

    summed = max_to_add(exp)
    max_terms = [term for term in summed if opcode(term) == "max"]
    # the last "max" term is the one replaced; default to the whole expression
    target = max_terms[-1] if max_terms else summed

    replaced = replace(summed, target, "MAX")
    return simplify(replaced), target
def find_storage_names(functions):
    """Derive storage names from the getter functions among ``functions``.

    For every function with a truthy ``getter`` the function name is turned
    into a candidate storage name: a leading "get" is dropped (when something
    is left before the parameter list), the first letter is lower-cased
    unless the name is all upper case, and the parameter list is cut off.
    160-bit storage getters additionally get an "Address" suffix unless the
    name already hints at an address.

    Returns a dict mapping each getter expression to its candidate name.
    """
    names = {}

    for func in functions:
        getter = func.getter
        if not getter:
            continue

        assert opcode(getter) in ("storage", "struct", "bool")

        # func name into potential storage name
        candidate = func.name

        if candidate[:3] == "get" and len(candidate.split("(")[0]) > 3:
            candidate = candidate[3:]

        if candidate != candidate.upper():
            # otherwise we get stuff like bILLIONS in 0xF0160428a8552AC9bB7E050D90eEADE4DDD52843
            candidate = candidate[0].lower() + candidate[1:]

        candidate = candidate.split("(")[0]

        if match(getter, ("storage", 160, ...)):
            lowered = candidate.lower()
            address_hints = ("address", "addr", "account", "owner")
            if not any(hint in lowered for hint in address_hints):
                candidate += "Address"

        names[getter] = candidate

    return names
def merge_ifs(path):
    """Merge if/else sections that share a common beginning.

    Detects if-else sections whose branches start the same way and moves the
    'if' further down, hoisting the shared lines in front of it.  A one-sided
    'if' (3 elements) treats the remainder of the path as its else branch and
    therefore ends processing; a two-sided 'if' (4 elements) is merged in
    place and processing continues with the following lines.

    Returns a new list; asserts that ``path`` is a list of non-list lines.
    """
    assert type(path) == list

    out = []

    for idx, line in enumerate(path):
        assert type(line) != list

        if opcode(line) != 'if':
            out.append(line)
            continue

        if len(line) == 3:
            # one-sided if
            cond = line[1]
            if_true = merge_ifs(line[2])
            if_false = merge_ifs(path[idx+1:])
            shared, merged = try_merge_ifs(cond, if_true, if_false)
            out.extend(shared)
            out.append(merged[:3])
            out.extend(merged[3])
            break

        assert len(line) == 4
        cond = line[1]
        if_true = merge_ifs(line[2])
        if_false = merge_ifs(line[3])
        shared, merged = try_merge_ifs(cond, if_true, if_false)
        out.extend(shared)
        out.append(merged)
        # don't break

    return out
def split_or(value): orig_value = value if opcode(value) not in ('or', 'mask_shl'): return [(256,0,value)] if opcode(value) == 'mask_shl': value = ('or', value) assert value ~ ('or', *terms) ret_rows = [] for row in terms: if row ~ ('bool', :arg):