def is_byte_swap(self):
    """Report whether the modeled variable is a byte-swap of its source.

    Returns ``False`` when modeling raises ``ModelIsConstrained`` or when
    the number of collected byte values is not in ``(1, source width]``.
    Otherwise the negated equivalence is added to the solver and ``True``
    is returned only if the solver reports it unsatisfiable.
    """
    try:
        self.model_variable()
    except ModelIsConstrained:
        return False

    # Figure out if this might be a byte swap
    byte_count = len(self.byte_values)
    if not 1 < byte_count <= self.var.src.var.type.width:
        return False

    source = create_BitVec(self.var.src, self.var.src.var.type.width)

    # Collected byte expressions, taken in descending key order.
    ordering = [self.byte_values[key] for key in sorted(self.byte_values.keys())]
    ordering.reverse()
    swapped_bits = len(ordering) * 8

    # The low `swapped_bits` bits of the source with their bytes reversed.
    source_bytes = [
        Extract(top - 1, top - 8, source)
        for top in range(swapped_bits, 0, -8)
    ]
    reverse_var = Concat(*reversed(source_bytes))

    if len(ordering) < 4:
        # Fewer than four bytes: keep the remaining upper bits in place
        # (bit 31 is hard-coded as the top bit here).
        reverse_var = Concat(Extract(31, swapped_bits, source), reverse_var)

    reversed_ordering = Concat(*reversed(ordering))

    # The idea here is that if we add the negation of this, if it's
    # not satisfiable, then that means there is no value such that
    # the equivalence does not hold. If that is the case, then this
    # should be a byte-swapped value.
    forward_matches = source == ZeroExt(
        source.size() - swapped_bits, Concat(*ordering)
    )
    backward_matches = reverse_var == ZeroExt(
        reverse_var.size() - reversed_ordering.size(), reversed_ordering
    )
    self.solver.add(Not(And(forward_matches, backward_matches)))

    if self.solver.check() == unsat:
        return True
    return False
def may_write_to(self, z3_index, z3_value, storage, constraint_list, consider_length):
    """Decide whether a storage write may modify the chunk described by ``self``.

    :param z3_index: storage index being written (converted with ``get_bv``).
    :param z3_value: value being written (converted with ``get_bv``).
    :param storage: storage object passed on to ``get_storage_slot``.
    :param constraint_list: path constraints the write happens under.
    :param consider_length: not referenced anywhere in this body.
    :return: ``(True, add_constraints)`` if the write may hit this chunk,
        otherwise ``(False, None)``.
    """
    z3_index = simplify(get_bv(z3_index))
    z3_value = simplify(get_bv(z3_value))
    same_slot = False
    add_constraints = []

    # z3-index directly pointing to this slot
    concret_index = BitVecVal(self.slot_counter, 256)
    # Compare expression equivalence
    if eq(concret_index, z3_index):
        same_slot = True

    # If not structurally equivalent, check if there is an assignment that allows them to be equivalent
    if not same_slot and not are_z3_satisfiable(constraint_list + [z3_index == concret_index]):
        return False, None
    add_constraints.append(simplify(z3_index == self.slot_counter))

    index_str = str(z3_index)
    # Rule out keccak symbolic variable as the function prevents someone from arbitrarily controlling the index
    # (either the whole index is a keccak term, or the text before the first "+" is a key of keccak_map).
    if len(z3_index.children()) < 2 and index_str.startswith("keccak") \
            or "+" in index_str and index_str[:index_str.index("+")].strip() in keccak_map :
        return False, None

    # Todo Here I might do something more elaborate if I see that it does actually not solve critical writings
    # Problem because writing to an array has a keccak offset, but if the index can be arbitrarily chosen z3 finds
    # a solution for the controllable symbolic variable to match the index to any slot.

    #sym_ind_name = extract_sym_names(z3_index)
    #if any([name for name in sym_ind_name if name.startswith("keccak")]) and any([name for name in sym_ind_name if not name.startswith("keccak")]):
    #    return False, None

    # If the slot is or may be the same and the slot we currently analyze is the same, we found a possible write
    if self.bitlength == 256:
        return True, add_constraints

    # If not, the slot is still written in its entirety but the observed chunk is loaded and overwritten by itself
    to_bit, from_bit = self.bit_counter + self.bitlength - 1, self.bit_counter
    # to_bit, from_bit = BitVecVal(self.bit_counter + self.bitlength - 1, 256), BitVecVal(self.bit_counter, 256)
    chunk_writing = Extract(to_bit, from_bit, z3_value)
    chunk_content = Extract(to_bit, from_bit, get_bv(get_storage_slot(BitVecVal(self.slot_counter, 256), storage)))

    # if the current content of the observed chunk and the respective chunk of the written value can be different
    # like by a different variable assignment, then we found it
    if are_z3_satisfiable(constraint_list + [Not(chunk_content == chunk_writing)]):
        # It is actually not important to use the constraint that the values are different, overwriting with the same
        # value is still writing. On the other hand it avoids references to storage that later have to be solved with
        # intertransactional analysis although the violation can be triggered in one transaction
        # add_constraints.append(simplify(Not(chunk_content == chunk_writing)))
        return True, add_constraints

    # For the 256-bit chunks the last step should not be necessary, but a compiler could generate some code that
    # overwrites a slot content with itself. This function would have a false positive in that case.
    return False, None
def set_region_bit(bv, p):
    """Return ``bv`` with the bit of the region containing ``p`` forced to 1.

    The bit position is the index of the region's name in ``region_names``;
    the bits above and below that position are copied from ``bv``.
    """
    position = region_names.index(REGIONS[p.y][p.x])
    above = [Extract(bits - 1, position + 1, bv)] if position < bits - 1 else []
    forced = [BitVecVal(1, 1)]
    below = [Extract(position - 1, 0, bv)] if position > 0 else []
    return Concat(*(above + forced + below))
def amino_bitvec_unary_restriction(
    amino_list: List[AminoRef] = z3_enum_aminos
) -> List[ConstraintRef]:
    """Build the implications that keep each amino bit-vector in unary form.

    For every adjacent bit pair ``(i, i + 1)`` and every amino, a set bit
    ``i + 1`` implies a set bit ``i``. The pair count comes from the sort
    size of the first amino. Results are ordered by bit position first and
    by amino second.
    """
    pair_count = amino_list[0].sort().size() - 1

    def bit_is_set(bitvec, position):
        # Single-bit slice compared against the 1-bit constant 1.
        return Extract(position, position, bitvec) == BitVecVal(1, 1)

    restrictions = []
    for low in range(pair_count):
        restrictions.extend(
            Implies(bit_is_set(amino, low + 1), bit_is_set(amino, low))
            for amino in amino_list
        )
    return restrictions
def sign_extension(formula, bit_places):
    """Set the rest of bits on the left to the value of the sign bit.

    The low ``bit_places`` bits of ``formula`` are kept and every bit above
    them is replaced by a copy of bit ``bit_places - 1``.

    :param formula: z3 bit-vector expression.
    :param bit_places: number of low bits to keep.
    :return: a bit-vector of the same width as ``formula``.
    """
    width = formula.size()
    if bit_places == width:
        # Nothing to fill. Without this guard the filler below would still
        # hold one sign-bit copy and the result would be one bit too wide.
        return formula

    sign_bit = Extract(bit_places - 1, bit_places - 1, formula)
    complement = sign_bit
    # One copy already exists; add the remaining width - bit_places - 1.
    for _ in range(width - bit_places - 1):
        complement = Concat(sign_bit, complement)
    return Concat(complement, Extract(bit_places - 1, 0, formula))
def evaluate_MLIL_SET_VAR_SPLIT_SSA(self, state):
    """Split the evaluated source value in half and store each half.

    The first operand receives the upper half of the value's bits and the
    second operand the lower half. Always returns an empty list.
    """
    upper_var, lower_var, source = self.instruction.operands
    [value] = self.instructions_to_operands(
        [source], state, self.instruction.size
    )

    total_bits = value.size()
    midpoint = value.size() // 2
    upper_half = Extract(total_bits - 1, midpoint, value)
    lower_half = Extract(midpoint - 1, 0, value)

    state.set_ssa_variable(upper_var, upper_half)
    state.set_ssa_variable(lower_var, lower_half)
    return []
def right_sign_extension(formula, bit_places):
    """Set the rest of bits on the right to the value of the sign bit.

    The top ``bit_places`` bits of ``formula`` are kept; every bit below
    them is replaced by a copy of the lowest kept bit.

    :param formula: z3 bit-vector expression.
    :param bit_places: number of high bits to keep.
    :return: a bit-vector of the same width as ``formula``.
    """
    sign_bit_position = formula.size() - bit_places
    if sign_bit_position == 0:
        # All bits are kept. Without this guard a one-bit filler would
        # still be appended and the result would be one bit too wide.
        return formula

    sign_bit = Extract(sign_bit_position, sign_bit_position, formula)
    complement = sign_bit
    # One copy already exists; add the remaining sign_bit_position - 1.
    for _ in range(sign_bit_position - 1):
        complement = Concat(sign_bit, complement)
    return Concat(
        Extract(formula.size() - 1, sign_bit_position, formula),
        complement,
    )
def zero_extension(formula, bit_places):
    """Set the rest of bits on the left to 0.

    The low ``bit_places`` bits of ``formula`` are kept and every bit above
    them is cleared.

    :param formula: z3 bit-vector expression.
    :param bit_places: number of low bits to keep.
    :return: a bit-vector of the same width as ``formula``.
    """
    width = formula.size()
    if bit_places == width:
        # Nothing to clear; a zero-width filler constant cannot be built.
        return formula

    complement = BitVecVal(0, width - bit_places)
    return Concat(complement, Extract(bit_places - 1, 0, formula))
def visit_MLIL_SET_VAR_SSA(self, expr):
    """Model an SSA assignment and record byte-sized sources.

    The visited source expression is constrained equal to the destination.
    When ``identify_byte`` reports a value range for the source, the range
    is asserted on the solver, the variables the source reads are recorded
    in ``self.byte_vars`` and the matching bit slice is stored in
    ``self.byte_values`` under the key ``(end, step)`` (first one wins).
    """
    dest = create_BitVec(expr.dest, expr.size)
    src = self.visit(expr.src)

    # If this value can never be larger than a byte,
    # then it must be one of the bytes in our swap.
    # Add it to a list to check later.
    if src is not None and not isinstance(src, int):
        value_range = identify_byte(expr.src, self.function)
        if value_range is not None:
            self.solver.add(
                Or(
                    src == 0,
                    And(src <= value_range.end, src >= value_range.step),
                )
            )

            # Add the variables one at a time: `add(*vars_read)` only
            # works when exactly one variable is read.
            for read_var in expr.src.vars_read:
                self.byte_vars.add(read_var)

            range_key = (value_range.end, value_range.step)
            if self.byte_values.get(range_key) is None:
                # Slice bounds are the floor(log2) of the range's end/step.
                high_bit = int(math.floor(math.log(value_range.end, 2)))
                low_bit = int(math.floor(math.log(value_range.step, 2)))
                self.byte_values[range_key] = simplify(
                    Extract(high_bit, low_bit, src)
                )

    self.visited.add(expr.dest)

    if expr.instr_index in self.to_visit:
        self.to_visit.remove(expr.instr_index)

    if src is not None:
        self.solver.add(dest == src)
def mload(self, offset: BitVecNumRef):
    """Load one word starting at byte ``offset``.

    ``offset`` must be a concrete ``BitVecNumRef`` or an ``int``;
    anything else raises ``DevelopmentErorr``. When the word reaches past
    the stored bytes, a fresh memory variable supplies the missing bytes,
    or the whole word when at least a full word is missing.
    """
    if isinstance(offset, BitVecNumRef):
        offset = offset.as_long()
    elif not isinstance(offset, int):
        raise DevelopmentErorr(
            'Does not support memory operations indexed by symbol variables.'
        )

    end = offset + WORDBYTESIZE
    if end <= len(self.__immediate_data):
        return simplify(Concat(self.__immediate_data[offset:end]))

    # ~ index out of bounds ~
    # generate a symbolic variable
    newmemvar = self.__generateMemoryVar()
    missing = end - len(self.__immediate_data)
    if missing >= WORDBYTESIZE:
        self.mstore(BitVecVal256(offset), newmemvar)
        return newmemvar

    # Append the low `missing` bytes of the new variable, highest first.
    for byte_index in range(missing - 1, -1, -1):
        self.__immediate_data.append(
            Extract(byte_index * 8 + 7, byte_index * 8, newmemvar)
        )
    return simplify(Concat(self.__immediate_data[offset:end]))
def visit_MLIL_VAR_FIELD(self, expr):
    """Translate a field access on a variable into a z3 expression.

    * Array variables yield a fresh bit-vector named ``<name>[<index>]``,
      where ``index`` is the byte offset divided by the element width.
    * Structure variables are not supported (``NotImplementedError``).
    * Register variables yield a bit-vector named after the matching
      sub-register, falling back to the variable's own name.
    * Anything else yields an ``Extract`` of the addressed bytes.
    """
    src = make_variable(expr.src)
    offset = expr.offset
    size = expr.size

    if expr.src.type.type_class == TypeClass.ArrayTypeClass:
        element_width = expr.src.type.element_type.width
        # Element index = byte offset // element width. The operands used
        # to be swapped, which also divided by zero for offset 0.
        index = offset // element_width
        return BitVec(f'{expr.src.name}[{index}]', size * 8)

    if expr.src.type.type_class == TypeClass.StructureTypeClass:
        raise NotImplementedError()

    elif (expr.src.source_type ==
            VariableSourceType.RegisterVariableSourceType):
        sub_register = next(
            (name for name, r in expr.src.function.arch.regs.items()
             if (r.full_width_reg == expr.src.name and
                 r.size == size and
                 r.offset == offset)),
            None)

        if sub_register is None:
            # This means that the variable was probably renamed, and it's
            # still just a register access.
            sub_register = expr.src.name

        return BitVec(sub_register, size * 8)

    else:
        # TODO: change this to var field name instead of extracting
        # because we don't actually care about this
        return Extract(((offset + size) * 8) - 1, (offset * 8), src)
def get_function_from_constraints(contract, constraints, is_constructor=False):
    """
    Makes assumptions on the currently executed contract function, based on the constraint list. The function
    identifier passed in the call data is checked in the contract bytecode to jump to the function implementation.
    A check for it is therefore contained in the constraint list.

    :param contract: the contract that was symbolically executed.
    :param constraints: the constraints of symbolic execution.
    :param is_constructor: whether the constructor is executed.
    :return: The currently executed contract function, the default function (signature ``"()"``) if no other
        function matched, or ``None`` if there is neither.
    """
    default_func = None
    for func in contract.functions:
        # Returns the constructor function when found, if it is known to be currently executed
        if func.isConstructor and is_constructor:
            return func

        # Compares function hashes and == constraints. The expected selector constraint depends only on the
        # function, so it is built and simplified once rather than once per constraint.
        if func.hash:
            selector_constraint = simplify(
                Extract(255, 224, BitVec('calldata_' + contract.name + "[0]", 256)) == int(func.hash, 16)
            )
            for constraint in constraints:
                if eq(selector_constraint, constraint):
                    return func

        # Return the default function if one exists and no other function was identified
        if not func.isConstructor and func.signature == "()":
            default_func = func

    return default_func
def right_one_extension(formula, bit_places):
    """Set the rest of bits on the right to 1.

    The top ``bit_places`` bits of ``formula`` are kept and every bit below
    them is set.

    :param formula: z3 bit-vector expression.
    :param bit_places: number of high bits to keep.
    :return: a bit-vector of the same width as ``formula``.
    """
    width = formula.size()
    if bit_places == width:
        # Nothing to fill; a zero-width filler constant cannot be built.
        return formula

    # 0 - 1 wraps around to an all-ones vector of the filler width.
    complement = BitVecVal(0, width - bit_places) - 1
    return Concat(Extract(width - 1, width - bit_places, formula), complement)
def visit_MLIL_VAR_PHI(self, expr):
    """Model a phi node: the destination equals one of its sources.

    Sources not visited yet get their definitions queued in
    ``self.to_visit``. Sources for which ``identify_byte`` reports a range
    are constrained and recorded the same way as plain assignments.
    """
    # MLIL_VAR_PHI doesn't set the size field, so the width of the
    # destination variable's type is used instead.
    dest = create_BitVec(expr.dest, expr.dest.var.type.width)

    candidates = []
    for ssa_var in expr.src:
        if ssa_var not in self.visited:
            definition = self.function.get_ssa_var_definition(ssa_var)
            self.to_visit.append(definition)

        src = create_BitVec(ssa_var, ssa_var.var.type.width)

        # If this value can never be larger than a byte,
        # then it must be one of the bytes in our swap.
        # Add it to a list to check later.
        if src is not None and not isinstance(src, int):
            value_range = identify_byte(ssa_var, self.function)
            if value_range is not None:
                in_range = And(
                    src <= value_range.end, src >= value_range.step
                )
                self.solver.add(Or(src == 0, in_range))

                self.byte_vars.add(ssa_var)

                range_key = (value_range.end, value_range.step)
                if self.byte_values.get(range_key) is None:
                    high_bit = int(math.floor(math.log(value_range.end, 2)))
                    low_bit = int(math.floor(math.log(value_range.step, 2)))
                    self.byte_values[range_key] = simplify(
                        Extract(high_bit, low_bit, src)
                    )

        candidates.append(src)

    if candidates:
        # Disjunction over "dest == source" for every phi source.
        self.solver.add(reduce(Or, [dest == s for s in candidates]))

    self.visited.add(expr.dest)
    if expr.instr_index in self.to_visit:
        self.to_visit.remove(expr.instr_index)
def get_function_from_constraints(contract, constraints):
    """Return the contract function whose selector check is in ``constraints``.

    For each function, the expected constraint "top 32 bits of the first
    calldata word == function hash" is built and compared structurally with
    every given constraint.

    :param contract: object providing ``name`` and ``functions``.
    :param constraints: iterable of z3 constraints.
    :return: the first matching function, or ``None``.
    """
    # Todo first we could search for constraints that could be a restriction to the function hash
    # Todo a calldata length > 4 constraint could be searched for too
    for function in contract.functions:
        if not function.hash:
            # No selector to compare; int("", 16) would raise ValueError.
            continue
        function_constraint = Extract(
            255, 224, BitVec("calldata_" + contract.name + "[0]", 256)
        ) == int(function.hash, 16)
        for constraint in constraints:
            if eq(constraint, function_constraint):
                return function
    return None
def visit_MLIL_VAR_SSA_FIELD(self, expr):
    """Return the addressed bytes of an SSA variable as an ``Extract``.

    The variable's definition is queued in ``self.to_visit`` if the
    variable has not been visited and a definition exists. Returns ``None``
    for fields at offset 0.
    """
    source = expr.src
    if source not in self.visited:
        definition = expr.function.get_ssa_var_definition(source)
        if definition is not None:
            self.to_visit.append(definition)

    var = create_BitVec(source, source.var.type.width)

    if expr.offset == 0:
        return None

    # Byte offsets and sizes are converted to bit positions.
    high_bit = ((expr.size + expr.offset) * 8) - 1
    low_bit = expr.offset * 8
    return Extract(high_bit, low_bit, var)
def mstore8(self, offset: BitVecNumRef, value: BitVecRef):
    """Store the low 8 bits of ``value`` at byte ``offset``.

    ``offset`` must be a concrete ``BitVecNumRef`` or an ``int``;
    anything else raises ``DevelopmentErorr``. The stored bytes are padded
    with ``zero8bit()`` entries up to ``offset`` when needed.
    """
    if isinstance(offset, BitVecNumRef):
        offset = checkBitVecRef256(offset).as_long()
    elif not isinstance(offset, int):
        raise DevelopmentErorr(
            'Does not support memory operations indexed by symbol variables.'
        )

    #checkBitVecRef256(value)

    shortfall = offset - len(self.__immediate_data) + 1
    if shortfall > 0:
        padding = [zero8bit() for _ in range(shortfall)]
        self.__immediate_data.extend(padding)

    low_byte = Extract(7, 0, value)
    self.__immediate_data[offset] = simplify(low_byte)
def set_function_id(self, fid: BitVecRef = None):
    """Write a 32-bit function id into bytes 0..3 via ``mstore8``.

    ``fid`` may be an 8-character hex string, an ``int``, a 32-bit
    ``BitVecRef`` or ``None`` (a symbolic ``function_id`` is created).
    Any other value raises ``SettingError``. The most significant byte is
    written to position 0.
    """
    if fid is None:
        fid = BitVec('function_id', 32)
    elif isinstance(fid, str) and len(fid) == 8:
        fid = BitVecVal(int(fid, 16), 32)
    elif isinstance(fid, int):
        fid = BitVecVal(fid, 32)
    elif not (isinstance(fid, BitVecRef) and fid.size() == 32):
        raise SettingError('illegal function id given')

    # Lowest byte goes to position 3, highest byte to position 0.
    for step, low_bit in enumerate(range(0, 32, 8)):
        fragment = Extract(low_bit + 7, low_bit, fid)
        self.mstore8(3 - step, fragment)
def byte_(self, global_state):
    """Execute BYTE: push one byte of a 256-bit word, zero-extended.

    Pops the byte index (``op0``, where 0 selects the most significant
    byte) and the word (``op1``). An index past the last byte pushes 0. A
    symbolic index pushes a fresh symbolic value instead.

    :param global_state: state whose machine stack is read and updated.
    :return: a one-element list holding ``global_state``.
    """
    mstate = global_state.mstate
    op0, op1 = mstate.stack.pop(), mstate.stack.pop()

    try:
        index = util.get_concrete_int(op0)
        offset = (31 - index) * 8
        if offset >= 0:
            result = Concat(BitVecVal(0, 248), Extract(offset + 7, offset, op1))
        else:
            # Index 32 and above lies outside the word: the result is 0,
            # not an Extract with a negative bit range.
            result = BitVecVal(0, 256)
    except AttributeError:
        logging.debug("BYTE: Unsupported symbolic byte offset")
        result = BitVec(str(simplify(op1)) + "_" + str(simplify(op0)), 256)

    mstate.stack.append(simplify(result))

    return [global_state]
def byte_(self, global_state):
    """Execute BYTE: push one byte of a 256-bit word, zero-extended.

    Pops the byte index (``op0``, where 0 selects the most significant
    byte) and the word (``op1``). An index past the last byte pushes the
    integer 0. A symbolic index pushes a fresh symbolic value.
    """
    machine = global_state.mstate
    op0 = machine.stack.pop()
    op1 = machine.stack.pop()

    if not isinstance(op1, ExprRef):
        op1 = BitVecVal(op1, 256)

    try:
        index = util.get_concrete_int(op0)
        offset = (31 - index) * 8
        if offset < 0:
            result = 0
        else:
            selected = Extract(offset + 7, offset, op1)
            result = simplify(Concat(BitVecVal(0, 248), selected))
    except AttributeError:
        logging.debug("BYTE: Unsupported symbolic byte offset")
        result = BitVec(str(simplify(op1)) + "[" + str(simplify(op0)) + "]", 256)

    machine.stack.append(result)
    return [global_state]
def get_call_data(
    global_state: GlobalState,
    memory_start: Union[int, ExprRef],
    memory_size: Union[int, ExprRef],
):
    """
    Gets call_data from the global_state.

    Reads ``memory[memory_start : memory_start + memory_size]`` and wraps it
    in a ``Calldata`` object. If that raises ``TypeError`` (logged as an
    unsupported symbolic calldata offset), a ``Calldata`` without starting
    data is returned instead.

    :param global_state: state to look in
    :param memory_start: Start index
    :param memory_size: Size
    :return: Tuple containing: call_data built from memory (or without
        starting data in the symbolic case), and the CalldataType found
    """
    state = global_state.mstate
    transaction_id = "{}_internalcall".format(global_state.current_transaction.id)
    try:
        # TODO: This only allows for either fully concrete or fully symbolic calldata.
        # Improve management of memory and calldata to support a mix between both types.
        calldata_from_mem = state.memory[
            util.get_concrete_int(memory_start) : util.get_concrete_int(
                memory_start + memory_size
            )
        ]
        i = 0
        starting_calldata = []
        while i < len(calldata_from_mem):
            elem = calldata_from_mem[i]
            if isinstance(elem, int):
                starting_calldata.append(elem)
                i += 1
            else:  # BitVec
                # Split the bit-vector into 8-bit slices, lowest bits first.
                # NOTE(review): the index advances once per emitted slice,
                # presumably because a wide element covers several memory
                # cells - confirm against the memory model.
                for j in range(0, elem.size(), 8):
                    starting_calldata.append(Extract(j + 7, j, elem))
                    i += 1

        call_data = Calldata(transaction_id, starting_calldata)
        call_data_type = CalldataType.CONCRETE
        logging.debug("Calldata: " + str(call_data))
    except TypeError:
        logging.debug("Unsupported symbolic calldata offset")
        call_data_type = CalldataType.SYMBOLIC
        # NOTE(review): transaction_id already ends in "_internalcall", so
        # this id carries the suffix twice - confirm that is intended.
        call_data = Calldata("{}_internalcall".format(transaction_id))

    return call_data, call_data_type
def execute(self, state):
    """
    Execute instruction that this class was initialized with.

    Constant instructions short-circuit to a single ``BitVecVal``.
    Otherwise the ``evaluate_<operation>`` method is called and each result
    is truncated or zero-extended to the instruction width in bits
    (doubled for operations whose name ends in ``_DP``).

    :state: Current active state
    :raises NotImplementedError: if no ``evaluate_<operation>`` exists
    """
    operation = self.instruction.operation.name

    log.log_debug("Evaluating {}: {} @ {}".format(
        operation, self.instruction, hex(self.instruction.address)))

    try:
        if self.instruction.value.is_constant:
            size = self.instruction.size * 8
            return [BitVecVal(self.instruction.value.value, size)]
    except AttributeError:
        # No usable constant value on this instruction; evaluate it.
        pass

    executor = getattr(self, "evaluate_" + operation, None)
    if executor is None:
        raise NotImplementedError(repr(operation))
    result = executor(state)

    for position, value in enumerate(result):
        width = self.instruction.size * 8
        if operation.endswith("_DP"):
            # Double precision
            width *= 2

        if width < value.size():
            result[position] = Extract(width - 1, 0, value)
        elif width > value.size():
            result[position] = ZeroExt(width - value.size(), value)

    log.log_debug("Completed {}: {} @ {}".format(
        operation, self.instruction, hex(self.instruction.address)))

    return result
def mstore(self, offset: BitVecNumRef, value: BitVecRef):
    """Store ``value`` as one word starting at byte ``offset``.

    ``offset`` must be a concrete ``BitVecNumRef`` or an ``int``;
    anything else raises ``DevelopmentErorr``. The stored bytes are padded
    with ``zero8bit()`` entries when the word reaches past their end. The
    most significant byte of ``value`` lands at ``offset``.
    """
    if isinstance(offset, BitVecNumRef):
        offset = offset.as_long()
    elif not isinstance(offset, int):
        raise DevelopmentErorr(
            'Does not support memory operations indexed by symbol variables.'
        )

    checkBitVecRef256(value)

    shortfall = offset + WORDBYTESIZE - len(self.__immediate_data)
    if shortfall > 0:
        self.__immediate_data.extend([zero8bit() for _ in range(shortfall)])

    # Byte i (counted from the least significant end) goes to the slot
    # that is i positions before the end of the word.
    for i in range(WORDBYTESIZE):
        slot = offset + (WORDBYTESIZE - 1 - i)
        self.__immediate_data[slot] = Extract(i * 8 + 7, i * 8, value)
def evaluate_MLIL_VAR_SSA_FIELD(self, state):
    """Return bits ``31..offset`` of the referenced SSA variable.

    NOTE(review): the upper bit is hard-coded to 31 and ``offset`` is used
    directly as a bit index - confirm both against the callers.
    """
    ssa_variable, offset = self.instruction.operands
    source_value = state.get_ssa_variable(ssa_variable)
    field = Extract(31, offset, source_value)
    return [field]
def to_smt(r):
    # type: (Rtl) -> Tuple[List[ExprRef], Z3VarMap]
    """
    Encode a concrete primitive Rtl r as a z3 query.

    Returns a tuple (query, var_m) where:
        - query is a list of z3 expressions
        - var_m is a map from Vars v with non-BVType to their corresponding z3
          bitvector variable.
    """
    # Imported locally so this block does not rely on the module's import
    # list already containing ULT.
    from z3 import ULT

    assert r.is_concrete()
    # Should contain only primitives
    primitives = set(PRIMITIVES.instructions)
    assert set(d.expr.inst for d in r.rtl).issubset(primitives)

    q = []  # type: List[ExprRef]
    m = {}  # type: Z3VarMap

    # Build declarations for any bitvector Vars
    var_to_bv = {}  # type: Z3VarMap
    for v in r.vars():
        typ = v.get_typevar().singleton_type()
        if not isinstance(typ, BVType):
            continue

        var_to_bv[v] = BitVec(v.name, typ.bits)

    # Encode each instruction as an equality assertion
    for d in r.rtl:
        inst = d.expr.inst

        exp = None  # type: ExprRef
        # For prim_to_bv/prim_from_bv just update var_m. No assertion needed
        if inst == prim_to_bv:
            assert isinstance(d.expr.args[0], Var)
            m[d.expr.args[0]] = var_to_bv[d.defs[0]]
            continue

        if inst == prim_from_bv:
            assert isinstance(d.expr.args[0], Var)
            m[d.defs[0]] = var_to_bv[d.expr.args[0]]
            continue

        if inst in [bvadd, bvult]:  # Binary instructions
            assert len(d.expr.args) == 2 and len(d.defs) == 1
            lhs = d.expr.args[0]
            rhs = d.expr.args[1]
            df = d.defs[0]
            assert isinstance(lhs, Var) and isinstance(rhs, Var)

            if inst == bvadd:  # Normal binary - output type same as args
                exp = (var_to_bv[lhs] + var_to_bv[rhs])
            else:
                assert inst == bvult
                # bvult is an unsigned comparison. The `<` operator on z3
                # bit-vectors is the signed one, so ULT is required here.
                exp = ULT(var_to_bv[lhs], var_to_bv[rhs])
                # Comparison binary - need to convert bool to BitVec 1
                exp = If(exp, BitVecVal(1, 1), BitVecVal(0, 1))

            exp = mk_eq(var_to_bv[df], exp)
        elif inst == bvzeroext:
            arg = d.expr.args[0]
            df = d.defs[0]
            assert isinstance(arg, Var)
            fromW = arg.get_typevar().singleton_type().width()
            toW = df.get_typevar().singleton_type().width()

            exp = mk_eq(var_to_bv[df], ZeroExt(toW - fromW, var_to_bv[arg]))
        elif inst == bvsignext:
            arg = d.expr.args[0]
            df = d.defs[0]
            assert isinstance(arg, Var)
            fromW = arg.get_typevar().singleton_type().width()
            toW = df.get_typevar().singleton_type().width()

            exp = mk_eq(var_to_bv[df], SignExt(toW - fromW, var_to_bv[arg]))
        elif inst == bvsplit:
            arg = d.expr.args[0]
            assert isinstance(arg, Var)
            arg_typ = arg.get_typevar().singleton_type()
            width = arg_typ.width()
            assert (width % 2 == 0)

            lo = d.defs[0]
            hi = d.defs[1]

            # Low half is bits [width/2 - 1 .. 0], high half the rest.
            exp = And(
                mk_eq(var_to_bv[lo],
                      Extract(width // 2 - 1, 0, var_to_bv[arg])),
                mk_eq(var_to_bv[hi],
                      Extract(width - 1, width // 2, var_to_bv[arg])))
        elif inst == bvconcat:
            assert isinstance(d.expr.args[0], Var) and \
                isinstance(d.expr.args[1], Var)
            lo = d.expr.args[0]
            hi = d.expr.args[1]
            df = d.defs[0]

            # Z3 Concat expects hi bits first, then lo bits
            exp = mk_eq(var_to_bv[df], Concat(var_to_bv[hi], var_to_bv[lo]))
        else:
            assert False, "Unknown primitive instruction {}".format(inst)

        q.append(exp)

    return (q, m)
def main():
    """Heyawake solver example.

    Builds a symbol grid of black ("B") and white ("W") cells over a
    HEIGHT x WIDTH rectangle, adds the puzzle rules as solver constraints,
    then prints the solution and whether it is unique.
    """
    sym = grilops.SymbolSet([("B", chr(0x2588)), ("W", " ")])
    lattice = grilops.get_rectangle_lattice(HEIGHT, WIDTH)
    sg = grilops.SymbolGrid(lattice, sym)

    # Rule 1: Painted cells may never be orthogonally connected (they may not
    # share a side, although they can touch diagonally).
    for p in lattice.points:
        sg.solver.add(
            Implies(
                sg.cell_is(p, sym.B),
                And(*[n.symbol != sym.B for n in sg.edge_sharing_neighbors(p)])
            )
        )

    # Rule 2: All white cells must be interconnected (form a single polyomino).
    rc = grilops.regions.RegionConstrainer(
        lattice,
        sg.solver,
        complete=False)
    # One shared region id for every white cell; black cells get id -1.
    white_region_id = Int("white_region_id")
    sg.solver.add(white_region_id >= 0)
    sg.solver.add(white_region_id < HEIGHT * WIDTH)
    for p in lattice.points:
        sg.solver.add(
            If(
                sg.cell_is(p, sym.W),
                rc.region_id_grid[p] == white_region_id,
                rc.region_id_grid[p] == -1
            )
        )

    # Rule 3: A number indicates exactly how many painted cells there must be in
    # that particular room.
    region_cells = defaultdict(list)
    for p in lattice.points:
        region_cells[REGIONS[p.y][p.x]].append(sg.grid[p])
    for region, count in REGION_COUNTS.items():
        sg.solver.add(PbEq([(c == sym.B, 1) for c in region_cells[region]], count))

    # Rule 4: A room which has no number may contain any number of painted cells,
    # or none.

    # Rule 5: Where a straight (orthogonal) line of connected white cells is
    # formed, it must not contain cells from more than two rooms—in other words,
    # any such line of white cells which connects three or more rooms is
    # forbidden.
    # Each room gets one bit position (its index in the sorted name list).
    region_names = sorted(list(set(c for row in REGIONS for c in row)))
    bits = len(region_names)

    def set_region_bit(bv, p):
        # Rebuild bv with the bit of p's room forced to 1: bits above the
        # position, the constant 1, then the bits below the position.
        i = region_names.index(REGIONS[p.y][p.x])
        chunks = []
        if i < bits - 1:
            chunks.append(Extract(bits - 1, i + 1, bv))
        chunks.append(BitVecVal(1, 1))
        if i > 0:
            chunks.append(Extract(i - 1, 0, bv))
        return Concat(*chunks)

    for p in lattice.points:
        for n in sg.edge_sharing_neighbors(p):
            # Start with p's own room bit set; the accumulate callback sets
            # the room bit of each cell it is given, and the last callback
            # tests for a black cell (presumably the stop condition -
            # confirm against reduce_cells).
            bv = reduce_cells(
                sg,
                p,
                n.direction,
                set_region_bit(BitVecVal(0, bits), p),
                lambda acc, c, ap: set_region_bit(acc, ap),
                lambda acc, c, sp: c == sym.B
            )
            # Count the set bits, i.e. the distinct rooms accumulated.
            popcnt = Sum(*[BV2Int(Extract(i, i, bv)) for i in range(bits)])
            sg.solver.add(Implies(sg.cell_is(p, sym.W), popcnt <= 2))

    if sg.solve():
        sg.print()
        print()
        if sg.is_unique():
            print("Unique solution")
        else:
            print("Alternate solution")
            sg.print()
    else:
        print("No solution")
from opcodes import BYTE
from rule import Rule
from z3 import BitVec, BitVecVal, Concat, Extract

"""
Checks that the byte opcode (implemented using shift) is equivalent to a
canonical definition of byte using extract.
"""

rule = Rule()

n_bits = 256

x = BitVec('X', n_bits)

for i in range(32):
    # For BYTE, i = 0 selects the most significant byte, whereas Extract
    # numbers bits from the least significant end, so the order is mirrored.
    lsb = 31 - i
    canonical = Concat(
        BitVecVal(0, n_bits - 8),
        Extract(8 * lsb + 7, 8 * lsb, x)
    )
    rule.check(BYTE(BitVecVal(i, n_bits), x), canonical)
def MULMOD(x, y, m):
    """Return ``(x * y) % m`` as a z3 expression, or 0 when ``m`` is 0.

    The product is computed at double width (operands zero-extended by
    their own size) and the unsigned remainder is cut back to the width
    of ``x``.
    """
    width = x.size()
    wide_product = ZeroExt(width, x) * ZeroExt(width, y)
    wide_modulus = ZeroExt(m.size(), m)
    remainder = URem(wide_product, wide_modulus)
    return If(m == 0, 0, Extract(width - 1, 0, remainder))
def visit_MLIL_VAR_FIELD(self, expr):
    """Return the addressed bytes of the variable as an ``Extract``.

    ``expr.offset`` and ``expr.size`` are byte counts; they are converted
    to bit positions within the variable built by ``make_variable``.
    """
    src = make_variable(expr.src)
    first_byte = expr.offset
    byte_count = expr.size
    high_bit = ((first_byte + byte_count) * 8) - 1
    low_bit = first_byte * 8
    return Extract(high_bit, low_bit, src)
def ADDMOD(x, y, m):
    """Return ``(x + y) % m`` as a z3 expression, or 0 when ``m`` is 0.

    The sum is computed with one extra bit so the carry is kept, and the
    unsigned remainder is cut back to the width of ``x``.
    """
    wide_sum = ZeroExt(1, x) + ZeroExt(1, y)
    wide_modulus = ZeroExt(1, m)
    remainder = URem(wide_sum, wide_modulus)
    return If(m == 0, 0, Extract(x.size() - 1, 0, remainder))