def is_factor(x, y):
    '''
    return True if x is a factor of y

    will return True for any 2 numbers because we use floating point.
    A product is accepted when either operand has x as a factor; a sum
    or a difference is accepted only when both operands do.
    '''
    a, b = symath.wilds('a b')
    val = symath.WildResults()

    if x == y:
        return True

    if isinstance(x, symath.Number) and isinstance(y, symath.Number):
        return True

    # one divisible operand is enough for a product
    if y.match(a * b, val):
        return is_factor(x, val.a) or is_factor(x, val.b)

    # sums and differences need both operands to qualify
    for both_needed in (a + b, a - b):
        if y.match(both_needed, val):
            return is_factor(x, val.a) and is_factor(x, val.b)

    return False
def get_version(self, version):
    '''
    Build a VersionedSet restricted to operations tagged at or below
    ``version``.

    An entry of ``self._oplist`` is kept when its first element matches
    the shape ``w(v)`` and ``v.value() <= version``; entries that do not
    match that shape are dropped.

    returns: a new VersionedSet whose ``version`` is ``version`` and
             whose ``_oplist`` is the filtered list
    '''
    w, v = wilds('w v')
    val = WildResults()

    # A list comprehension (rather than filter()) guarantees a real list
    # on every Python version, so the result can be iterated repeatedly.
    kept_ops = [
        entry for entry in self._oplist
        if entry[0].match(w(v), val) and val.v.value() <= version
    ]

    rv = VersionedSet()
    rv.version = version
    rv._oplist = kept_ops
    return rv
def density(graph, loops=True):
    '''
    Ratio of the edge count to the number of possible edges, both taken
    from summation(graph).  With loops=True every node may also connect
    to itself (n * n possible edges); otherwise n * (n - 1).

    returns: GraphDensity(density : float)
    raises: BaseException if summation() does not yield a GraphSummation
    '''
    ns, es = wilds('ns es')
    vals = WildResults()

    if not summation(graph).match(GraphSummation(ns, es), vals):
        raise BaseException("Unexpected result from summation()")

    node_count = vals.ns.n
    partners = node_count if loops else node_count - 1
    pes = node_count * partners
    # NOTE(review): on Python 2 this floor-divides if both .n values are
    # ints - confirm they are floats.
    return GraphDensity(vals.es.n / pes)
def density(graph, loops=True):
    """
    Ratio of the edge count to the number of possible edges, both taken
    from summation(graph).  With loops=True every node may also connect
    to itself (n * n possible edges); otherwise n * (n - 1).

    returns: GraphDensity(density : float)
    raises: BaseException if summation() does not yield a GraphSummation
    """
    ns, es = wilds("ns es")
    vals = WildResults()

    if not summation(graph).match(GraphSummation(ns, es), vals):
        raise BaseException("Unexpected result from summation()")

    node_count = vals.ns.n
    partners = node_count if loops else node_count - 1
    pes = node_count * partners
    # NOTE(review): on Python 2 this floor-divides if both .n values are
    # ints - confirm they are floats.
    return GraphDensity(vals.es.n / pes)
def _convert(exp):
    '''
    Recursively translate a symath expression into its z3 counterpart.

    Comparison, arithmetic and bitwise operators are rebuilt with the
    same Python operator on the converted operands; LogicalAnd/Or/Xor
    map to z3.And / z3.Or / an explicit exclusive-or built from And, Or
    and Not.  Symbols become z3.Int, z3.Bool, z3.BitVec or (by default)
    z3.Real depending on their is_integer / is_bool / is_bitvector
    attributes.  Known values are unwrapped with .value().

    raises: BaseException for any expression not covered above
    '''
    a, b = symath.wilds('a b')
    vals = symath.WildResults()

    # comparisons
    if exp.match(a < b, vals):
        return _convert(vals.a) < _convert(vals.b)
    elif exp.match(a > b, vals):
        return _convert(vals.a) > _convert(vals.b)
    elif exp.match(symath.stdops.Equal(a, b), vals):
        return _convert(vals.a) == _convert(vals.b)
    elif exp.match(a <= b, vals):
        # previously called the misspelled `_cnvert`, raising NameError
        return _convert(vals.a) <= _convert(vals.b)
    elif exp.match(a >= b, vals):
        return _convert(vals.a) >= _convert(vals.b)

    # arithmetic and bitwise operators
    elif exp.match(a + b, vals):
        return _convert(vals.a) + _convert(vals.b)
    elif exp.match(a - b, vals):
        return _convert(vals.a) - _convert(vals.b)
    elif exp.match(a * b, vals):
        return _convert(vals.a) * _convert(vals.b)
    elif exp.match(a / b, vals):
        return _convert(vals.a) / _convert(vals.b)
    elif exp.match(a ^ b, vals):
        return _convert(vals.a) ^ _convert(vals.b)
    elif exp.match(a & b, vals):
        return _convert(vals.a) & _convert(vals.b)
    elif exp.match(a | b, vals):
        return _convert(vals.a) | _convert(vals.b)
    elif exp.match(a ** b, vals):
        return _convert(vals.a) ** _convert(vals.b)

    # logical connectives
    elif exp.match(symath.stdops.LogicalAnd(a, b), vals):
        return z3.And(_convert(vals.a), _convert(vals.b))
    elif exp.match(symath.stdops.LogicalOr(a, b), vals):
        return z3.Or(_convert(vals.a), _convert(vals.b))
    elif exp.match(symath.stdops.LogicalXor(a, b), vals):
        # xor spelled out as (a and not b) or (b and not a)
        return z3.Or(
            z3.And(_convert(vals.a), z3.Not(_convert(vals.b))),
            z3.And(_convert(vals.b), z3.Not(_convert(vals.a))))

    # leaves: typed symbols first, untyped symbols default to Real
    elif isinstance(exp, symath.Symbol) and exp.is_integer:
        return z3.Int(exp.name)
    elif isinstance(exp, symath.Symbol) and exp.is_bool:
        return z3.Bool(exp.name)
    elif isinstance(exp, symath.Symbol) and exp.is_bitvector > 0:
        return z3.BitVec(exp.name, exp.is_bitvector)
    elif isinstance(exp, symath.Symbol):
        return z3.Real(exp.name)
    elif isinstance(exp, symath.core._KnownValue):
        return exp.value()
    else:
        raise BaseException(
            "Invalid argument (%s) (type: %s) passed to z3 solver" % (exp, type(exp)))
def test_wilds_dont_substitute(self):
    '''
    it is implicitly assumed that substitute is too "dumb" to account
    for wilds in some places in the code base, this makes sure that
    doesnt change without making sure the rest of the code base is
    updated
    '''
    a, b = symath.wilds('a b')
    x, y = symath.symbols('x y')
    subs = {x(a): x(x)}

    # (term to substitute into, expected result)
    cases = (
        (x(y), x(y)),
        (x(a), x(x)),  # this one *should* substitute
        (x(b), x(b)),
    )
    for term, expected in cases:
        self.assertEqual(term.substitute(subs), expected)
def __contains__(self, item):
    '''
    Membership test driven by the operation log.

    Walks ``self._oplist`` in order and lets the first operation
    recorded for ``item`` decide: VER_DELETE means absent, VER_ADD
    means present.

    returns: True or False; False when no operation mentions ``item``
    raises: Exception if the deciding operation is neither VER_ADD nor
            VER_DELETE
    '''
    w = wilds('w')
    for op, i in self._oplist:
        if i != item:
            continue
        if op.match(VER_DELETE(w)):
            return False
        if op.match(VER_ADD(w)):
            return True
        # one-element tuple so the formatting cannot misfire on op's type
        raise Exception("INVALID OPERATION FOUND: %s" % (op,))

    # no operation ever touched this item (was an implicit None before)
    return False
def reg_size(reg):
    '''
    Look up the size of a register operand.

    returns: symbolic(2) for AX..SP, symbolic(1) for AL..DH,
             symbolic(4) for EAX..EFLAGS (presumably byte counts), or
             the first argument of a DEREF(a, b) operand
    raises: BaseException for anything else
    '''
    a, b = wilds('a b')
    val = WildResults()

    sized_groups = (
        ((AX, BX, CX, DX, DI, SI, BP, SP), 2),
        ((AL, AH, BL, BH, CL, CH, DL, DH), 1),
        ((EAX, EBX, ECX, EDX, EDI, ESI, EBP, ESP, EFLAGS), 4),
    )
    for group, size in sized_groups:
        if reg in group:
            return symbolic(size)

    # a DEREF carries its own size as its first argument
    if reg.match(DEREF(a, b), val):
        return val.a

    raise BaseException('Unknown Register %s' % reg)
def forward_data_flow(source, ea=None, calldepth=0):
    '''
    Propagate taint forward from ``source`` through the function at
    ``ea``.

    The function graph is walked depth first.  For every decoded
    instruction of the form inst(dst, src) whose inst is listed in
    tainted_dst_src_insts:
      * XOR(dst, dst) on a tainted dst removes the taint,
      * a tainted src taints dst and colors the address,
      * otherwise a tainted dst loses its taint.
    When the walk returns to a level at or below the current version,
    the tainted set is rolled back to the version before that level.

    source:    initial tainted expression
    ea:        address to analyse; defaults to ScreenEA()
    calldepth: accepted but not used by this function
    returns:   the VersionedSet of tainted expressions at the end of
               the walk
    '''
    if ea is None:
        ea = ScreenEA()

    _clear_colored()

    inst, dst, src = wilds("inst dst src")
    w = WildResults()

    tainted = VersionedSet()
    tainted.version = -1
    tainted.add(source)

    # NOTE(review): _fix_esp is defined but never called in this function
    def _fix_esp(ea, exp):
        spd = GetSpd(ea)
        return exp.substitute({esp: (esp + spd).simplify()})

    fg = FunctionGraph(ea)

    # data connections graph
    # NOTE(review): dg is built but never read afterwards
    TAINTED = symbols("TAINTED")
    dg = DirectedGraph()
    dg.connect(TAINTED, source)

    for addr, level in fg.walk(ea, depthfirst=True):
        if level <= tainted.version:
            # single-argument parenthesized print: same output on
            # Python 2 and Python 3
            print("reverting to version %s" % (level - 1))
            tainted = tainted.get_version(level - 1)

        tainted.version = level

        syminst = symdecode(addr)

        if syminst.match(inst(dst, src), w) and w.inst in tainted_dst_src_insts:
            print("analyzing %s" % (syminst,))

            # untaint cleared registers
            if syminst.match(XOR(dst, dst)) and w.dst in tainted:
                tainted.remove(w.dst)
            elif w.src in tainted:
                _color(addr)
                print("tainting %s" % (w.dst,))
                tainted.add(w.dst)
            elif w.dst in tainted:
                # overwritten with an untainted value
                tainted.remove(w.dst)

    return tainted
def test_wilds_dont_substitute(self):
    '''
    it is implicitly assumed that substitute is too "dumb" to account
    for wilds in some places in the code base, this makes sure that
    doesnt change without making sure the rest of the code base is
    updated
    '''
    a, b = symath.wilds('a b')
    x, y = symath.symbols('x y')
    subs = {x(a): x(x)}

    # (term to substitute into, expected result)
    cases = (
        (x(y), x(y)),
        (x(a), x(x)),  # this one *should* substitute
        (x(b), x(b)),
    )
    for term, expected in cases:
        self.assertEqual(term.substitute(subs), expected)
def _convert(exp):
    '''
    Recursively translate a symath expression into its z3 counterpart.

    Comparison, arithmetic and bitwise operators are rebuilt with the
    same Python operator on the converted operands; LogicalAnd/Or/Xor
    map to z3.And / z3.Or / an explicit exclusive-or built from And, Or
    and Not.  Symbols become z3.Int, z3.Bool, z3.BitVec or (by default)
    z3.Real depending on their is_integer / is_bool / is_bitvector
    attributes.  Known values are unwrapped with .value().

    raises: BaseException for any expression not covered above
    '''
    a, b = symath.wilds('a b')
    vals = symath.WildResults()

    # comparisons
    if exp.match(a < b, vals):
        return _convert(vals.a) < _convert(vals.b)
    elif exp.match(a > b, vals):
        return _convert(vals.a) > _convert(vals.b)
    elif exp.match(symath.stdops.Equal(a, b), vals):
        return _convert(vals.a) == _convert(vals.b)
    elif exp.match(a <= b, vals):
        # previously called the misspelled `_cnvert`, raising NameError
        return _convert(vals.a) <= _convert(vals.b)
    elif exp.match(a >= b, vals):
        return _convert(vals.a) >= _convert(vals.b)

    # arithmetic and bitwise operators
    elif exp.match(a + b, vals):
        return _convert(vals.a) + _convert(vals.b)
    elif exp.match(a - b, vals):
        return _convert(vals.a) - _convert(vals.b)
    elif exp.match(a * b, vals):
        return _convert(vals.a) * _convert(vals.b)
    elif exp.match(a / b, vals):
        return _convert(vals.a) / _convert(vals.b)
    elif exp.match(a ^ b, vals):
        return _convert(vals.a) ^ _convert(vals.b)
    elif exp.match(a & b, vals):
        return _convert(vals.a) & _convert(vals.b)
    elif exp.match(a | b, vals):
        return _convert(vals.a) | _convert(vals.b)
    elif exp.match(a ** b, vals):
        return _convert(vals.a) ** _convert(vals.b)

    # logical connectives
    elif exp.match(symath.stdops.LogicalAnd(a, b), vals):
        return z3.And(_convert(vals.a), _convert(vals.b))
    elif exp.match(symath.stdops.LogicalOr(a, b), vals):
        return z3.Or(_convert(vals.a), _convert(vals.b))
    elif exp.match(symath.stdops.LogicalXor(a, b), vals):
        # xor spelled out as (a and not b) or (b and not a)
        return z3.Or(
            z3.And(_convert(vals.a), z3.Not(_convert(vals.b))),
            z3.And(_convert(vals.b), z3.Not(_convert(vals.a))))

    # leaves: typed symbols first, untyped symbols default to Real
    elif isinstance(exp, symath.Symbol) and exp.is_integer:
        return z3.Int(exp.name)
    elif isinstance(exp, symath.Symbol) and exp.is_bool:
        return z3.Bool(exp.name)
    elif isinstance(exp, symath.Symbol) and exp.is_bitvector > 0:
        return z3.BitVec(exp.name, exp.is_bitvector)
    elif isinstance(exp, symath.Symbol):
        return z3.Real(exp.name)
    elif isinstance(exp, symath.core._KnownValue):
        return exp.value()
    else:
        raise BaseException("Invalid argument (%s) (type: %s) passed to z3 solver" % (exp, type(exp)))
def complexity(graph):
    """
    ONLY HAS MEANING FOR A CONTROL FLOW GRAPH

    calculates the cyclomatic complexity of a CFG as
    edges - nodes + 2 * (number of exit nodes), using the counts
    reported by summation(graph)

    returns: GraphComplexity(complexity : int)
    raises: BaseException if summation() does not yield a GraphSummation
    """
    ns, es = wilds("ns es")
    vals = WildResults()

    if not summation(graph).match(GraphSummation(ns, es), vals):
        raise BaseException("Unexpected result from summation()")

    exit_count = len(graph.exit_nodes)
    score = (vals.es - vals.ns + exit_count * 2).simplify()
    return GraphComplexity(score.n)
def _get_set(self):
    '''
    Replay ``self._oplist`` into a plain set.

    An item is collected when a VER_ADD for it is seen before any
    VER_DELETE for it in list order; once a delete has been seen, adds
    for the same item further along the list are ignored.

    returns: set of the collected items
    '''
    v = wilds('v')
    deleted = set()
    members = set()
    for op, item in self._oplist:
        if op.match(VER_ADD(v)) and item not in deleted:
            members.add(item)
        elif op.match(VER_DELETE(v)):
            deleted.add(item)
    return members
def _get_src_value(op, context):
    '''
    Resolve a source operand against ``context``.

    Numbers are returned untouched.  Registers are first passed through
    reg_mask().  The operand is then substituted with ``context`` and
    simplified; if the result is a DEREF that ``context`` holds an
    entry for, that entry (simplified) is returned instead.
    '''
    if isinstance(op, symath.Number):
        return op

    if is_register(op):
        op = reg_mask(op)

    a, b = symath.wilds('a b')
    vals = symath.WildResults()

    resolved = op.substitute(context).simplify()

    # a memory reference with a recorded value wins over the expression
    if resolved.match(DEREF(a, b), vals) and resolved in context:
        return context[resolved].simplify()

    return resolved.simplify()
def complexity(graph):
    '''
    ONLY HAS MEANING FOR A CONTROL FLOW GRAPH

    calculates the cyclomatic complexity of a CFG as
    edges - nodes + 2 * (number of exit nodes), using the counts
    reported by summation(graph)

    returns: GraphComplexity(complexity : int)
    raises: BaseException if summation() does not yield a GraphSummation
    '''
    ns, es = wilds('ns es')
    vals = WildResults()

    if not summation(graph).match(GraphSummation(ns, es), vals):
        raise BaseException("Unexpected result from summation()")

    exit_count = len(graph.exit_nodes)
    score = (vals.es - vals.ns + exit_count * 2).simplify()
    return GraphComplexity(score.n)
def _cleanup_derefs(exp):
    '''
    Drop a mask wrapped around a DEREF when the mask is at least as wide
    as the DEREF, then swap a DEREF for its entry in ``known`` if one
    exists.

    NOTE(review): the source had lost its indentation; the nesting below
    is the most plausible reconstruction - confirm against the original.
    NOTE(review): ``known`` is not defined in this function; it must
    come from an enclosing or module scope.

    returns: the (possibly replaced) expression
    '''
    a,b,c = symbolic.wilds('a b c')
    val = symbolic.WildResults()
    if exp.match(a & DEREF(b, c), val):
        # exp[1] is the mask operand, exp[2] the DEREF, exp[2][1] the
        # DEREF's first argument (presumably its size in bytes - confirm)
        if (exp[1] & 0xff) == symbolic.symbolic(0xff) and exp[2][1] == symbolic.symbolic(0x1):
            exp = exp[2]
        # NOTE(review): these are plain `if`s, so after a replacement the
        # remaining checks run against the unwrapped DEREF itself
        if (exp[1] & 0xffff) == symbolic.symbolic(0xffff) and exp[2][1] == symbolic.symbolic(0x2):
            exp = exp[2]
        if (exp[1] & 0xffffffff) == symbolic.symbolic(0xffffffff) and exp[2][1] == symbolic.symbolic(0x4):
            exp = exp[2]
    # exp[0] is compared against DEREF, i.e. treated as the expression head
    if exp[0] == DEREF:
        if exp in known:
            exp = known[exp]
    return exp
def _idisplay(exp):
    '''
    Render an expression as a LaTeX-style string, recursing into its
    operands.

    Handles powers, names listed in _greek, negation (-1 * x), the
    binary operators + - * / ^, Exp, the six trig functions, Equal and
    Sum; everything else falls back to str(exp).
    '''
    x, y, z, n = wilds('x y z n')
    ws = WildResults()

    if exp.match(x ** y, ws):
        return r"{%s} ^ {%s}" % (_idisplay(ws.x), _idisplay(ws.y))

    if exp in _greek:
        return r'\%s' % (str(exp),)

    if exp.match(-1 * x, ws):
        return r'-{%s}' % (_idisplay(ws.x),)

    # plain binary operators, tried in this order
    binary_forms = (
        (x + y, r'{%s} + {%s}'),
        (x - y, r'{%s} - {%s}'),
        (x * y, r'{%s} {%s}'),
        (x / y, r'\frac{%s}{%s}'),
        (x ^ y, r'{%s} \oplus {%s}'),
    )
    for shape, template in binary_forms:
        if exp.match(shape, ws):
            return template % (_idisplay(ws.x), _idisplay(ws.y))

    if exp.match(functions.Exp(x), ws):
        return r'e^{%s}' % (_idisplay(ws.x),)

    # trig functions render as a backslash plus the lower-cased name
    trig_heads = [
        functions.ArcCos,
        functions.ArcSin,
        functions.ArcTan,
        functions.Cos,
        functions.Sin,
        functions.Tan]
    if exp.match(x(y), ws) and ws.x in trig_heads:
        return r'\%s{%s}' % (str(ws.x).lower(), _idisplay(ws.y))

    if exp.match(stdops.Equal(x,y), ws):
        return r'%s = %s' % (_idisplay(ws.x), _idisplay(ws.y))

    if exp.match(functions.Sum(n, x), ws):
        return r'\sum_{%s}{%s}' % (_idisplay(ws.n), _idisplay(ws.x))

    return str(exp)
def test_edit_distance(self):
    '''
    edit_distance on plain symbol trees, on trees containing wilds, and
    on two longer argument lists
    '''
    from symath.algorithms.editdistance import edit_distance,edit_substitutions
    x, y, z = symath.symbols('x y z')
    a, b, c = symath.wilds('a b c')

    # (left, right, expected distance)
    simple_cases = (
        (x(x, y, x), y(x, x, x), 2),
        (x(y, x), x(y, y, x), 1),
        (x(y, x), x(x), 1),
        (x(y, y, x), x(x), 2),
        (a, x(y, y), 0),
        (a(x, x), x(y, x), 1),
    )
    for left, right, expected in simple_cases:
        self.assertEqual(edit_distance(left, right), expected)

    # the same wilds in swapped positions must not count as a free match
    swapped = y(x(a, b), x(b, a))
    aligned = y(x(a, b), x(a, b))
    self.assertNotEqual(edit_distance(swapped, aligned), 0)
    self.assertEqual(edit_distance(swapped, aligned), 2)
    self.assertEqual(edit_distance(swapped, x(x(a, b), x(a, b))), 3)

    exp1 = y(x, x, x, y, y, x, x, x, y)
    exp2 = y(x, y, y, x, y, x, x, y)
    self.assertEqual(edit_distance(exp1, exp2), 3)
def test_edit_distance(self):
    '''
    edit_distance on plain symbol trees, on trees containing wilds, and
    on two longer argument lists
    '''
    from symath.algorithms.editdistance import edit_distance, edit_substitutions
    x, y, z = symath.symbols('x y z')
    a, b, c = symath.wilds('a b c')

    # (left, right, expected distance)
    simple_cases = (
        (x(x, y, x), y(x, x, x), 2),
        (x(y, x), x(y, y, x), 1),
        (x(y, x), x(x), 1),
        (x(y, y, x), x(x), 2),
        (a, x(y, y), 0),
        (a(x, x), x(y, x), 1),
    )
    for left, right, expected in simple_cases:
        self.assertEqual(edit_distance(left, right), expected)

    # the same wilds in swapped positions must not count as a free match
    swapped = y(x(a, b), x(b, a))
    aligned = y(x(a, b), x(a, b))
    self.assertNotEqual(edit_distance(swapped, aligned), 0)
    self.assertEqual(edit_distance(swapped, aligned), 2)
    self.assertEqual(edit_distance(swapped, x(x(a, b), x(a, b))), 3)

    exp1 = y(x, x, x, y, y, x, x, x, y)
    exp2 = y(x, y, y, x, y, x, x, y)
    self.assertEqual(edit_distance(exp1, exp2), 3)
def get_coefficient(y, x):
    '''
    divides y by x and returns - only works if x is a factor of y

    For a product, the operand that has x as a factor is divided and
    the other operand is multiplied back on.  For any other binary
    expression c(a, b) both operands are divided and recombined with c.

    raises: AssertionError if x is not a factor of y, or if x == 1
    '''
    assert is_factor(x, y)
    assert x != 1

    a, b, c = symath.wilds('a b c')
    val = symath.WildResults()

    if y == x:
        return symath.symbolic(1)

    if y.match(a * b, val):
        if is_factor(x, val.a):
            divided, untouched = val.a, val.b
        else:
            divided, untouched = val.b, val.a
        return get_coefficient(divided, x) * untouched

    if y.match(c(a, b), val):
        return val.c(get_coefficient(val.a, x), get_coefficient(val.b, x))

    # NOTE(review): nothing matched (e.g. y is a leaf other than x), so
    # the result is None - confirm callers expect that.
    return None
def solve(self):
    '''
    Feed every constraint in ``self`` to a fresh z3 solver and solve.

    Constraints that are already boolean-shaped (LogicalAnd/Or/Xor,
    Equal, <, >, <=, >=) are added as converted; any other expression
    ``e`` is added as the equation ``e == 0``.

    returns: Result(model) when the solver reports sat, otherwise None
    '''
    a, b = symath.wilds('a b')

    # Built once instead of per constraint.  The original chain tested
    # `a <= b` twice; the redundant second test is dropped.
    boolean_shapes = (
        symath.stdops.LogicalAnd(a, b),
        symath.stdops.LogicalOr(a, b),
        symath.stdops.LogicalXor(a, b),
        symath.stdops.Equal(a, b),
        a < b,
        a > b,
        a <= b,
        a >= b,
    )

    solver = z3.Solver()
    for i in self:
        if any(i.match(shape) for shape in boolean_shapes):
            solver.add(_convert(i))
        else:
            # a bare expression is read as "expression equals zero"
            solver.add(_convert(i) == 0)

    if solver.check() != z3.sat:
        return None
    return Result(solver.model())
def get_coefficient(y, x):
    '''
    divides y by x and returns - only works if x is a factor of y

    For a product, the operand that has x as a factor is divided and
    the other operand is multiplied back on.  For any other binary
    expression c(a, b) both operands are divided and recombined with c.

    raises: AssertionError if x is not a factor of y, or if x == 1
    '''
    assert is_factor(x, y)
    assert x != 1

    a, b, c = symath.wilds('a b c')
    val = symath.WildResults()

    if y == x:
        return symath.symbolic(1)

    if y.match(a * b, val):
        if is_factor(x, val.a):
            divided, untouched = val.a, val.b
        else:
            divided, untouched = val.b, val.a
        return get_coefficient(divided, x) * untouched

    if y.match(c(a, b), val):
        return val.c(get_coefficient(val.a, x), get_coefficient(val.b, x))

    # NOTE(review): nothing matched (e.g. y is a leaf other than x), so
    # the result is None - confirm callers expect that.
    return None
def solve(self):
    '''
    Feed every constraint in ``self`` to a fresh z3 solver and solve.

    Constraints that are already boolean-shaped (LogicalAnd/Or/Xor,
    Equal, <, >, <=, >=) are added as converted; any other expression
    ``e`` is added as the equation ``e == 0``.

    returns: Result(model) when the solver reports sat, otherwise None
    '''
    a, b = symath.wilds('a b')

    # Built once instead of per constraint.  The original chain tested
    # `a <= b` twice; the redundant second test is dropped.
    boolean_shapes = (
        symath.stdops.LogicalAnd(a, b),
        symath.stdops.LogicalOr(a, b),
        symath.stdops.LogicalXor(a, b),
        symath.stdops.Equal(a, b),
        a < b,
        a > b,
        a <= b,
        a >= b,
    )

    solver = z3.Solver()
    for i in self:
        if any(i.match(shape) for shape in boolean_shapes):
            solver.add(_convert(i))
        else:
            # a bare expression is read as "expression equals zero"
            solver.add(_convert(i) == 0)

    if solver.check() != z3.sat:
        return None
    return Result(solver.model())
def test_replace_all(self):
    '''
    replacing the pattern a & b with b turns x & (y | z) into y | z
    '''
    a, b, c = symath.wilds('a b c')
    x, y, z = symath.wilds('x y z')

    term = x & (y | z)
    expected = y | z
    rewritten = symath.replace(term, {a & b: b})

    self.assertEqual(expected, rewritten)
def test_replace_all(self):
    '''
    replacing the pattern a & b with b turns x & (y | z) into y | z
    '''
    a, b, c = symath.wilds('a b c')
    x, y, z = symath.wilds('x y z')

    term = x & (y | z)
    expected = y | z
    rewritten = symath.replace(term, {a & b: b})

    self.assertEqual(expected, rewritten)
def setUp(self):
    '''
    fixtures shared by the tests: wilds w and v, symbols x and y, and a
    symbol named head
    '''
    wild_w, wild_v = symath.wilds('w v')
    self.w = wild_w
    self.v = wild_v

    sym_x, sym_y = symath.symbols('x y')
    self.x = sym_x
    self.y = sym_y

    self.head = symath.symbols('head')
def test_symbol_inequal_wild(self):
    '''
    a wild and a symbol that share the name 'a' must not compare equal
    '''
    wild_a = symath.wilds('a')
    symbol_a = symath.symbols('a')
    self.assertNotEqual(symbol_a, wild_a)
def test_match_dont_extract_wilds_that_are_equal(self):
    '''
    matching a wild expression against an identical one records nothing
    in the WildResults
    '''
    a, b = symath.wilds('a b')
    captured = symath.WildResults()

    subject = a(b)
    subject.match(a(b), captured)

    self.assertEqual(len(captured), 0)
def test_match_dont_extract_wilds_that_are_equal(self):
    '''
    matching a wild expression against an identical one records nothing
    in the WildResults
    '''
    a, b = symath.wilds('a b')
    captured = symath.WildResults()

    subject = a(b)
    subject.match(a(b), captured)

    self.assertEqual(len(captured), 0)
def execute_instruction(ist, context):
    '''
    Symbolically execute one decoded instruction, updating ``context``
    in place.

    ist:     instruction expression such as Mov(a, b), Push(a) or
             Call(a, b, c)
    context: dict mapping register / DEREF expressions to their current
             symbolic values; mutated by this call
    raises:  BaseException when the instruction is not one of the
             handled shapes, or when a Call's 2nd or 3rd parameter is
             not a symath.Number
    '''
    a,b,c = symath.wilds('a b c')
    vals = symath.WildResults()

    def _set_big_reg(dst, src):
        # Store src into dst.  Writes to 16-bit and 8-bit registers are
        # folded into the enclosing E-register by masking out the bits
        # being replaced and or-ing the new value in.
        if dst in (AX,BX,CX,DX,SI,DI,BP,SP):
            # 'E' + name, e.g. AX -> EAX; keep the upper 16 bits
            edst = symath.symbols('E' + dst.name)
            context[edst.simplify()] = ((edst.substitute(context) & 0xffff0000) | src).simplify()
        elif dst in (AL,BL,CL,DL):
            # low byte: keep everything except bits 0-7
            edst = symath.symbols('E' + dst.name[0] + 'X')
            context[edst.simplify()] = ((edst.substitute(context) & 0xffffff00) | src).simplify()
        elif dst in (AH,BH,CH,DH):
            # high byte: keep everything except bits 8-15, shift src up
            edst = symath.symbols('E' + dst.name[0] + 'X')
            context[edst.simplify()] = ((edst.substitute(context) & 0xffff00ff) | (src << 8)).simplify()
        elif dst.match(DEREF(a, b)):
            # resolve the address using only the non-DEREF entries of
            # context, then store under the resolved DEREF key
            regsonly = {}
            for k in context:
                if not k.match(DEREF(a, b)):
                    regsonly[k] = context[k]
            context[dst.substitute(regsonly).simplify()] = src.simplify()
        else:
            context[dst.simplify()] = src.simplify()

    # and our giant switch statement, you knew there had to be one ;)
    if ist.match(Mov(a, b), vals):
        src = _get_src_value(vals.b, context)
        _set_big_reg(vals.a, src)

    # TODO: fixup movsx to actually be a signed operation
    elif ist.match(Movzx(a, b), vals) or ist.match(Movsx(a, b), vals):
        src = _get_src_value(vals.b, context)
        _set_big_reg(vals.a, src)

    elif ist.match(Lea(a, DEREF(b, c)), vals):
        # only the second DEREF argument (c) is loaded, not the memory
        src = _get_src_value(vals.c, context)
        _set_big_reg(vals.a, src)

    elif ist.match(Push(a), vals):
        # ESP moves down by 4 and the value is stored at DEREF(0x4, new ESP)
        src = _get_src_value(vals.a, context)
        esp = (_get_dst_value(ESP, context) - 4).simplify()
        context[DEREF(0x4, esp)] = src
        context[ESP] = esp

    elif ist.match(Pop(a), vals):
        # NOTE(review): unlike the other branches this passes the result
        # of _get_dst_value (not vals.a) to _set_big_reg - confirm intent
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(DEREF(0x4, ESP), context)
        esp = (_get_dst_value(ESP, context) + 4).simplify()
        _set_big_reg(dst, src)
        context[ESP] = esp

    # binary arithmetic / logic: the result goes to the destination and
    # the same result expression is recorded as EFLAGS
    elif ist.match(Sub(a, b), vals):
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(vals.b, context)
        _set_big_reg(vals.a, dst - src)
        _set_big_reg(EFLAGS, dst - src)

    elif ist.match(Add(a, b), vals):
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(vals.b, context)
        _set_big_reg(vals.a, dst + src)
        _set_big_reg(EFLAGS, dst + src)

    elif ist.match(Xor(a, b), vals):
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(vals.b, context)
        _set_big_reg(vals.a, dst ^ src)
        _set_big_reg(EFLAGS, dst ^ src)

    elif ist.match(Or(a, b), vals):
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(vals.b, context)
        _set_big_reg(vals.a, dst | src)
        _set_big_reg(EFLAGS, dst | src)

    elif ist.match(And(a, b), vals):
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(vals.b, context)
        _set_big_reg(vals.a, dst & src)
        _set_big_reg(EFLAGS, dst & src)

    elif ist.match(Shl(a, b), vals) or ist.match(Sal(a, b), vals):
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(vals.b, context)
        _set_big_reg(vals.a, dst << src)
        _set_big_reg(EFLAGS, dst << src)

    elif ist.match(Shr(a, b), vals) or ist.match(Sar(a, b), vals):
        # Shr and Sar are handled identically here
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(vals.b, context)
        _set_big_reg(vals.a, dst >> src)
        _set_big_reg(EFLAGS, dst >> src)

    # Cmp / Test only update EFLAGS (as a subtraction / an and)
    elif ist.match(Cmp(a, b), vals):
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(vals.b, context)
        _set_big_reg(EFLAGS, dst - src)

    elif ist.match(Test(a, b), vals):
        dst = _get_dst_value(vals.a, context)
        src = _get_src_value(vals.b, context)
        _set_big_reg(EFLAGS, dst & src)

    elif ist.match(Call(a,b,c), vals):
        if not isinstance(vals.b, symath.Number) or not isinstance(vals.c, symath.Number):
            raise BaseException("Call must have numbers for parameters 2 and 3")

        # EAX, ECX, EDX and EFLAGS become opaque CALLRESULT values and
        # ESP is advanced by the call's second parameter
        dst = _get_dst_value(vals.a, context)
        _set_big_reg(EAX, CALLRESULT(EAX, dst, vals.c))
        _set_big_reg(ECX, CALLRESULT(ECX, dst, vals.c))
        _set_big_reg(EDX, CALLRESULT(EDX, dst, vals.c))
        _set_big_reg(EFLAGS, CALLRESULT(EFLAGS, dst, vals.c))
        _set_big_reg(ESP, ESP.substitute(context) + vals.b)

    else:
        raise BaseException("instruction %s not understood" % (ist,))