Beispiel #1
0
def resolve_op(ist, opnum):
  '''
  Translate operand number `opnum` of the decoded instruction `ist` into a
  symbolic expression.

  - 'AbsoluteMemory' operands become DEREF(op_size, address) where address
    is index * scale + base + disp (absent components are skipped); for a
    `lea` instruction the bare, unsimplified address is returned instead
  - 'Register' operands become the lower-cased register symbol
  - 'Immediate' operands become a symbolic constant
  - 'AbsoluteMemoryAddress' operands become DEREF(op_size, disp)

  raises BaseException for any other operand type
  '''
  op = ist.operands[opnum]

  if op.type == 'AbsoluteMemory':
    # NOTE: the previous revision also decoded the instruction through
    # idautils here but never used the result; that dead call is removed.
    rv = 0

    if op.index is not None:
      rv += symbolic.symbols(distorm3.Registers[op.index].lower()) * op.scale
    if op.base is not None:
      rv += symbolic.symbols(distorm3.Registers[op.base].lower())
    if op.disp is not None:
      rv += op.disp

    # lea only computes the address, it never reads memory
    if ist.mnemonic.lower() == 'lea':
      return rv
    return DEREF(op.op_size, rv.simplify())

  elif op.type == 'Register':
    return symbolic.symbols(distorm3.Registers[op.index].lower())

  elif op.type == 'Immediate':
    return symbolic.symbolic(op.value)

  elif op.type == 'AbsoluteMemoryAddress':
    return DEREF(op.op_size, op.disp)

  else:
    raise BaseException("Unknown Operand Type %s" % (op.type))
Beispiel #2
0
def reg_mask(exp):
  '''
  Express a sub-register symbol as a masked view of its 32-bit parent.

  - AL/BL/CL/DL   -> 0xff & E?X
  - AH/BH/CH/DH   -> (0xff00 & E?X) >> 8
  - 16-bit names  -> 0xffff & E<name>
  Anything else is returned unchanged.
  '''
  low_bytes = (AL,BL,CL,DL)
  high_bytes = (AH,BH,CH,DH)
  words = (AX,BX,CX,DX,DI,SI,BP,SP)

  if exp in low_bytes:
    parent = symbols('E%sX' % (exp.name[0]))
    return 0xff & parent

  if exp in high_bytes:
    parent = symbols('E%sX' % (exp.name[0]))
    return (0xff00 & parent) >> 8

  if exp in words:
    parent = symbols('E%s' % (exp.name))
    return 0xffff & parent

  return exp
Beispiel #3
0
    def xtest_print_edit_distance_metric(self):
        '''
        Print the table of intermediate results recorded by
        _tuple_edit_distance for one pair of expressions.

        Disabled (named xtest_...) because _tuple_edit_distance no longer
        memoizes, so there are no recorded results to print.
        '''
        import symath.algorithms.editdistance as ed
        # explicit import: a wildcard import inside a function is a
        # SyntaxError on Python 3 and shadows builtins on Python 2
        from numpy import zeros
        ed._tuple_edit_distance.clear_results()
        x, y, z, w = symath.symbols('x y z w')
        exp1 = x(y, z, w, w, x)
        exp2 = x(w, z, w, y)
        print('')
        print('edit_distance(%s, %s) = %d' % (exp1, exp2,
                                              ed.edit_distance(exp1, exp2)))
        rv = ed._tuple_edit_distance.results
        util.pretty(rv)

        # -1 marks cells for which no result was recorded
        m = zeros([len(exp1), len(exp2)], dtype=int)
        m.fill(-1)

        for k in rv:
            m[len(k[0][0]), len(k[0][1])] = rv[k][0]
        print(m)
Beispiel #4
0
  def test_summation(self):
    '''d/dx of Sum(n, x(n) ** y) must equal Sum(n, y * x(n) ** (y - 1)).'''
    x,n,y = symath.symbols('x n y')
    Sum = symath.functions.Sum

    actual = diff(Sum(n, x(n) ** y), x).simplify()
    expected = (Sum(n, y * (x(n) ** (y - 1)))).simplify()

    self.assertEqual(expected, actual)
Beispiel #5
0
 def setUp(self):
   '''
   Build the fixture graph: x->y (edges e1 and e2), y->z (edge e2),
   plus z->w and x->w connected without an explicit edge value.
   '''
   x, y, z, w, e1, e2 = symath.symbols('x y z w e1 e2')
   self.x, self.y, self.z, self.w, self.e1, self.e2 = x, y, z, w, e1, e2

   graph = symath.graph.directed.DirectedGraph()
   self.g = graph
   graph.connect(x, y, e1)
   graph.connect(y, z, e2)
   graph.connect(x, y, e2)
   graph.connect(z, w)
   graph.connect(x, w)
Beispiel #6
0
 def _set_big_reg(dst, src):
   '''
   Record the write `dst = src` in the enclosing `context` mapping.

   Writes to 16-bit and 8-bit registers are folded into the 32-bit parent
   register: the parent's current value (after substituting `context`) is
   masked to keep the untouched bits and OR-ed with `src` (shifted left by
   8 for the high-byte registers). Writes to DEREF(...) destinations are
   keyed by the destination with only non-DEREF context entries
   substituted in. Everything else is stored under its simplified form.
   '''
   words = (AX,BX,CX,DX,SI,DI,BP,SP)
   low_bytes = (AL,BL,CL,DL)
   high_bytes = (AH,BH,CH,DH)

   if dst in words:
     wide = symath.symbols('E' + dst.name)
     merged = (wide.substitute(context) & 0xffff0000) | src
     context[wide.simplify()] = merged.simplify()
   elif dst in low_bytes:
     wide = symath.symbols('E' + dst.name[0] + 'X')
     merged = (wide.substitute(context) & 0xffffff00) | src
     context[wide.simplify()] = merged.simplify()
   elif dst in high_bytes:
     wide = symath.symbols('E' + dst.name[0] + 'X')
     merged = (wide.substitute(context) & 0xffff00ff) | (src << 8)
     context[wide.simplify()] = merged.simplify()
   elif dst.match(DEREF(a, b)):
     # substitute register values only; memory entries are left alone
     regsonly = dict(
       (k, context[k]) for k in context if not k.match(DEREF(a, b)))
     context[dst.substitute(regsonly).simplify()] = src.simplify()
   else:
     context[dst.simplify()] = src.simplify()
Beispiel #7
0
 def test_more_complicated_solver(self):
     '''A satisfiable system of five inequalities must yield a solution.'''
     x, y = symath.symbols("x y")
     constraints = solvers.z3.ConstraintSet()
     for constraint in (x < 0, x ** 3 < y ** 2, y ** 2 < 9, y > 0, x < y):
         constraints.add(constraint)

     self.assertNotEqual(constraints.solve(), None)
Beispiel #8
0
 def test_more_complicated_solver(self):
     '''The z3-backed ConstraintSet must solve a mixed polynomial system.'''
     x, y = symath.symbols('x y')
     system = [
         x < 0,
         x**3 < y**2,
         y**2 < 9,
         y > 0,
         x < y,
     ]

     cs = solvers.z3.ConstraintSet()
     for inequality in system:
         cs.add(inequality)

     solution = cs.solve()
     self.assertNotEqual(solution, None)
Beispiel #9
0
  def _get_operand_sym(op):
    '''
    Convert a decoded operand into a symbolic expression.

    - 'Immediate'             -> symbolic constant
    - 'AbsoluteMemoryAddress' -> DEREF(op_size, disp)
    - 'Register'              -> upper-cased register symbol
    - 'AbsoluteMemory'        -> DEREF(op_size, index * scale + base + disp),
                                 skipping the components that are None

    raises BaseException for any other operand type
    '''
    if op.type == 'Immediate':
      return symath.symbolic(op.value)
    elif op.type == 'AbsoluteMemoryAddress':
      return DEREF(op.op_size, op.disp)
    elif op.type == 'Register':
      return symath.symbols(distorm3.Registers[op.index].upper())
    elif op.type == 'AbsoluteMemory':
      rv = 0

      # identity checks: a component is absent only when it is None
      if op.index is not None:
        rv += symath.symbols(distorm3.Registers[op.index].upper()) * op.scale
      if op.base is not None:
        rv += symath.symbols(distorm3.Registers[op.base].upper())
      if op.disp is not None:
        rv += symath.symbolic(op.disp)

      return DEREF(op.op_size, rv)
    else:
      raise BaseException("Unknown operand type %s (%s)" % (op.type, op))
Beispiel #10
0
    def _get_operand_sym(op):
        '''
        Convert a decoded operand into a symbolic expression.

        Immediates become symbolic constants, registers become upper-cased
        register symbols, and both memory operand kinds become
        DEREF(op_size, address). For 'AbsoluteMemory' the address is
        index * scale + base + disp with None components skipped.

        raises BaseException for any other operand type
        '''
        if op.type == 'Immediate':
            return symath.symbolic(op.value)
        elif op.type == 'AbsoluteMemoryAddress':
            return DEREF(op.op_size, op.disp)
        elif op.type == 'Register':
            return symath.symbols(distorm3.Registers[op.index].upper())
        elif op.type == 'AbsoluteMemory':
            rv = 0

            # identity checks: a component is absent only when it is None
            if op.index is not None:
                rv += symath.symbols(
                    distorm3.Registers[op.index].upper()) * op.scale
            if op.base is not None:
                rv += symath.symbols(distorm3.Registers[op.base].upper())
            if op.disp is not None:
                rv += symath.symbolic(op.disp)

            return DEREF(op.op_size, rv)
        else:
            raise BaseException("Unknown operand type %s (%s)" % (op.type, op))
Beispiel #11
0
  def test_wilds_dont_substitute(self):
    '''
    Parts of the code base implicitly rely on substitute() being too "dumb"
    to treat wilds as patterns. This pins that behaviour so it cannot
    change without the rest of the code base being updated.
    '''
    a,b = symath.wilds('a b')
    x,y = symath.symbols('x y')
    replacement = x(x)
    subs = { x(a): replacement }

    # only the literal key x(a) is replaced; x(y) and x(b) are untouched
    self.assertEqual(x(y).substitute(subs), x(y))
    self.assertEqual(x(a).substitute(subs), replacement)
    self.assertEqual(x(b).substitute(subs), x(b))
Beispiel #12
0
def forward_data_flow(source, ea=None, calldepth=0):
    '''
    Walk the function graph containing `ea` depth-first and propagate taint
    forward from `source` through two-operand instructions.

    source    : the initially tainted expression
    ea        : address inside the function; defaults to ScreenEA()
    calldepth : accepted but not used by this implementation

    For every `inst(dst, src)` whose mnemonic is in tainted_dst_src_insts:
      - XOR(dst, dst) with a tainted dst removes dst from the taint set
      - a tainted src taints dst (and colors the address)
      - otherwise a tainted dst is overwritten and therefore untainted

    returns the VersionedSet of tainted expressions as of the last visited
    node
    '''
    if ea is None:
        ea = ScreenEA()

    _clear_colored()

    inst, dst, src = wilds("inst dst src")
    w = WildResults()

    tainted = VersionedSet()
    tainted.version = -1
    tainted.add(source)

    # NOTE(review): defined but never called in this function
    def _fix_esp(ea, exp):
        spd = GetSpd(ea)
        return exp.substitute({esp: (esp + spd).simplify()})

    fg = FunctionGraph(ea)

    # data connections graph
    # NOTE(review): populated with the source only and never read afterwards
    TAINTED = symbols("TAINTED")
    dg = DirectedGraph()
    dg.connect(TAINTED, source)

    for addr, level in fg.walk(ea, depthfirst=True):
        # the walk backtracked: restore the taint set of the parent level
        if level <= tainted.version:
            print("reverting to version %s" % (level - 1))
            tainted = tainted.get_version(level - 1)

        tainted.version = level

        syminst = symdecode(addr)

        if syminst.match(inst(dst, src), w) and w.inst in tainted_dst_src_insts:
            print("analyzing %s" % (syminst,))

            # untaint cleared registers
            if syminst.match(XOR(dst, dst)) and w.dst in tainted:
                tainted.remove(w.dst)

            elif w.src in tainted:
                _color(addr)
                print("tainting %s" % (w.dst,))
                tainted.add(w.dst)

            elif w.dst in tainted:
                tainted.remove(w.dst)

    return tainted
Beispiel #13
0
    def test_wilds_dont_substitute(self):
        '''
        substitute() is assumed elsewhere in the code base to be too "dumb"
        to account for wilds; this guards that assumption against silent
        change.
        '''
        a, b = symath.wilds('a b')
        x, y = symath.symbols('x y')
        subs = {x(a): x(x)}

        untouched = (x(y), x(b))
        for expression in untouched[:1]:
            self.assertEqual(expression.substitute(subs), expression)

        # the exact key *should* substitute
        self.assertEqual(x(a).substitute(subs), x(x))

        for expression in untouched[1:]:
            self.assertEqual(expression.substitute(subs), expression)
Beispiel #14
0
def signature(ea=None):
  '''
  Build a position-independent signature expression for the function
  containing `ea` (defaults to idc.ScreenEA()).

  Every node address of the function graph is decoded, in ascending
  address order, and rewritten so that:
    - numbers equal to a node address become off(<index of that node>)
    - registers other than ESP become wilds named after the register
  The rewritten instructions are returned as signature(inst0, inst1, ...).
  '''
  if ea is None:
    ea = idc.ScreenEA()

  off = symath.symbols('off')
  _signature = symath.symbols('signature')

  fg = functiongraph.FunctionGraph(ea)
  ns = sorted(fg.nodes.keys())
  # address -> position lookup, replacing a linear `in` + `index` scan
  # that ran for every sub-expression visited by walk()
  position = dict((addr, idx) for idx, addr in enumerate(ns))

  def _(exp):
    if isinstance(exp, symath.Number) and int(exp.n) in position:
      return off(position[int(exp.n)])
    elif is_register(exp) and exp not in (ESP,):
      return symath.wild(str(exp))
    else:
      return exp

  rv = [decode(i).walk(_) for i in ns]

  return _signature(*rv)
Beispiel #15
0
  def test_edit_distance(self):
    '''Pin edit_distance on small trees, wild patterns and a longer tuple.'''
    from symath.algorithms.editdistance import edit_distance,edit_substitutions
    x,y,z = symath.symbols('x y z')
    a,b,c = symath.wilds('a b c')

    def check(lhs, rhs, expected):
      self.assertEqual(edit_distance(lhs, rhs), expected)

    check(x(x, y, x), y(x, x, x), 2)
    check(x(y, x), x(y, y, x), 1)
    check(x(y, x), x(x), 1)
    check(x(y, y, x), x(x), 2)
    # a bare wild matches any expression
    check(a, x(y, y), 0)
    check(a(x, x), x(y, x), 1)

    # swapped wilds inside one expression must not be treated as equal
    self.assertNotEqual(edit_distance(y(x(a, b), x(b, a)), y(x(a, b), x(a, b))), 0)
    check(y(x(a, b), x(b, a)), y(x(a, b), x(a, b)), 2)
    check(y(x(a, b), x(b, a)), x(x(a, b), x(a, b)), 3)

    check(y(x, x, x, y, y, x, x, x, y), y(x, y, y, x, y, x, x, y), 3)
Beispiel #16
0
def powerhash(graph):
    '''
    TODO: MAY BE BROKEN SINCE SWITCHING TO SPARSE MATRIX FORMAT FOR GRAPHS

    based on: Approaches to Solving The Graph Isomorphism Problem by
    Jordy Eikenberry

    returns: GraphPowerHash(nodecount, sha256 hex digest)

    The algorithm:
      Take the adjacency matrix A of the graph (the second element of
      graph.adjacency_matrix()).

      (A ** n)[a,b] is the number of paths of length n from a to b, so the
      diagonal of A ** n counts the closed paths of length n per node.

      For each power A ** 1 .. A ** len(A), sort the diagonal to make it
      independent of node ordering and use it as one token of the hash.

      Isomorphic graphs have the same closed-path counts for every length.
      Equal hashes only make two graphs candidates; they do not imply
      isomorphism.
    '''

    adjacency = graph.adjacency_matrix()[1]
    node_count = len(adjacency)
    d = symbols('d')

    power = copy.copy(adjacency)
    tokens = []
    for _ in range(node_count):
        tokens.append(d(*sorted(numpy.diag(power))))
        power = numpy.dot(power, adjacency)

    digest = hashlib.sha256(str(GraphPowerHash(*tokens))).hexdigest()

    return GraphPowerHash(node_count, digest)
Beispiel #17
0
def powerhash(graph):
    """
    TODO: MAY BE BROKEN SINCE SWITCHING TO SPARSE MATRIX FORMAT FOR GRAPHS

    based on: Approaches to Solving The Graph Isomorphism Problem by
    Jordy Eikenberry

    returns: GraphPowerHash(nodecount, sha256 hex digest)

    The algorithm:
      A is the adjacency matrix (second element of
      graph.adjacency_matrix()); (A ** n)[a,b] is the number of paths of
      length n from a to b.

      For n = 1 .. len(A) the diagonal of A ** n is canonicalized by
      sorting and wrapped as d(...); the string form of all those tokens
      is hashed with sha256.

      Isomorphic graphs have the same count of closed paths for every
      length. A matching hash only marks a candidate; it does not imply
      isomorphism.
    """

    A = graph.adjacency_matrix()[1]
    nc = len(A)
    d = symbols("d")

    diagonals = []
    current_power = copy.copy(A)
    step = 0
    while step < nc:
        diagonal = list(numpy.diag(current_power))
        diagonal.sort()
        diagonals.append(d(*diagonal))
        current_power = numpy.dot(current_power, A)
        step += 1

    serialized = str(GraphPowerHash(*diagonals))
    return GraphPowerHash(nc, hashlib.sha256(serialized).hexdigest())
Beispiel #18
0
    def test_edit_distance(self):
        '''Check edit_distance against a table of known distances.'''
        from symath.algorithms.editdistance import edit_distance, edit_substitutions
        x, y, z = symath.symbols('x y z')
        a, b, c = symath.wilds('a b c')

        simple_cases = (
            (x(x, y, x), y(x, x, x), 2),
            (x(y, x), x(y, y, x), 1),
            (x(y, x), x(x), 1),
            (x(y, y, x), x(x), 2),
            (a, x(y, y), 0),
            (a(x, x), x(y, x), 1),
        )
        for left, right, distance in simple_cases:
            self.assertEqual(edit_distance(left, right), distance)

        # wilds in swapped positions must count as differences
        self.assertNotEqual(
            edit_distance(y(x(a, b), x(b, a)), y(x(a, b), x(a, b))), 0)

        wild_cases = (
            (y(x(a, b), x(b, a)), y(x(a, b), x(a, b)), 2),
            (y(x(a, b), x(b, a)), x(x(a, b), x(a, b)), 3),
            (y(x, x, x, y, y, x, x, x, y), y(x, y, y, x, y, x, x, y), 3),
        )
        for left, right, distance in wild_cases:
            self.assertEqual(edit_distance(left, right), distance)
Beispiel #19
0
  def xtest_print_edit_distance_metric(self):
    '''
    Print the intermediate results recorded by _tuple_edit_distance as a
    matrix indexed by the lengths of the two compared tuples.

    Disabled (named xtest_...) because _tuple_edit_distance no longer
    memoizes.
    '''
    import symath.algorithms.editdistance as ed
    # explicit import: a wildcard import inside a function is a
    # SyntaxError on Python 3 and shadows builtins on Python 2
    from numpy import zeros
    ed._tuple_edit_distance.clear_results()
    x,y,z,w = symath.symbols('x y z w')
    exp1 = x(y, z, w, w, x)
    exp2 = x(w, z, w, y)
    print('')
    print('edit_distance(%s, %s) = %d' % (exp1, exp2, ed.edit_distance(exp1, exp2)))
    rv = ed._tuple_edit_distance.results
    util.pretty(rv)

    # -1 marks cells for which no result was recorded
    m = zeros([len(exp1), len(exp2)], dtype=int)
    m.fill(-1)

    for k in rv:
      m[len(k[0][0]), len(k[0][1])] = rv[k][0]
    print(m)
Beispiel #20
0
 def setUp(self):
     '''Create the symbols x and y shared by the tests.'''
     x, y = symath.symbols("x y")
     self.x = x
     self.y = y
Beispiel #21
0
#!/usr/bin/env python

from symath import symbols

# data move
Mov, Movzx, Movsx, Push, Pop, Lea = symbols('mov movzx movsx push pop lea')

# arithmetic operations
Sub, Add, Xor, And, Or = symbols('sub add xor and or')
Shr, Shl, Sar, Sal = symbols('shr shl sar sal')

# comparison
Cmp, Test = symbols('cmp test')

# call instructions are weird in their definition
# they need to pass as a second argument the stack change
# and as the third argument the address at which the call
# takes place
Beispiel #22
0
  def test_log(self):
    '''d/dx Log(x * y) must simplify to the same form as y / (x * y).'''
    x,y = symath.symbols('x y')
    product = x * y

    dx = diff(symath.functions.Log(product), x).simplify()
    expected = (y / (x * y)).simplify()

    self.assertEqual(dx, expected)
Beispiel #23
0
 def test_diff_quotient_rule(self):
   '''d/dx (1 / x) must simplify to the same form as -1 / x**2.'''
   x = symath.symbols('x')
   # raw derivative is printed for inspection before simplification
   print(diff(1 / x, x))
   derivative = diff(1 / x, x).simplify()
   expected = (-1 / x**2).simplify()
   self.assertEqual(derivative, expected)
Beispiel #24
0
 def test_diff_chain_rule(self):
   '''d/dx Exp(2 * x) must simplify to the same form as 2 * Exp(2 * x).'''
   Exp,x,y = symath.symbols('Exp x y')
   inner = Exp(2 * x)
   derivative = diff(inner, x).simplify()
   expected = (2 * Exp(2 * x)).simplify()
   self.assertEqual(derivative, expected)
Beispiel #25
0
 def test_diff_non_var(self):
   '''Differentiating y with respect to an unrelated symbol x gives 0.'''
   x,y = symath.symbols('x y')
   self.assertEqual(diff(y, x), 0)
Beispiel #26
0
 def setUp(self):
   '''Create the symbols x, y and z shared by the tests.'''
   x, y, z = symath.symbols('x y z')
   self.x = x
   self.y = y
   self.z = z
Beispiel #27
0
  def test_symbol_inequal_wild(self):
    '''A wild and a symbol sharing the name 'a' must not compare equal.'''
    name = 'a'
    wild_a = symath.wilds(name)
    symbol_a = symath.symbols(name)

    self.assertNotEqual(symbol_a, wild_a)
Beispiel #28
0
 def test_union(self):
   '''After union() with the fixture graph, its x->y connection exists.'''
   other = symath.graph.directed.DirectedGraph()
   extra_node = symath.symbols('ognode')
   other.connect(self.x, extra_node)

   other.union(self.g)

   self.assertTrue(other.connectedQ(self.x, self.y))
Beispiel #29
0
 def __symbolic_column_name__(self, colnum):
     '''Return the symbolic name x(colnum) for column number `colnum`.'''
     return symbols('x')(colnum)
Beispiel #30
0
 def setUp(self):
     '''Expose fresh x, y and z symbols on the test instance.'''
     created = symath.symbols('x y z')
     self.x, self.y, self.z = created
Beispiel #31
0
    def test_symbol_inequal_wild(self):
        '''symbols('a') and wilds('a') are distinct despite the same name.'''
        wild = symath.wilds('a')
        symbol = symath.symbols('a')
        self.assertNotEqual(symbol, wild)
Beispiel #32
0
 def setUp(self):
     '''Create the wilds w, v and the symbols x, y, head for the tests.'''
     w, v = symath.wilds('w v')
     x, y = symath.symbols('x y')
     self.w, self.v = w, v
     self.x, self.y = x, y
     self.head = symath.symbols('head')
Beispiel #33
0
defines a number of "signature" heuristics for graphs
which can be used for trimming out candidate graphs
in isomorphism testing

some signatures are specific to specific types of graphs,
in that case, it is documented in the help for the particular
signature function
'''

import symath.util
from symath import symbols, wilds, WildResults
import numpy
import hashlib
import copy

# head symbols wrapping the result of each signature heuristic
(GraphSummation,
 GraphDensity,
 GraphComplexity,
 GraphPowerHash) = symbols(
    'GraphSummation GraphDensity GraphComplexity GraphPowerHash')


def summation(graph):
    '''
  returns GraphSummation(nodes : int, edges : int)
  '''

    sum_out = 0
    sum_in = 0

    for n in graph.nodes.values():
        sum_out += len(n.outgoing)
Beispiel #34
0
 def test_diff_power_rule(self):
   '''d/dx x ** 2 must simplify to the same form as 2 * x.'''
   x = symath.symbols('x')
   derivative = diff(x ** 2, x).simplify()
   expected = (2 * x).simplify()
   self.assertEqual(derivative, expected)
Beispiel #35
0
#!/usr/bin/env
from symath import symbols,wilds,WildResults,symbolic

# non-register head/marker symbols and the flags register
CALLRESULT, DEREF, EFLAGS = symbols('CALLRESULT DEREF EFLAGS')

# 32-bit general purpose registers
EAX,EBX,ECX,EDX,EDI,ESI,ESP,EBP = symbols('EAX EBX ECX EDX EDI ESI ESP EBP')

# 16-bit registers
AX,BX,CX,DX,DI,SI,BP,SP = symbols('AX BX CX DX DI SI BP SP')

# 8-bit registers
AL,AH,BL,BH,CL,CH,DL,DH = symbols('AL AH BL BH CL CH DL DH')

def reg_size(reg):
  '''
  Return the size in bytes of `reg` as a symbolic number.

  16-bit registers give 2, 8-bit registers give 1, 32-bit registers and
  EFLAGS give 4. For a DEREF(size, address) expression the `size`
  argument is returned as-is.

  raises BaseException when `reg` is none of the above
  '''
  size_table = (
    ((AX,BX,CX,DX,DI,SI,BP,SP), 2),
    ((AL,AH,BL,BH,CL,CH,DL,DH), 1),
    ((EAX,EBX,ECX,EDX,EDI,ESI,EBP,ESP,EFLAGS), 4),
  )
  for registers, nbytes in size_table:
    if reg in registers:
      return symbolic(nbytes)

  a,b = wilds('a b')
  val = WildResults()
  if reg.match(DEREF(a, b), val):
    return val.a

  raise BaseException('Unknown Register %s' % reg)

def is_register(exp):
  '''Return True when `exp` is one of the known 8/16/32-bit registers or EFLAGS.'''
  words = (AX,BX,CX,DX,DI,SI,BP,SP)
  half_words = (AL,AH,BL,BH,CL,CH,DL,DH)
  dwords = (EAX,EBX,ECX,EDX,EDI,ESI,EBP,ESP,EFLAGS)
  return exp in (words + half_words + dwords)

def reg_mask(exp):
Beispiel #36
0
 def test_diff_product(self):
   '''Product rule: d/dx (x * y) = y and d/dx (y * x ** 3) = 3 * y * x ** 2.'''
   x,y = symath.symbols('x y')

   linear = diff(x * y, x).simplify()
   self.assertEqual(linear, y)

   cubic = diff(y * x ** 3, x).simplify()
   self.assertEqual(cubic, (3 * y * x ** 2).simplify())
Beispiel #37
0
 def setUp(self):
   '''Create the wilds w, v and the symbols x, y, head used by the tests.'''
   wild_w, wild_v = symath.wilds('w v')
   sym_x, sym_y = symath.symbols('x y')
   self.w = wild_w
   self.v = wild_v
   self.x = sym_x
   self.y = sym_y
   self.head = symath.symbols('head')
Beispiel #38
0
 def test_diff_fail_on_unknown_function(self):
   '''diff() must raise DifferentiationError for a function it does not know.'''
   Unknown,x = symath.symbols('Unknown x')
   # only the diff() call itself is expected to raise
   with self.assertRaises(DifferentiationError):
     diff(Unknown(x), x)
Beispiel #39
0
from symath import symbols

# arithmetic
ADD,SUB,MUL,IMUL,DIV,IDIV = symbols('ADD SUB MUL IMUL DIV IDIV')
INC,DEC = symbols('INC DEC')

# data movement and stack
MOV,LEA = symbols('MOV LEA')
MOVSX,MOVZX = symbols('MOVSX MOVZX')
PUSH,POP,PUSHA,POPA = symbols('PUSH POP PUSHA POPA')

# bitwise and shifts
XOR,AND,OR = symbols('XOR AND OR')
SAR,SHR,SAL,SHL = symbols('SAR SHR SAL SHL')

# comparison
CMP,TEST = symbols('CMP TEST')

# jumps: available both individually and as one tuple
control_flow_instructions = symbols('JMP JZ JNZ JA JB JNA JNB JE JNE JG JL JNG JNL')
(JMP,JZ,JNZ,JA,JB,JNA,JNB,JE,JNE,JG,JL,JNG,JNL) = control_flow_instructions
Beispiel #40
0
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

from symath import symbols, wilds, WildResults, functions, stdops
from IPython.display import Latex

# Symbols named after Greek letters; _idisplay renders any expression found
# in this tuple as a LaTeX command by prefixing its name with a backslash.
_greek = symbols('theta gamma Theta Gamma alpha beta Alpha Beta Delta delta pi Pi phi Phi')

def _idisplay(exp):
    x,y,z,n = wilds('x y z n')
    ws = WildResults()
    
    if exp.match(x ** y, ws):
        return r"{%s} ^  {%s}" % (_idisplay(ws.x), _idisplay(ws.y))
    
    elif exp in _greek:
        return r'\%s' % (str(exp),)
    
    elif exp.match(-1 * x, ws):
        return r'-{%s}' % (_idisplay(ws.x),)
    
    elif exp.match(x + y, ws):
        return r'{%s} + {%s}' % (_idisplay(ws.x), _idisplay(ws.y))
    
    elif exp.match(x - y, ws):
        return r'{%s} - {%s}' % (_idisplay(ws.x), _idisplay(ws.y))
    
    elif exp.match(x * y, ws):
        return r'{%s} {%s}' % (_idisplay(ws.x), _idisplay(ws.y))
Beispiel #41
0
class NFA(object):
    EPSILON, ANY = symbols('NFA_EPSILON NFA_ANY')

    def __init__(self, start_state, magic=None):
        '''
        Create an empty automaton.

        start_state : the state execution begins in
        magic       : optional callable; when set it is called with the
                      current state set during compilation and its result
                      is unioned into that set
        '''
        self._start_state = start_state
        self._magic = magic

        # transition tables: forward (src -> token -> dsts) and
        # reverse (dst -> token -> srcs)
        self._transitions = {}
        self._transitions_to = {}
        self._has_epsilons = False

        # state bookkeeping
        self._states = set()
        self._final_states = set()
        self._interupt_states = set()
        self._state_hooks = {}

        # compilation state; _bytecode is rebuilt lazily when None
        self._bytecode = None
        self._tcounter = 0
        self._gcrefs = []

        # tagging support
        self._tag_assocs = util.Associations()
        self.do_tags = False

        # tie-break callback; by default the first candidate wins
        self.choose = lambda a, b: a

    def _choose(self, arglist):
        '''
        Reduce `arglist` to a single item by applying self.choose pairwise
        from left to right.

        returns the chosen item, or None when `arglist` is empty
        (the previous revision computed the result but never returned it)
        '''
        rv = None
        for i in arglist:
            if rv is None:
                rv = i
            else:
                rv = self.choose(rv, i)
        return rv

    def set_state_hook(self, state, hook):
        '''
        Install `hook` for `state`, or remove the existing hook when `hook`
        is None. Removing a hook that was never set is a no-op.
        '''
        if hook is None:
            self._state_hooks.pop(state, None)
        else:
            self._state_hooks[state] = hook

    def transitions_to(self, dst):
        '''
        Yield a (prevstate, t) tuple for every transition t that leads from
        prevstate into `dst`. Yields nothing when no transition enters `dst`.

        this is super slow and needs to be sped up
        '''
        incoming = self._transitions_to.get(dst)
        if incoming is None:
            return

        for token in incoming:
            for prevstate in incoming[token]:
                yield (prevstate, token)

    def tag(self, transition, src, dst, tagid=None):
        '''
        Associate the edge (transition, src, dst) with `tagid` in the tag
        table and return whatever the table's associate() returns.
        '''
        edge = (transition, src, dst)
        return self._tag_assocs.associate(edge, tagid)

    def is_tagged(self, transition, src, dst):
        '''Return True when the edge (transition, src, dst) is in the tag table.'''
        edge = (transition, src, dst)
        return edge in self._tag_assocs

    def reltags(self, src, cache=None):
        '''
        returns all the tags that are relevant at state `src`: the set of
        (tag, prevstate, state) triples for every tagged transition found
        by following incoming transitions backwards from `src`

        cache should be a dictionary mapping state -> set of triples; it is
        updated by the function and states already present in it are not
        recomputed. Returns an empty set when no tags are registered.
        '''
        if not self._tag_assocs:
            return set()

        # an explicit worklist is used instead of recursion so that deep
        # automata do not hit Python's recursion limit
        if cache == None:
            cache = {}

        # q: states still to visit; updateq: (state, predecessor) pairs
        # whose tag sets must be merged once every state has been visited
        q = _otq()
        q.append(src)
        updateq = _otq()

        while q:
            i = q.popleft()
            if i in cache:
                continue

            # tags on transitions that enter i directly
            cache[i] = set()
            for (s, t) in self.transitions_to(i):
                q.append(s)
                if self.is_tagged(t, s, i):
                    cache[i].add((self.tag(t, s, i), s, i))
                # appendleft: pairs are merged in reverse discovery order
                updateq.appendleft((i, s))

        # fold each predecessor's tag set into its successor
        while updateq:
            i = updateq.popleft()
            cache[i[0]].update(cache[i[1]])

        return cache[src]

    def _add_epsilon_states(self, stateset, gathered_epsilons):
        '''
        Extend `stateset` in place with every state reachable from its
        members through epsilon transitions.

        stateset is the set of initial states (mutated by this call)
        gathered_epsilons is a dictionary, keyed by initial state, of
        (dst: src) epsilon dictionaries; entries are computed on first use
        and reused afterwards
        '''
        # iterate over a snapshot because stateset grows inside the loop
        for i in list(stateset):
            if i not in gathered_epsilons:
                gathered_epsilons[i] = {}
                q = _otq()
                q.append(i)
                while q:
                    s = q.popleft()
                    # NOTE(review): j is re-queued even when it was already
                    # recorded, so an epsilon cycle looks like it would
                    # never drain q -- confirm cycles cannot occur here.
                    # NOTE(review): the tie-break passes (s, j) to choose()
                    # rather than the previously recorded source -- confirm
                    # this is intended.
                    for j in self._transitions.setdefault(s, {}).setdefault(
                            NFA.EPSILON, set()):
                        gathered_epsilons[i][
                            j] = s if j not in gathered_epsilons[
                                i] else self.choose(s, j)
                        q.append(j)
            stateset.update(gathered_epsilons[i].keys())

    def add_interupt_state(self, state):
        '''Mark `state` as an interrupt state.'''
        interupts = self._interupt_states
        interupts.add(state)

    def transitions(self, current_states, cached_transitions=None):
        '''
        Return the set of concrete tokens (everything except NFA.ANY and
        NFA.EPSILON) that have at least one destination from any state in
        `current_states`.

        cached_transitions is an optional state -> token-set dictionary; it
        is filled for states seen for the first time and reused otherwise.
        '''
        if cached_transitions is None:
            cached_transitions = {}

        # built once instead of once per token
        pseudo_tokens = set([NFA.ANY, NFA.EPSILON])

        rv = set()
        for cs in current_states:
            if cs not in cached_transitions:
                cached_transitions[cs] = set()
                for t in self._transitions.setdefault(cs, {}):
                    if t in pseudo_tokens:
                        continue
                    # skip tokens whose destination set is empty
                    if self._transitions[cs][t]:
                        cached_transitions[cs].add(t)
            rv.update(cached_transitions[cs])
        return rv

    def nextstates(self, current_states, transition):
        '''
        Return the set of states reachable from `current_states` on
        `transition`. For a concrete token (neither NFA.ANY nor
        NFA.EPSILON) the destinations of each state's NFA.ANY transition
        are included as well.
        '''
        reachable = set()
        for state in current_states:
            outgoing = self._transitions.setdefault(state, {})
            reachable.update(outgoing.setdefault(transition, set()))

        if transition in (NFA.ANY, NFA.EPSILON):
            return reachable

        for state in current_states:
            reachable.update(self._transitions[state].setdefault(NFA.ANY, set()))

        return reachable

    def _write_transition_code(self, utags, ltags, codeblock):
        '''
        Append tag instructions to `codeblock`.

        utags : iterable of sequences whose item 0 is a tag id and item 1
                its value; duplicates of one tag id are resolved with
                self.choose. Each surviving pair becomes a VM.UpdateTagV.
        ltags : iterable of sequences (tag id, item 1, item 2); tag ids
                already present in utags are skipped and duplicates are
                resolved with self.choose on item 1. Each surviving entry
                becomes a VM.LoadTagV.
        '''
        updates = {}
        for entry in utags:
            tagid, value = entry[0], entry[1]
            if tagid in updates:
                updates[tagid] = self.choose(value, updates[tagid])
            else:
                updates[tagid] = value

        loads = {}
        for entry in ltags:
            tagid = entry[0]
            # an update for the same tag takes precedence over a load
            if tagid in updates:
                continue
            if tagid not in loads or self.choose(loads[tagid][0],
                                                 entry[1]) == entry[1]:
                loads[tagid] = (entry[1], entry[2])

        for tagid in updates:
            codeblock.append(VM.UpdateTagV(tagid, updates[tagid]))

        for tagid in loads:
            first, second = loads[tagid][0], loads[tagid][1]
            codeblock.append(VM.LoadTagV(tagid, first, second))

    def _transitions_to_dfa_bytecode(self, sources, trn,
                                     cached_tcode,
                                     debug=False,
                                     compiled_states=None,
                                     gathered_epsilons=None,
                                     cached_transitions=None,
                                     reltags_cache=None):
        '''
        Return the code block to jump to when transition `trn` is taken
        from the state set `sources`.

        The result is memoized in `cached_tcode` under (trn, tuple(sources)).
        When tag code has to run on the transition, a dedicated transition
        block ending in a jump to the state block is returned; otherwise
        the state block itself is returned.
        '''
        cache_key = (trn, tuple(sources))
        if cache_key in cached_tcode:
            return cached_tcode[cache_key]

        # get the stateblock
        state_block = self._states_to_dfa_bytecode(
            sources,
            tran=trn,
            debug=debug,
            compiled_states=compiled_states,
            gathered_epsilons=gathered_epsilons,
            cached_transitions=cached_transitions,
            cached_tcode=cached_tcode,
            reltags_cache=reltags_cache)

        # build the transition block
        transition_block = self._bytecode.newblock(
            "Transition 0x%x" % (self._tcounter))
        self._tcounter += 1

        # get a list of tags to emit code for, and reltags to copy previous values from
        if self.do_tags:
            tags = set()
            rtags = set()
            for s in sources:
                for d in self._transitions[s].setdefault(trn, set()):
                    rtags.update(self.reltags(d, reltags_cache))
                    if self.is_tagged(trn, s, d):
                        tags.add((self.tag(trn, s, d), d))

            self._write_transition_code(tags, rtags, transition_block)

        if transition_block:
            # jump to the state block
            transition_block.append(VM.Jmp(state_block))
            result = transition_block
        else:
            # empty transition block: no need for an extra jmp
            result = state_block

        cached_tcode[cache_key] = result
        return result

    def _states_to_dfa_bytecode(self, states,
        tran=None,
        debug=False,
        compiled_states=None,
        gathered_epsilons=None,
        cached_transitions=None,
        cached_tcode=None,
        reltags_cache=None
        ):
        '''
        Compile the DFA state reached from `states` (optionally after
        taking transition `tran`) and return its code block.

        `states` is extended in place with its epsilon closure. The cache
        dictionaries are created when omitted and shared with the
        recursive compilation of follow-up states:
          compiled_states    : tuple(state set) -> code block
          gathered_epsilons  : see _add_epsilon_states
          cached_transitions : see transitions
          cached_tcode       : see _transitions_to_dfa_bytecode
          reltags_cache      : see reltags

        returns the instruction pointer to the bytecode added
        '''
        # snapshot taken before the epsilon closure is added
        pstates = copy.copy(states)

        if reltags_cache is None:
            reltags_cache = {}

        if cached_tcode is None:
            cached_tcode = {}

        if cached_transitions is None:
            cached_transitions = {}

        if gathered_epsilons is None:
            gathered_epsilons = {}

        self._add_epsilon_states(states, gathered_epsilons)

        if tran is not None:
            states = self.nextstates(states, tran)
            self._add_epsilon_states(states, gathered_epsilons)

        if self._magic is not None:
            states = states.union(self._magic(states))

        tstates = tuple(states)

        # this is used so we only compile each stateset once
        if compiled_states is None:
            compiled_states = {}

        if tstates in compiled_states:
            return compiled_states[tstates]

        # grab the ip from our codeblock
        ip = self._bytecode.newblock(tstates)
        compiled_states[tstates] = ip

        # TODO
        # epsilon transitions are never 'taken' so we need
        # to insert any ltagv/utagv instructions required
        # for all epsilon transitions
        # gathered_epsilons[state] holds a dictionary of dst: src mappings, so we can use that data

        if self.do_tags:
            tags = set()
            rtags = set()

            for ts in pstates:
                for dst in gathered_epsilons[ts]:
                    rtags.update(self.reltags(dst, reltags_cache))
                    src = gathered_epsilons[ts][dst]
                    if self.is_tagged(NFA.EPSILON, src, dst):
                        tags.add((self.tag(NFA.EPSILON, src, dst), dst))

            self._write_transition_code(tags, rtags, ip)

        # run any defined state hooks
        for s in tstates:
            if s in self._state_hooks:
                ip.append(VM.PyCode(self._state_hooks[s]))

        # do a multi-match for any final states
        finals = self._final_states.intersection(states)
        if len(finals) > 0:
            ip.append(VM.MultiMatch(finals))

        # do any interupts required
        interupts = self._interupt_states.intersection(states)
        if len(interupts) > 0:
            ip.append(VM.MultiInterupt(interupts))

        # consume a character
        ip.append(VM.Consume())

        ts = self.transitions(states, cached_transitions)

        if debug:
            # `defaults` is only assigned further down; referencing it here
            # raised UnboundLocalError whenever debug=True, so the value
            # shown in the message is computed explicitly
            debug_defaults = self.nextstates(states, NFA.ANY)
            print('compiling bytecode for stateset:\n\t%s\n\t0x%x: %s' % (
                states, ip, (debug_defaults, ts)))

        # factory so that each lambda captures its own transition `t`
        def mkbytecode(t):
            return lambda: self._transitions_to_dfa_bytecode(
                states,
                t,
                cached_tcode,
                debug=debug,
                compiled_states=compiled_states,
                gathered_epsilons=gathered_epsilons,
                cached_transitions=cached_transitions,
                reltags_cache=reltags_cache)

        # for any of the non-default states add a conditional jmp
        for k in ts:

            if k in (NFA.ANY, NFA.EPSILON):
                continue

            jmppoint = VM.DelayedArg(mkbytecode(k))
            ip.append(VM.Compare(k))
            ip.append(VM.CondJmp(jmppoint))

        # jmp to default state if there is one, otherwise leave
        defaults = self.nextstates(states, NFA.ANY)
        if len(defaults) > 0:
            jmppoint = VM.DelayedArg(mkbytecode(NFA.ANY))
            ip.append(VM.Jmp(jmppoint))
        else:
            ip.append(VM.Leave())

        # return the instruction pointer
        return ip

    def copy(self):
        '''
        Return a new NFA with the same start state, a deep copy of the
        final states, copies of both transition tables, and the same
        _has_epsilons flag. The _bytecode object is shared with the
        original, not copied. Other attributes keep their constructor
        defaults.
        '''
        clone = NFA(self._start_state)
        clone._final_states = copy.deepcopy(self._final_states)
        clone._has_epsilons = self._has_epsilons
        clone._bytecode = self._bytecode
        clone._transitions = {}
        clone._transitions_to = {}

        # both tables share the layout state -> token -> set of states
        table_pairs = (
            (self._transitions, clone._transitions),
            (self._transitions_to, clone._transitions_to),
        )
        for original, duplicate in table_pairs:
            for state in original:
                for token in original[state]:
                    duplicate.setdefault(
                        state, {})[token] = original[state][token].copy()
        return clone

    def all_states(self):
        '''
        Return the set containing the start state and every state that is
        the destination of some transition.
        '''
        known = set([self._start_state])
        for source in self._transitions:
            for destinations in self._transitions[source].values():
                known.update(destinations)
        return known

    def add_final_state(self, state):
        '''Mark `state` as final and discard the compiled bytecode.'''
        # compiled code depends on the final states, so force a rebuild
        self._bytecode = None
        self._final_states.add(state)

    def clear_final_states(self):
        '''Forget all final states and discard the compiled bytecode.'''
        # compiled code depends on the final states, so force a rebuild
        self._bytecode = None
        self._final_states = set()

    def find_epsilon_states(self, state, rv=None):
        '''
        Return the set of states reachable from `state` through one or more
        epsilon transitions.

        rv : optional set to accumulate into; states already in it are not
             expanded again. A fresh set is created when omitted. (The
             previous default was a single shared set() object, so results
             leaked between calls and between NFA instances.)
        '''
        if rv is None:
            rv = set()

        # explicit worklist instead of recursion; the visited set is rv
        pending = [state]
        while pending:
            current = pending.pop()
            for nxt in self._transitions.setdefault(current, {}).setdefault(
                    NFA.EPSILON, set()):
                if nxt not in rv:
                    rv.add(nxt)
                    pending.append(nxt)
        return rv

    def get_starting_states(self):
        '''
        Return the start state together with every state reachable from it
        through epsilon transitions.
        '''
        # pass a fresh accumulator explicitly so the result never depends
        # on a default argument shared between calls
        epstates = self.find_epsilon_states(self._start_state, rv=set())
        return set.union(epstates, set([self._start_state]))

    def bytecode(self, debug=False):
        '''
        Return the compiled code block for this automaton, building the
        entry point on first use (or after it was invalidated).

        The entry point only holds a jump whose target is a delayed
        argument, so compiling the start state is deferred. `debug` is
        only captured when the entry point is (re)built.
        '''
        # identity check: the code block may define its own comparison
        if self._bytecode is None:
            entry = VM.CodeBlock('EntryPoint')
            self._bytecode = entry
            entry.append(
                VM.Jmp(
                    VM.DelayedArg(lambda: self._states_to_dfa_bytecode(
                        set([self._start_state]), debug=debug))))

        return self._bytecode

    def execute(self, tokenstring, debug=False):
        """Run this automaton's bytecode over `tokenstring` and return
        whatever the bytecode's execute() returns.

        The bytecode is obtained from self.bytecode() (called without
        arguments) and is given `debug`, the number of states reported by
        all_states() and the number of entries in self._tag_assocs.
        """
        program = self.bytecode()
        return program.execute(
            tokenstring,
            debug=debug,
            state_count=len(self.all_states()),
            tag_count=len(self._tag_assocs))

    def add_transition(self, oldstate, token, newstate):
        self._transitions.setdefault(oldstate, {}).setdefault(token, set())
        self._transitions[oldstate][token].add(newstate)

        self._transitions_to.setdefault(newstate, {}).setdefault(token, set())
        self._transitions_to[newstate][token].add(oldstate)

        if token == NFA.EPSILON:
            self._has_epsilons = True
        self._bytecode = None

    def locate_final_states(self):
        """Recompute the final states from the transition table.

        A state is considered final when it is the destination of some
        transition, is not the start state, and has no outgoing transitions
        of its own.  The result replaces self._final_states and is returned.

        The cached bytecode is dropped as well, in line with
        add_final_state() and clear_final_states(), so it is rebuilt on the
        next request.
        """
        destinations = set()
        sources = set([self._start_state])

        for state, by_token in self._transitions.items():
            # update in place rather than rebuilding the set per token
            for targets in by_token.values():
                destinations.update(targets)
            # only states with at least one outgoing token count as sources
            if by_token:
                sources.add(state)

        self._final_states = destinations - sources
        self._bytecode = None

        return self._final_states

    def get_transitions(self, oldstate, newstate):
        """Return the set of tokens on which `oldstate` has a transition to
        `newstate`.

        An empty transition map is created for `oldstate` if it has none.
        """
        outgoing = self._transitions.setdefault(oldstate, {})
        return set(token for token in outgoing if newstate in outgoing[token])

    def get_following_states(self, oldstate):
        """Return the set of all states reachable from `oldstate` in one
        transition, whatever the token.

        An empty transition map is created for `oldstate` if it has none.
        """
        successors = set()
        for destinations in self._transitions.setdefault(oldstate, {}).values():
            successors.update(destinations)
        return successors

    def final_states(self, states):
        """Return the members of the set `states` that are final states."""
        accepting = self._final_states
        return states.intersection(accepting)

    def to_graph(self):
        """Build a symath DirectedGraph picture of this automaton.

        Every state from all_states() becomes a node and every transition an
        edge labelled with its token: 'E' for NFA.EPSILON, '*' for NFA.ANY,
        otherwise the quoted token (or chr(<code>) when its character code
        is outside 0x30..0x7a).  Tagged transitions get "/<tag>" appended.
        Final states are coloured red, hooked states blue and the start
        state green, in that order.
        """
        from symath.graph import directed

        def describe(token):
            # textual edge label for a single token
            if token == NFA.EPSILON:
                return 'E'
            if token == NFA.ANY:
                return '*'
            code = ord(str(token))
            if 0x30 <= code <= 0x7a:
                return "'%s'" % (token, )
            return "'%s'" % ('chr(%s)' % (code), )

        graph = directed.DirectedGraph()

        for state in self.all_states():
            graph.add_node(state)
            outgoing = self._transitions.setdefault(state, {})
            for token in outgoing:
                for target in outgoing[token]:
                    label = describe(token)
                    if self.is_tagged(token, state, target):
                        label = "%s/%s" % (label,
                                           self.tag(token, state, target))
                    graph.connect(state, target, label)

        for final in self._final_states:
            graph.set_color(final, 'red')

        for hooked in self._state_hooks:
            graph.set_color(hooked, 'blue')

        # coloured last so the start state always ends up green
        graph.set_color(self._start_state, 'green')

        return graph

    @staticmethod
    def _test():
        """Build a small automaton, run it over three sample strings, print
        each result (or "No Results"), then print the linked bytecode."""
        print('----- NFA TEST -----')
        nfa = NFA(0)

        # 0 -a-> 1 -ANY-> 2, with a shortcut 0 -b-> 3
        nfa.add_transition(0, 'a', 1)
        nfa.add_transition(0, 'b', 3)
        nfa.add_transition(1, NFA.ANY, 2)

        # then the chain 2 -c-> 3 -d-> 4 -e-> 5 -f-> 6
        for offset, token in enumerate("cdef"):
            nfa.add_transition(2 + offset, token, 3 + offset)

        nfa.locate_final_states()

        def _exec(s):
            matched = False
            for result in nfa.execute(s):
                print("execute('%s') = %s" % (s, result))
                matched = True
            if not matched:
                print("execute('%s') = No Results" % (s))

        for sample in ('abcdefhi', 'ccdef', 'bdef'):
            _exec(sample)

        linked = nfa.bytecode().link()
        print(linked)
Beispiel #42
0
 def setUp(self):
     """Store the symbols named 'x' and 'y' on the instance as self.x and
     self.y."""
     first, second = symath.symbols('x y')
     self.x = first
     self.y = second
Beispiel #43
0
 def __symbolic_column_name__(self, colnum):
   """Return the symbol 'x' applied to `colnum`."""
   return symbols('x')(colnum)
Beispiel #44
0
from symath import symbols

# Symbols that op_add / op_del apply to a version to tag an operation as an
# addition or a deletion.
VER_ADD,VER_DELETE = symbols('VER_ADD VER_DELETE')

def op_add(obj, version):
  """Return the pair (VER_ADD(version), obj)."""
  tag = VER_ADD(version)
  return (tag, obj)

def op_del(obj, version):
  """Return the pair (VER_DELETE(version), obj)."""
  tag = VER_DELETE(version)
  return (tag, obj)
Beispiel #45
0
#!/usr/bin/env python

import distorm3
import symath as symbolic 
import copy
import memoize
import symath.graph.algorithms as algorithms
from memoize import Memoize
from functiongraph import FunctionGraph

from idafun import *

# registers
# One symbol per register name, grouped by name family: the e?? names, the
# two-letter names that regmasks maps onto masked e?? registers, and the
# ?l/?h names.
# NOTE(review): presumably the x86 32/16/8-bit general purpose registers --
# confirm against the disassembler's register naming.
eax,ebx,ecx,edx,esi,edi,ebp,esp = symbolic.symbols('eax ebx ecx edx esi edi ebp esp')
ax,bx,cx,dx,si,di,bp,sp = symbolic.symbols('ax bx cx dx si di bp sp')
al,ah,bl,bh,cl,ch,dl,dh = symbolic.symbols('al ah bl bh cl ch dl dh')
eflags = symbolic.symbols('eflags')

# functions
# Symbols that are meant to be applied to arguments to build expressions.
# PHI is declared associative and commutative; AT and LOOKUP are symbols
# whose printed names are '@' and '=>'.
DEREF = symbolic.symbols('DEREF')
PHI = symbolic.symbols('PHI', associative=True, commutative=True)
AT = symbolic.symbols('@')
CALL = symbolic.symbols('CALL')
LOOKUP = symbolic.symbols('=>')

regmasks = \
    {
    ax: eax & 0xffff,
    bx: ebx & 0xffff,
    cx: ecx & 0xffff,
    dx: edx & 0xffff,