예제 #1
0
파일: taint.py 프로젝트: bniemczyk/revtools
def forward_data_flow(source, ea=None, calldepth=0):
    """Propagate taint from ``source`` through the function graph built at ``ea``.

    Every instruction yielded by ``FunctionGraph(ea).walk`` is decoded with
    ``symdecode``. Two-operand instructions whose mnemonic is listed in
    ``tainted_dst_src_insts`` update the tainted set:

    * ``XOR dst, dst`` on a tainted ``dst`` untaints it;
    * a tainted ``src`` taints ``dst`` and colors the instruction;
    * otherwise a tainted ``dst`` is overwritten by untainted data and is
      removed from the set.

    Args:
        source: expression that is tainted initially.
        ea: address handed to ``FunctionGraph`` and used as the walk start;
            when ``None`` the result of ``ScreenEA()`` is used.
        calldepth: accepted for interface compatibility; not used here.

    Returns:
        The ``VersionedSet`` of tainted operands as it stands after the last
        instruction visited by the walk.
    """
    if ea is None:
        ea = ScreenEA()

    _clear_colored()

    inst, dst, src = wilds("inst dst src")
    w = WildResults()

    tainted = VersionedSet()
    tainted.version = -1
    tainted.add(source)

    fg = FunctionGraph(ea)

    # data connections graph
    # NOTE(review): dg is populated but never read or returned in this
    # function -- looks like unfinished work; kept so behavior is unchanged.
    TAINTED = symbols("TAINTED")
    dg = DirectedGraph()
    dg.connect(TAINTED, source)

    for addr, level in fg.walk(ea, depthfirst=True):
        # A level at or below the current version means the walk is no longer
        # descending (presumably the depth-first walk backtracked to another
        # branch -- confirm), so roll the set back to the state recorded for
        # the parent level.
        if level <= tainted.version:
            print("reverting to version %s" % (level - 1))
            tainted = tainted.get_version(level - 1)

        tainted.version = level

        syminst = symdecode(addr)

        if syminst.match(inst(dst, src), w) and w.inst in tainted_dst_src_insts:
            print("analyzing %s" % (syminst,))

            # untaint cleared registers
            if syminst.match(XOR(dst, dst)) and w.dst in tainted:
                tainted.remove(w.dst)

            elif w.src in tainted:
                _color(addr)
                print("tainting %s" % (w.dst,))
                tainted.add(w.dst)

            # destination overwritten from an untainted source
            elif w.dst in tainted:
                tainted.remove(w.dst)

    return tainted
예제 #2
0
def symstep(addr=None, known=None):
  '''
  Symbolically execute the single instruction at addr.

  takes a HashableDict of known values and returns a copy of it that is updated
  after executing the instruction at addr

  Parameters:
    addr  -- address of the instruction to step; ScreenEA() is used when None
    known -- HashableDict mapping locations to known expressions; an empty
             HashableDict is used when None. The argument itself is not
             modified, a copy is updated and returned.

  Returns:
    (exp, known) where exp is the simplified expression computed by the
    instruction (after substituting the known values) and known is the
    updated copy. For instructions listed in control_flow_instructions the
    result is (None, known) and only known[esp] has been refreshed.

  Raises:
    Exception -- when the instruction matches none of the handled patterns
  '''

  if addr is None:
    addr = ScreenEA()

  if known is None:
    known = HashableDict()

  # use IDAs stack analysis, instead of tracking it ourselves, so we don't have to
  # ascend into all calls, also allows easier fixing by hand if required
  # this does mean that esp will always be reflected in known as though it's been calculated
  # from the beginning of the function instead of where ever we really started calculating,
  # but that is ok with me for now
  known = known.copy()
  known[esp] = (esp + GetSpd(addr))

  inst,src,dst = wilds('inst src dst')
  w = WildResults()

  si = symdecode(addr)

  exp = None

  if si.match(ADD(dst,src), w):
    exp = w.src + w.dst

  # NOTE(review): CMP (here) and TEST (below) share the SUB/AND handling, so
  # their result is also stored into known[dst] further down -- confirm this is
  # intended, since these instructions presumably only affect flags.
  elif si.match(SUB(dst,src), w) or si.match(CMP(dst,src), w):
    exp = w.dst - w.src

  elif si.match(MUL(dst,src), w):
    exp = w.dst * w.src

  elif si.match(DIV(dst,src), w):
    exp = w.dst / w.src

  elif si.match(MOV(dst,src), w) or si.match(LEA(dst,src), w):
    exp = w.src

  elif si.match(XOR(dst, src), w):
    exp = w.dst ^ w.src

  elif si.match(OR(dst, src), w):
    exp = w.dst | w.src

  elif si.match(AND(dst, src), w) or si.match(TEST(dst, src), w):
    exp = w.dst & w.src

  elif si.match(SAR(dst, src), w) or si.match(SHR(dst, src), w):
    exp = w.dst >> w.src

  elif si.match(SAL(dst, src), w) or si.match(SHL(dst, src), w):
    exp = w.dst << w.src

  elif si.match(INC(dst), w):
    exp = w.dst + 1

  elif si.match(DEC(dst), w):
    exp = w.dst - 1

  elif si.match(PUSH(src), w):
    exp = w.src

  elif si.match(POP(dst), w):
    # value read from the slot addressed by the current stack pointer
    exp = DEREF(0x4, known[esp])

    if exp in known:
      exp = known[exp]

  # for control flow operations we just don't do anything
  elif si[0] in control_flow_instructions:
    return (None, known)

  # Nothing matched: fail with the intended error here. Previously execution
  # fell through to exp.simplify() on None and died with an AttributeError
  # before the "unknown instruction" exception could ever be raised.
  if exp is None:
    raise Exception("Uknown instruction in symstep: %s" % (si,))

  exp = exp.simplify()
  exp = exp.substitute(known).simplify()

  # update our known
  if si.match(PUSH(src), w):
    # a push stores into the slot 4 below the current stack pointer
    dst = DEREF(0x4, known[esp]-4).simplify()

  elif si.match(inst(dst), w) or si.match(inst(dst, src), w):
    dst = w.dst

    # memory destinations are resolved against the known values so the key
    # is expressed in terms of what is currently known
    if w.dst[0] == DEREF:
      dst = w.dst.substitute(known).simplify()

  else:
    raise Exception("Uknown instruction in symstep: %s" % (si,))

  known[dst] = exp

  return (exp, known)