Esempio n. 1
0
def single_char_regex(parsed: Tuple[object, Any],
                      flags: int) -> Optional[z3.ExprRef]:
    """
    Translate a single-character pattern node into a z3 regular expression.

    Takes a pattern object, like those returned by sre_parse.parse().
    Returns None if `parsed` is not a single-character regular expression.
    Returns an equivalent z3 regular expression if it can find one, or raises
    ReUnhandled if such an expression cannot be determined.

    Only the re.IGNORECASE and re.DOTALL bits of `flags` are consulted.
    ReUnhandled is raised for a CATEGORY other than CATEGORY_DIGIT, and for
    an IN set that contains a member this function cannot translate.
    """
    (op, arg) = parsed
    if op is LITERAL:
        if re.IGNORECASE & flags:
            # TODO: when z3 gets unicode string support, case invariant matching
            # might need to be more complex. (see the casefold() builtin)
            return z3.Union(z3.Re(chr(arg).lower()), z3.Re(chr(arg).upper()))
        else:
            return z3.Re(chr(arg))
    elif op is RANGE:
        lo, hi = arg
        if re.IGNORECASE & flags:
            # TODO: when z3 gets unicode string support, case invariant matching
            # might need to be more complex. (see the casefold() builtin)
            # NOTE(review): only the two endpoints are case-mapped, so a range
            # whose endpoints are not both letters of the same case (e.g. "0"
            # to "Z") yields different ranges rather than a true
            # case-insensitive match -- confirm whether callers can hit this.
            return z3.Union(
                z3.Range(chr(lo).lower(),
                         chr(hi).lower()),
                z3.Range(chr(lo).upper(),
                         chr(hi).upper()),
            )
        else:
            return z3.Range(chr(lo), chr(hi))
    elif op is IN:
        # A character set is the union of its members. Every member must be
        # translatable: a None result (presumably e.g. the negation marker of
        # a "[^...]" set) must not be handed to z3.Union, which only accepts
        # regular expressions. Report it as unhandled instead.
        members = []
        for member in arg:
            member_regex = single_char_regex(member, flags)
            if member_regex is None:
                raise ReUnhandled
            members.append(member_regex)
        return z3.Union(*members)
    elif op is CATEGORY:
        if arg == CATEGORY_DIGIT:
            # TODO: when z3 gets unicode string support, we'll need to
            # extend this logic
            return z3.Range("0", "9")
        raise ReUnhandled
    elif op is ANY and arg is None:
        # TODO: when z3 gets unicode string support, we'll need to
        # revise this logic
        if re.DOTALL & flags:
            # With DOTALL, "." covers every 8-bit code, 0 through 255.
            # (An ASCII-only alternative would be z3.Range(chr(0), chr(127)).)
            return z3.Range(z3.Unit(z3.BitVecVal(0, 8)),
                            z3.Unit(z3.BitVecVal(255, 8)))
        else:
            # Without DOTALL, code 10 (newline) is excluded: 0-9 and 11-255.
            return z3.Union(
                z3.Range(z3.Unit(z3.BitVecVal(0, 8)),
                         z3.Unit(z3.BitVecVal(9, 8))),
                z3.Range(z3.Unit(z3.BitVecVal(11, 8)),
                         z3.Unit(z3.BitVecVal(255, 8))),
            )
    else:
        # Not a node kind that denotes exactly one character.
        return None
Esempio n. 2
0
    def setupProblem(self):
        """
        Declare the solver's functions and add the per-entry constraints.

        Creates the uninterpreted functions ``final`` (int sequence -> Bool)
        and ``delta`` (int sequence, string -> int sequence), then walks
        ``self.t.iter()``: each entry gets a sequence variable in
        ``self.stackvars`` and is passed to the ``s_add_*`` helpers. Finally
        the variable for the empty key is pinned to ``[0]`` and the empty
        sequence is declared final.
        """
        self.stackvars = {}

        int_seq = z3.SeqSort(z3.IntSort())
        self.Qf = z3.Function("final", int_seq, z3.BoolSort())
        self.d = z3.Function("delta", int_seq, z3.StringSort(), int_seq)

        for word, _state, is_final, has_children in self.t.iter():
            # The variable name is "stack" followed by the hex-encoded key.
            self.stackvars[word] = Sequence(b"stack" + base64.b16encode(word))

            self.s_add_finalstate(word, is_final)

            if is_final and has_children:
                self.s_add_nonemptystate(word)

            # Only non-empty keys get a transition constraint.
            if len(word) > 0:
                self.s_add_transition_to(word)

        # The entry with the empty key is the one-element sequence [0].
        root_stack = self.stackvars[b'']
        self.s.add(root_stack == z3.Unit(z3.IntVal(0)))

        # most useful convention:
        # accept by drained stack, but don't read any more and fail then
        drained = z3.Empty(int_seq)
        self.s.add(self.Qf(drained) == True)
Esempio n. 3
0
    def construct_from_z3_model(self, m, d, Qf, alphabet):
        """
        Read the transition table and final configurations out of a z3 model.

        Starting from symbol 0, evaluates ``d`` in model ``m`` for every
        reachable symbol (as a one-element sequence) and every element of
        ``alphabet`` (each wrapped as ``bytes([a])``). Results are stored in
        ``self.D[symbol][a]`` as a ``List`` of ints, and any symbol appearing
        in a result is explored in turn. ``self.QF`` receives ``List([])`` if
        ``Qf`` holds for the empty sequence, plus ``List([symbol])`` for each
        encountered symbol on which ``Qf`` holds. ``self.symbols`` is set to
        the set of encountered symbols and ``self.productive`` is reset to
        None.
        """
        def flatten(seq_expr):
            # Collect the integer leaves of a sequence expression, descending
            # into children that are themselves sequences.
            flat = List()
            for child in seq_expr.children():
                if isinstance(child, z3.SeqRef):
                    flat += flatten(child)
                else:
                    flat += List([child.as_long()])
            return flat

        pending = [0]
        seen = set(pending)

        print("Extracting tables")

        self.D = dict()
        self.QF = set()
        self.productive = None

        print("m[d]  = %s" % m[d])
        print("m[qf] = %s" % m[Qf])

        symbols = {0}

        while pending:
            current = pending.pop()
            conf = z3.Unit(z3.IntVal(current))

            for a in alphabet:
                step = d(conf, z3.StringVal(bytes([a])))
                rhs = flatten(m.evaluate(step, model_completion=True))

                symbols.update(rhs)

                self.D.setdefault(current, dict())[a] = rhs

                # Queue every symbol not yet scheduled for exploration.
                for successor in rhs:
                    if successor not in seen:
                        seen.add(successor)
                        pending.append(successor)

        empty_conf = z3.Empty(z3.SeqSort(z3.IntSort()))
        if m.evaluate(Qf(empty_conf)):
            self.QF.add(List([]))

        print("(stack/q) symbols encountered: %s" % symbols)

        for symbol in symbols:
            if m.evaluate(Qf(z3.Unit(z3.IntVal(symbol)))):
                self.QF.add(List([symbol]))

        self.symbols = symbols