def single_char_regex(parsed: Tuple[object, Any], flags: int) -> Optional[z3.ExprRef]:
    """
    Translate a single-character pattern node into a z3 regular expression.

    Takes a pattern object, like those returned by sre_parse.parse().

    Returns None if `parsed` is not a single-character regular expression.
    Returns an equivalent z3 regular expression if it can find one, or raises
    ReUnhandled if such an expression cannot be determined (an unsupported
    character category, or a character class containing a member that cannot
    be translated).
    """
    (op, arg) = parsed
    ignore_case = bool(re.IGNORECASE & flags)

    if op is LITERAL:
        char = chr(arg)
        if ignore_case:
            # TODO: when z3 gets unicode string support, case invariant matching
            # might need to be more complex. (see the casefold() builtin)
            return z3.Union(z3.Re(char.lower()), z3.Re(char.upper()))
        return z3.Re(char)

    if op is RANGE:
        lo, hi = arg
        if ignore_case:
            # TODO: when z3 gets unicode string support, case invariant matching
            # might need to be more complex. (see the casefold() builtin)
            # NOTE(review): each endpoint is case-mapped independently, so a
            # range whose endpoints straddle letter cases (e.g. "Z-a") may not
            # be covered faithfully -- confirm whether that matters to callers.
            return z3.Union(
                z3.Range(chr(lo).lower(), chr(hi).lower()),
                z3.Range(chr(lo).upper(), chr(hi).upper()),
            )
        return z3.Range(chr(lo), chr(hi))

    if op is IN:
        members = [single_char_regex(a, flags) for a in arg]
        if any(member is None for member in members):
            # A member this function does not recognise (presumably e.g. the
            # negation marker of a `[^...]` class) comes back as None. Passing
            # None on to z3.Union would fail inside z3, so report the class as
            # unhandled instead, as the docstring promises.
            raise ReUnhandled
        return z3.Union(*members)

    if op is CATEGORY:
        if arg == CATEGORY_DIGIT:
            # TODO: when z3 gets unicode string support, we'll need to
            # extend this logic
            return z3.Range("0", "9")
        raise ReUnhandled

    if op is ANY and arg is None:
        # TODO: when z3 gets unicode string support, we'll need to
        # revise this logic
        if re.DOTALL & flags:
            # With DOTALL, "." covers every 8-bit value.
            return z3.Range(z3.Unit(z3.BitVecVal(0, 8)), z3.Unit(z3.BitVecVal(255, 8)))
        # Without DOTALL, "." covers everything except value 10 (newline).
        return z3.Union(
            z3.Range(z3.Unit(z3.BitVecVal(0, 8)), z3.Unit(z3.BitVecVal(9, 8))),
            z3.Range(z3.Unit(z3.BitVecVal(11, 8)), z3.Unit(z3.BitVecVal(255, 8))),
        )

    return None
def setupProblem(self):
    """
    Populate the solver `self.s` with the base constraints of the problem.

    Declares two uninterpreted z3 functions over integer sequences, "final"
    (stored as `self.Qf`) and "delta" (stored as `self.d`), creates one
    sequence variable per entry yielded by `self.t.iter()` (stored in
    `self.stackvars`, keyed by the entry's word), and delegates the per-entry
    constraints to the `s_add_*` helpers.
    """
    self.stackvars = {}
    int_seq = z3.SeqSort(z3.IntSort())
    self.Qf = z3.Function("final", int_seq, z3.BoolSort())
    self.d = z3.Function("delta", int_seq, z3.StringSort(), int_seq)

    for word, _state, is_final, has_children in self.t.iter():
        # Variable names are derived from the hex encoding of the word.
        var_name = b"stack" + base64.b16encode(word)
        self.stackvars[word] = Sequence(var_name)
        self.s_add_finalstate(word, is_final)
        if is_final and has_children:
            self.s_add_nonemptystate(word)
        if len(word) > 0:
            # Only non-empty words get a transition constraint.
            self.s_add_transition_to(word)

    # The variable for the empty word is pinned to the one-element sequence [0].
    self.s.add(self.stackvars[b''] == z3.Unit(z3.IntVal(0)))
    # most useful convention:
    # accept by drained stack, but don't read any more and fail then
    # (`== True` builds a z3 constraint here; it is not a Python comparison.)
    self.s.add(self.Qf(z3.Empty(int_seq)) == True)
def construct_from_z3_model(self, m, d, Qf, alphabet):
    """
    Extract transition and acceptance tables from the z3 model `m`.

    Starting from symbol 0, evaluates `d` in `m` for every reached symbol and
    every element of `alphabet`, recording the resulting integer lists in
    `self.D[symbol][a]`. Every integer that appears in a result is queued for
    exploration once. Afterwards `Qf` is evaluated on the empty sequence and
    on each one-element sequence of an encountered symbol; those that hold
    are added to `self.QF`. The set of encountered symbols is stored in
    `self.symbols`, and `self.productive` is reset to None.

    Progress information is printed to stdout.
    """

    def flatten(seq):
        # Gather the integer leaves of a (possibly nested) sequence term.
        flat = List()
        for child in seq.children():
            if isinstance(child, z3.SeqRef):
                flat += flatten(child)
            else:
                flat += List([child.as_long()])
        return flat

    pending = [0]
    seen = set(pending)
    print("Extracting tables")
    self.D = {}
    self.QF = set()
    self.productive = None
    print("m[d] = %s" % m[d])
    print("m[qf] = %s" % m[Qf])
    symbols = {0}

    while pending:
        current = pending.pop()
        conf = z3.Unit(z3.IntVal(current))
        for a in alphabet:
            value = m.evaluate(
                d(conf, z3.StringVal(bytes([a]))),
                model_completion=True,
            )
            rhs = flatten(value)
            self.D.setdefault(current, {})[a] = rhs
            for sym in rhs:
                symbols.add(sym)
                if sym not in seen:
                    seen.add(sym)
                    pending.append(sym)

    # Acceptance on the empty sequence is checked separately from the symbols.
    if m.evaluate(Qf(z3.Empty(z3.SeqSort(z3.IntSort())))):
        self.QF.add(List([]))
    print("(stack/q) symbols encountered: %s" % symbols)
    for sym in symbols:
        if m.evaluate(Qf(z3.Unit(z3.IntVal(sym)))):
            self.QF.add(List([sym]))
    self.symbols = symbols