def test_and_containing_ors(self):
    """Check that an And whose children are all Ors parses without exploding.

    At the time of writing, visit_branch assumes that Ands receive only
    strings or USELESS, so two adjacent alternation groups are worth pinning.

    """
    parsed = visit_regex('(a|b)(c|d)')

    left_group = Or([And(['a']), And(['b'])])
    right_group = Or([And(['c']), And(['d'])])
    eq_(parsed, Or([And([left_group, right_group])]))
def test_wtf(self):
    """Guard against the return of an ill-defined failure seen previously.

    The pattern is two adjacent alternation groups whose first branch is two
    characters long.

    """
    parsed = visit_regex('(aa|b)(c|d)')

    first_choice = Or([And(['aa']), And(['b'])])
    second_choice = Or([And(['c']), And(['d'])])
    eq_(parsed, Or([And([first_choice, second_choice])]))
def test_big_tree(self):
    """Simplify a hand-built tree shaped like the regex (a|b)(c|d).

    The single-child wrappers (the outer Or and the one-string Ands) are
    expected to collapse, leaving an And of two Ors of plain strings.

    """
    verbose_tree = Or([
        And([
            Or([And(['alpha']), And(['bravo'])]),
            Or([And(['charlie']), And(['delta'])]),
        ]),
    ])
    collapsed = verbose_tree.simplified()

    eq_(collapsed,
        And([Or(['alpha', 'bravo']), Or(['charlie', 'delta'])]))
def test_empty(self):
    """Pin down what an empty top-level tree simplifies to.

    This records the current result, an empty string, rather than a
    deliberate design decision; it isn't clear that it is desirable. The
    worry is what an empty string means inside an Or: Or(['hi', '']) reads
    as "hi can occur, or not", which makes it useless.

    """
    empty_tree = Or([And()])
    eq_(empty_tree.simplified(), '')
def test_nopes(self):
    """Check trees that must not collapse all the way down to a string."""
    # An And of a string and a one-item Or must stay a tree.
    anded = And(['smoo', Or(['him'])]).simplified()
    ok_(not isinstance(anded, basestring))

    # An Or of two strings should come back as an equal Or.
    ored = Or(['smoo', 'him']).simplified()
    eq_(ored, Or(['smoo', 'him']))
def test_single_strings(self):
    """Check trees that should collapse to one plain string."""
    flat = And(['smoo'])
    eq_(flat.simplified(), 'smoo')

    # NOTE(review): in the next two cases the inner node is handed to the
    # outer constructor directly rather than wrapped in a list -- confirm
    # whether a one-child nesting (e.g. And([Or([...])])) was intended.
    and_from_or = And(Or(['smoo']))
    eq_(and_from_or.simplified(), 'smoo')

    or_from_and = Or(And(['smoo']))
    eq_(or_from_and.simplified(), 'smoo')
def test_unicode(self):
    """Check that a character range with non-ASCII bounds is expanded."""
    # The range spans only a few code points, so it should be enumerated
    # member by member instead of being treated as USELESS.
    expected_members = Or([u'♣', u'♤', u'♥'])
    eq_simplified(u'[♣-♥]', expected_members)
def test_leading_bracket(self):
    """Treat a ] in first position of a character class as a literal."""
    # With another member after it, the class yields both characters.
    bracket_or_a = Or([']', 'a'])
    eq_simplified('[]a]', bracket_or_a)

    # On its own, the class boils down to the bare bracket.
    eq_simplified('[]]', ']')
def test_trailing_hyphen(self):
    """Treat a hyphen at the end of a character class as a literal hyphen."""
    literal_members = ['a', '-']
    eq_simplified('[a-]', Or(literal_members))
def test_3_branches(self):
    """Expect three top-level alternatives to become three Ands in one Or."""
    parsed = visit_regex('ab|cd|ef')

    branches = [And([literal]) for literal in ('ab', 'cd', 'ef')]
    eq_(parsed, Or(branches))
def test_classes(self):
    """Exercise a character class that lists its members one by one."""
    enumerated = ['a', 'b', 'c']
    eq_simplified('[abc]', Or(enumerated))
def test_nested_tree(self):
    """Check that a literal followed by an alternation group nests properly.

    Both patterns, with and without a negated class between the literal and
    the group, are expected to produce the same tree.

    """
    for pattern in ('ab[^q](cd|ef)', 'ab(cd|ef)'):
        parsed = visit_regex(pattern)
        group = Or([And(['cd']), And(['ef'])])
        eq_(parsed, Or([And(['ab', group])]))
def test_empty_branch(self):
    """Check the tree built when one alternative in a group is empty.

    The empty branch is expected to show up as an And holding an empty
    string, between the Ands for its neighbors.

    """
    parsed = visit_regex('(a||b)')

    alternatives = Or([And(['a']), And(['']), And(['b'])])
    eq_(parsed, Or([And([alternatives])]))
def test_anded_uselesses(self):
    """Make sure a USELESS splits an otherwise contiguous run of literals.

    The negated class sits between 'ab' and 'cd', which are expected to
    remain two separate strings rather than merging into one.

    """
    parsed = visit_regex('ab[^q]cd')
    separate_runs = And(['ab', 'cd'])
    eq_(parsed, Or([separate_runs]))
def test_range(self):
    """Check that a character range expands to each character it spans."""
    spanned = ['a', 'b', 'c']
    eq_simplified('[a-c]', Or(spanned))
def test_short_ngram_removal(self):
    """Check that substrings shorter than 3 chars are dropped on simplify."""
    # Inside an And, the short member disappears and the rest remain.
    anded = And(['oof', 'by', 'smurf']).simplified()
    eq_(anded, And(['oof', 'smurf']))

    # Inside an Or, only the long-enough string is left standing.
    ored = Or(['', 'by', 'smurf']).simplified()
    eq_(ored, 'smurf')

    # When every string is too short, the whole tree comes out as ''.
    all_short = Or([And(['', 'e', 'do']), 'hi']).simplified()
    eq_(all_short, '')
def test_merge_literals(self):
    """Check that adjacent character literals merge into a single string."""
    parsed = visit_regex('abcd')
    merged = And(['abcd'])
    eq_(parsed, Or([merged]))