def test_star_advanced():
    # The base machine recognises a*ba, so its star is (a*ba)*. Simply wiring
    # the final state back to the initial state would give the wrong language.
    dead = "oblivion"
    transitions = {
        0: {"a": 0, "b": 1},
        1: {"a": 2, "b": dead},
        2: {"a": dead, "b": dead},
        dead: {"a": dead, "b": dead},
    }
    base = FSM(
        alphabet={"a", "b"},
        states={0, 1, 2, dead},
        initial=0,
        finals={2},
        map=transitions,
    )
    starred = base.star()

    assert starred.alphabet == frozenset(["a", "b"])

    assert starred.accepts("")
    for rejected in ("a", "b", "aa"):
        assert not starred.accepts(rejected)
    for accepted in ("ba", "aba", "aaba"):
        assert starred.accepts(accepted)
    assert not starred.accepts("aabb")
    assert starred.accepts("abababa")
def test_empty(a, b):
    # The `a` and `b` arguments are expected to recognise at least one string.
    assert not a.empty()
    assert not b.empty()

    # NOTE(review): `{}` is an empty dict literal, not an empty set; it is kept
    # as-is here. Presumably FSM only iterates the alphabet -- confirm.

    # Final state 1 exists but no transition leads to it.
    unreachable_final = FSM(
        alphabet={},
        states={0, 1},
        initial=0,
        finals={1},
        map={0: {}, 1: {}},
    )
    assert unreachable_final.empty()

    # The initial state is itself final, so the empty string is recognised.
    accepts_empty_string = FSM(
        alphabet={},
        states={0},
        initial=0,
        finals={0},
        map={0: {}},
    )
    assert not accepts_empty_string.empty()

    # State 2 is final, but nothing ever transitions into it.
    stranded = FSM(
        alphabet={"a", "b"},
        states={0, 1, None, 2},
        initial=0,
        finals={2},
        map={
            0: {"a": 1, "b": 1},
            1: {"a": None, "b": None},
            None: {"a": None, "b": None},
            2: {"a": None, "b": None},
        },
    )
    assert stranded.empty()
def test_alphabet_unions():
    # With sparse transition maps, FSMs whose alphabets disagree can still be
    # combined with the usual operators.
    only_a = FSM(
        alphabet={"a"},
        states={0, 1},
        initial=0,
        finals={1},
        map={0: {"a": 1}},
    )
    only_b = FSM(
        alphabet={"b"},
        states={0, 1},
        initial=0,
        finals={1},
        map={0: {"b": 1}},
    )

    # Union
    assert (only_a | only_b).accepts(["a"])
    assert (only_a | only_b).accepts(["b"])
    # Intersection
    assert (only_a & only_b).empty()
    # Concatenation
    assert (only_a + only_b).accepts(["a", "b"])
    # Symmetric difference
    assert (only_a ^ only_b).accepts(["a"])
    assert (only_a ^ only_b).accepts(["b"])
def test_dead_default():
    '''
        Transitions, and even whole states, may be left out of the map.
        Every consumer of `fsm.map` has to cope with that, so a range of
        operations is exercised on such a sparse machine.
    '''
    sparse_map = {
        0: {"/": 1},
        1: {"*": 2},
        2: {"/": 2, anything_else: 2, "*": 3},
        3: {"/": 4, anything_else: 2, "*": 3},
    }
    blockquote = FSM(
        alphabet={"/", "*", anything_else},
        states={0, 1, 2, 3, 4, 5},
        initial=0,
        finals={4},
        map=sparse_map,
    )
    comment = ["/", "*", "whatever", "*", "/"]

    assert blockquote.accepts(comment)
    assert not blockquote.accepts(["*", "*", "whatever", "*", "/"])

    # These only need to run without raising on a sparse map.
    str(blockquote)  # stringification
    blockquote | blockquote
    blockquote & blockquote
    blockquote ^ blockquote
    reversed(blockquote)

    complement = blockquote.everythingbut()
    assert not complement.accepts(comment)
    # "*" has no transition from the initial state, so this walks off the map.
    assert blockquote.everythingbut().accepts(["*"])

    assert blockquote.islive(3)
    assert blockquote.islive(4)
    assert not blockquote.islive(5)

    generated = blockquote.strings()
    assert next(generated) == ["/", "*", "*", "/"]
def test_anything_else_acceptance():
    # A single final state that loops on every listed symbol, including
    # `anything_else`; "d" is not listed explicitly yet must be accepted.
    symbols = ("a", "b", "c", anything_else)
    machine = FSM(
        alphabet=set(symbols),
        states={1},
        initial=1,
        finals={1},
        map={1: {symbol: 1 for symbol in symbols}},
    )
    assert machine.accepts("d")
def test_new_set_methods(a, b):
    # FSMs are meant to be usable exactly like sets of strings (symbol lists),
    # mirroring the set API described at
    # https://docs.python.org/3/library/stdtypes.html#set-types-set-frozenset
    # This test walks through that set-like surface.
    either = a | b

    # len(): finite languages report a size, a starred one must overflow.
    assert len(a) == 1
    assert len(either * 4) == 16
    try:
        len(a.star())
    except OverflowError:
        pass
    else:
        assert False

    # "in", in both negated spellings
    assert "a" in a
    assert not ("a" in b)
    assert "a" not in b

    # Iteration and list comprehension
    pairs = either * 2
    for first in pairs:
        assert first == ["a", "a"]
        break
    assert [item for item in pairs] == [
        ["a", "a"],
        ["a", "b"],
        ["b", "a"],
        ["b", "b"],
    ]

    # set.union() imitation
    assert FSM.union(a, b) == a.union(b)
    assert len(FSM.union()) == 0
    assert FSM.intersection(a, b) == a.intersection(b)

    # Explanation: `a & b & c` can be read as `EVERYTHING & a & b & c`, where
    # `EVERYTHING` accepts every possible string, so intersecting no sets at
    # all gives `EVERYTHING`. The alphabet is the union of the operands'
    # alphabets, though, and with no operands that union is empty. The only
    # string `EVERYTHING` can then recognise is the empty one, [] (or "").
    nothing_intersected = FSM.intersection()
    assert len(nothing_intersected) == 1
    assert [] in nothing_intersected

    # Differences: method, unbound call and operator forms must agree.
    assert either.difference(a) == FSM.difference(either, a) == either - a == b
    assert (
        either.difference(a, b)
        == FSM.difference(either, a, b)
        == either - a - b
        == null("ab")
    )
    assert a.symmetric_difference(b) == FSM.symmetric_difference(a, b) == a ^ b

    # Disjointness and subset/superset comparisons
    assert a.isdisjoint(b)
    assert a <= either
    assert a < either
    assert a != either
    assert either > a
    assert either >= a

    # Concatenation with varying numbers of operands
    assert list(a.concatenate(a, a).strings()) == [["a", "a", "a"]]
    assert list(a.concatenate().strings()) == [["a"]]
    assert list(FSM.concatenate(b, a, b).strings()) == [["b", "a", "b"]]
    assert list(FSM.concatenate().strings()) == []

    # copy() must hand back a distinct object
    assert a.copy() is not a
def test_reduce():
    # Three states, but the only final state (1) is never entered, so the
    # machine accepts nothing and a single state is enough after reduction.
    transitions = {state: {None: 2} for state in (0, 1, 2)}
    bloated = FSM(
        alphabet={None},
        states={0, 1, 2},
        initial=0,
        finals={1},
        map=transitions,
    )
    reduced = bloated.reduce()
    assert len(reduced.states) == 1
def test_invalid_fsms():
    # Each entry is a set of constructor arguments that must be rejected with
    # an exception other than AssertionError.
    # NOTE(review): the `{}` literals for alphabet/states are empty dicts, not
    # sets; they are passed through unchanged.
    invalid_definitions = [
        # initial state 1 is not a state
        dict(alphabet={}, states={}, initial=1, finals=set(), map={}),
        # final state 2 is not a state
        dict(alphabet={}, states={1}, initial=1, finals={2}, map={}),
        # invalid transition for state 1, symbol "a"
        dict(
            alphabet={"a"},
            states={1},
            initial=1,
            finals=set(),
            map={1: {"a": 2}},
        ),
    ]

    for definition in invalid_definitions:
        try:
            FSM(**definition)
        except AssertionError:
            # An AssertionError from the constructor counts as a failure.
            assert False
        except Exception:
            pass
        else:
            # Construction unexpectedly succeeded.
            assert False
def test_reverse_brzozowski():
    # The forward machine is (a|b)*a(a|b)
    forward = FSM(
        alphabet={"a", "b"},
        states={"A", "B", "C", "D", "E"},
        initial="A",
        finals={"C", "E"},
        map={
            "A": {"a": "B", "b": "D"},
            "B": {"a": "C", "b": "E"},
            "C": {"a": "C", "b": "E"},
            "D": {"a": "B", "b": "D"},
            "E": {"a": "B", "b": "D"},
        },
    )
    for accepted in ("aa", "ab", "aab", "bab", "abbbbbbbab"):
        assert forward.accepts(accepted)
    for rejected in ("", "a", "b", "ba", "bb", "bbbbbbbbbbbb"):
        assert not forward.accepts(rejected)

    # Reversing it gives (a|b)a(a|b)*
    backward = reversed(forward)
    for accepted in ("aa", "ba", "baa", "bab", "babbbbbbba"):
        assert backward.accepts(accepted)
    for rejected in ("", "a", "b", "ab", "bb", "bbbbbbbbbbbb"):
        assert not backward.accepts(rejected)

    # The string generator must yield matches in this exact order.
    expected_sequence = [
        ["a", "a"],
        ["b", "a"],
        ["a", "a", "a"],
        ["a", "a", "b"],
        ["b", "a", "a"],
        ["b", "a", "b"],
        ["a", "a", "a", "a"],
    ]
    generated = backward.strings()
    for expected in expected_sequence:
        assert next(generated) == expected
def to_fsm(self, alphabet=None, prefix_postfix=None, flags=None) -> FSM:
    """Build the union of the FSMs of every entry in ``self.options``.

    ``alphabet`` and ``prefix_postfix`` fall back to the attributes of the
    same name on ``self`` when omitted; ``flags`` falls back to
    ``_REFlags(0)``. The flags are merged with ``self.added_flags`` and
    ``self.removed_flags`` via ``_combine_flags`` before being handed to
    each option.
    """
    alphabet = self.alphabet if alphabet is None else alphabet
    prefix_postfix = (
        self.prefix_postfix if prefix_postfix is None else prefix_postfix
    )
    base_flags = _REFlags(0) if flags is None else flags
    effective_flags = _combine_flags(
        base_flags, self.added_flags, self.removed_flags
    )

    option_fsms = (
        option.to_fsm(alphabet, prefix_postfix, effective_flags)
        for option in self.options
    )
    return FSM.union(*option_fsms)
def test_bug_36():
    # Intersecting two machines whose alphabets differ only in whether "s" is
    # listed explicitly must still accept ["s"].
    wildcard_only = FSM(
        alphabet={anything_else},
        states={0},
        initial=0,
        finals={0},
        map={0: {anything_else: 0}},
    )
    starts_with_s = FSM(
        alphabet={'s', anything_else},
        states={0, 1},
        initial=0,
        finals={1},
        map={
            0: {'s': 1},
            1: {'s': 1, anything_else: 1},
        },
    )
    intersection = wildcard_only & starts_with_s

    assert wildcard_only.accepts(["s"])
    assert starts_with_s.accepts(["s"])
    assert intersection.alphabet == {anything_else, "s"}
    assert intersection.accepts(["s"])
def test_addbug():
    # Regression for an odd defect in fsm.__add__(), exposed by "[bc]*c".
    symbols = {"a", "b", "c", anything_else}

    # First operand: loops on "b"/"c" in its initial (and final) state 1.
    bc_star = FSM(
        alphabet=symbols,
        states={0, 1},
        initial=1,
        finals={1},
        map={
            0: {anything_else: 0, "a": 0, "b": 0, "c": 0},
            1: {anything_else: 0, "a": 0, "b": 1, "c": 1},
        },
    )
    assert bc_star.accepts("")

    # Second operand: only "c" leads from the initial state to the final one.
    single_c = FSM(
        alphabet=symbols,
        states={0, 1, 2},
        initial=1,
        finals={0},
        map={
            0: {anything_else: 2, "a": 2, "b": 2, "c": 2},
            1: {anything_else: 2, "a": 2, "b": 2, "c": 0},
            2: {anything_else: 2, "a": 2, "b": 2, "c": 2},
        },
    )
    assert single_c.accepts("c")

    combined = bc_star + single_c
    assert combined.accepts("c")
def b():
    # Machine over {"a", "b"}: "b" moves from the initial state to the only
    # final state; every other transition ends in the sink state "ob".
    sink = "ob"
    return FSM(
        alphabet={"a", "b"},
        states={0, 1, sink},
        initial=0,
        finals={1},
        map={
            0: {"a": sink, "b": 1},
            1: {"a": sink, "b": sink},
            sink: {"a": sink, "b": sink},
        },
    )
def test_oblivion_crawl(a):
    # Crawling a new FSM should not introduce an oblivion state. `abc` is
    # defined without one, so none of the derived machines should gain one;
    # the expected state counts below pin that down.
    abc = FSM(
        alphabet={"a", "b", "c"},
        states={0, 1, 2, 3},
        initial=0,
        finals={3},
        map={
            0: {"a": 1},
            1: {"b": 2},
            2: {"c": 3},
        },
    )

    def state_count(machine):
        return len(machine.states)

    assert state_count(abc + abc) == 7
    assert state_count(abc.star()) == 3
    assert state_count(abc * 3) == 10
    assert state_count(reversed(abc)) == 4
    assert state_count(abc | abc) == 4
    assert state_count(abc & abc) == 4
    assert state_count(abc ^ abc) == 1
    assert state_count(abc - abc) == 1
def to_fsm(self, alphabet=None, prefix_postfix=None, flags=None) -> FSM:
    """Build the union of the FSMs of ``self.groups``, complemented if negated.

    ``alphabet`` and ``prefix_postfix`` default to the attributes of the same
    name on ``self``. When ``self.negate`` is set, the result is
    ``_ALL.to_fsm(alphabet)`` minus that union.

    Raises:
        ValueError: if the effective ``prefix_postfix`` is not ``(0, 0)``.
    """
    alphabet = self.alphabet if alphabet is None else alphabet
    prefix_postfix = (
        self.prefix_postfix if prefix_postfix is None else prefix_postfix
    )
    if prefix_postfix != (0, 0):
        raise ValueError("Can not have prefix/postfix on CharGroup-level")

    member_fsms = (
        group.to_fsm(alphabet, flags=flags) for group in self.groups
    )
    combined = FSM.union(*member_fsms)
    if not self.negate:
        return combined
    return _ALL.to_fsm(alphabet).difference(combined)
def to_fsm(self, alphabet=None, prefix_postfix=None, flags=None) -> FSM:
    """Concatenate the FSMs of ``self.parts``, applying lookaheads on the way.

    ``alphabet`` and ``prefix_postfix`` default to the attributes of the same
    name on ``self``. The result is padded with ``prefix_postfix[0]``
    repetitions of ``_ALL`` in front and ``prefix_postfix[1]`` behind.

    Parts that are ``_NonCapturing`` split the sequence into segments. The
    result is then assembled from the back: each segment is concatenated in
    front of what has been built so far, and each lookahead restricts that
    remainder (intersection for positive, difference for negated).

    Raises:
        NotImplementedError: if a ``_NonCapturing`` part has ``backwards`` set.
    """
    if alphabet is None:
        alphabet = self.alphabet
    if prefix_postfix is None:
        prefix_postfix = self.prefix_postfix
    leading, trailing = prefix_postfix[0], prefix_postfix[1]

    any_symbol = _ALL.to_fsm(alphabet)
    any_suffix = any_symbol.star()

    # Pairs of (marker, payload): marker None means payload is a list of FSMs
    # to concatenate; otherwise marker is the lookahead part and payload is
    # the FSM of its inner expression.
    pending = []
    segment = [any_symbol.times(leading)]
    for part in self.parts:
        if not isinstance(part, _NonCapturing):
            segment.append(part.to_fsm(alphabet, (0, 0), flags))
            continue
        lookahead_fsm = part.inner.to_fsm(alphabet, (0, 0), flags)
        if part.backwards:
            raise NotImplementedError("lookbacks are not implemented")
        pending.append((None, segment))
        pending.append((part, lookahead_fsm))
        segment = []
    segment.append(any_symbol.times(trailing))

    result = FSM.concatenate(*segment)
    for marker, payload in reversed(pending):
        if marker is None:
            result = FSM.concatenate(*payload, result)
            continue
        assert isinstance(marker, _NonCapturing) and not marker.backwards
        # NOTE(review): the negated branch subtracts `payload` itself, while
        # the positive branch intersects with `payload + any_suffix`; confirm
        # that this asymmetry is intended.
        if marker.negate:
            result = result.difference(payload)
        else:
            result = result.intersection(payload + any_suffix)
    return result
def to_fsm(self, alphabet=None, prefix_postfix=None, flags=None) -> FSM:
    """Build a two-state FSM accepting exactly one symbol of the alphabet.

    ``alphabet`` defaults to ``self.alphabet``. Unless ``flags`` has
    ``_REFlags.SINGLE_LINE`` set, ``'\\n'`` is removed from the accepted
    symbols. ``prefix_postfix`` is accepted but not used here.
    """
    if alphabet is None:
        alphabet = self.alphabet

    single_line = flags is not None and flags & _REFlags.SINGLE_LINE
    accepted = alphabet if single_line else alphabet - {'\n'}

    # State 0 is initial, state 1 is final.
    transitions = {0: {symbol: 1 for symbol in accepted}}
    return FSM(
        alphabet=alphabet,
        states={0, 1},
        initial=0,
        finals={1},
        map=transitions,
    )
def test_reverse_abc():
    # A fully specified machine whose only path to the final state is
    # "a", "b", "c"; its reversal has to accept "cba".
    dead_row = {"a": None, "b": None, "c": None}
    forward = FSM(
        alphabet={"a", "b", "c"},
        states={0, 1, 2, 3, None},
        initial=0,
        finals={3},
        map={
            0: {**dead_row, "a": 1},
            1: {**dead_row, "b": 2},
            2: {**dead_row, "c": 3},
            3: dict(dead_row),
            None: dict(dead_row),
        },
    )
    backward = reversed(forward)
    assert backward.accepts("cba")
def test_difference(a, b):
    # Exercises the `^` operator (symmetric difference) on the fixtures and on
    # a machine that takes either symbol from the initial to the final state.
    either_symbol = FSM(
        alphabet={"a", "b"},
        states={0, 1, None},
        initial=0,
        finals={1},
        map={
            0: {"a": 1, "b": 1},
            1: {"a": None, "b": None},
            None: {"a": None, "b": None},
        },
    )

    def language(machine):
        return list(machine.strings())

    assert language(a ^ a) == []
    assert language(b ^ b) == []
    assert language(a ^ b) == [["a"], ["b"]]
    assert language(either_symbol ^ a) == [["b"]]
def to_fsm(self, alphabet=None, prefix_postfix=None, flags=None) -> FSM:
    """Build a two-state FSM accepting one symbol from (or outside) ``self.chars``.

    ``alphabet`` and ``prefix_postfix`` default to the attributes of the same
    name on ``self``. With ``_REFlags.CASE_INSENSITIVE`` set, both the lower-
    and upper-case form of every character are used. If ``self.negated`` is
    set, the accepted symbols are those of ``alphabet`` not in that set.

    Raises:
        ValueError: if the effective ``prefix_postfix`` is not ``(0, 0)``.
        NotImplementedError: if ``flags`` contains anything besides
            ``CASE_INSENSITIVE`` and ``SINGLE_LINE``.
    """
    alphabet = self.alphabet if alphabet is None else alphabet
    prefix_postfix = (
        self.prefix_postfix if prefix_postfix is None else prefix_postfix
    )
    if prefix_postfix != (0, 0):
        raise ValueError("Can not have prefix/postfix on CharGroup-level")

    ignore_case = False
    if flags is not None:
        ignore_case = flags & _REFlags.CASE_INSENSITIVE
        # Anything left after masking out the two handled flags is unsupported.
        unsupported = flags & ~_REFlags.CASE_INSENSITIVE & ~_REFlags.SINGLE_LINE
        if unsupported:
            raise NotImplementedError(unsupported)

    if ignore_case:
        lowered = (c.lower() for c in self.chars)
        uppered = (c.upper() for c in self.chars)
        chars = frozenset({*lowered, *uppered})
    else:
        chars = self.chars

    # Negated: every other symbol of the alphabet; otherwise just these ones.
    accepted = alphabet - chars if self.negated else chars

    # State 0 is initial, state 1 is final.
    return FSM(
        alphabet=alphabet,
        states={0, 1},
        initial=0,
        finals={1},
        map={0: {symbol: 1 for symbol in accepted}},
    )
def test_bug_28():
    # Starring the machine for ab* (giving (ab*)*) used to cause defects.
    ab_star = FSM(
        alphabet={'a', 'b'},
        states={0, 1},
        initial=0,
        finals={1},
        map={
            0: {'a': 1},
            1: {'b': 1},
        },
    )
    assert ab_star.accepts("a")
    assert not ab_star.accepts("b")
    for accepted in ("ab", "abb"):
        assert ab_star.accepts(accepted)

    starred = ab_star.star()
    assert starred.accepts("a")
    assert not starred.accepts("b")
    assert starred.accepts("ab")
    # A freshly computed star is checked as well.
    assert not ab_star.star().accepts("bb")
def test_crawl_reduction():
    # "0*1" in heavy disguise. States 2 and 3 behave identically; once those
    # are merged, state 1 behaves like the merged state too and must be folded
    # in as well (which cannot be seen before 2 and 3 are combined). The
    # oblivion state should be dropped, leaving two states.
    dead = "oblivion"
    disguised = FSM(
        alphabet={"0", "1"},
        states={1, 2, 3, 4, dead},
        initial=1,
        finals={4},
        map={
            1: {"0": 2, "1": 4},
            2: {"0": 3, "1": 4},
            3: {"0": 3, "1": 4},
            4: {"0": dead, "1": dead},
            dead: {"0": dead, "1": dead},
        },
    )
    merged = disguised.reduce()
    assert len(merged.states) == 2
def test_concatenate_bug(a):
    # Regression for a defect in fsm.concatenate: epsilon machines placed
    # between two copies of `a` must not stop "aa" from being accepted.
    one_epsilon = FSM.concatenate(a, epsilon({"a"}), a)
    assert one_epsilon.accepts("aa")

    two_epsilons = FSM.concatenate(a, epsilon({"a"}), epsilon({"a"}), a)
    assert two_epsilons.accepts("aa")
def test_binary_3():
    # Binary numbers divisible by 3. The empty string is rejected, "0" on its
    # own is accepted, and leading zeroes are not allowed.
    div3 = FSM(
        alphabet={"0", "1"},
        states={"initial", "zero", 0, 1, 2, None},
        initial="initial",
        finals={"zero", 0},
        map={
            "initial": {"0": "zero", "1": 1},
            "zero": {"0": None, "1": None},
            0: {"0": 0, "1": 1},
            1: {"0": 2, "1": 0},
            2: {"0": 1, "1": 2},
            None: {"0": None, "1": None},
        },
    )

    accepted = ["0", "11", "110", "1001"]
    rejected = [
        "",
        "1",
        "00", "01", "10",
        "000", "001", "010", "011", "100", "101", "111",
        "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
        "1000",
    ]

    for digits in accepted:
        assert div3.accepts(digits)
    for digits in rejected:
        assert not div3.accepts(digits)