# Wire up the transitions of the hand-built automaton.  The states s0..s6
# are defined earlier in the script.  Calls are issued in the listed order.
for origin, symbol, target in (
        (s0, 'a', s1),
        (s0, 'c', s1),
        (s0, 'b', s3),
        (s0, 'd', s5),
        (s1, 'a', s2),
        (s3, 'a', s4),
        (s3, 'b', s4),
        (s5, 'd', s6),
):
    origin.add_successor(symbol, target)

# s0 is passed as the first argument and {s4, s6} as the second
# (presumably the start state and the accepting states -- confirm against
# the Automaton constructor).
aut = Automaton(s0, {s4, s6})

# Run remove_lambdas, then determinize, then build the wordtree with 2 as
# the second argument (presumably the word length -- confirm).
aut = remove_lambdas(aut)
aut = determinize(aut)
wt = automaton_to_wordtree(aut, 2)
#wt = remove_empty_subtrees(wt)

# Hand-drawn sketch kept from the original script.
# NOTE(review): column alignment is approximate, and the sketch does not
# mention the 'd' branch (s5, s6) added above -- confirm it is up to date.
#
# >s0 -a,c> s1 -a> (s2)
#   4        1      1
#   a:1/4    a:1
#   c:1/4
#     -b>   s3 -a,b> (s4)
#            2        1
#   b:1/2    a:1/2
#            b:1/2
# aa -> 1/4
# ca -> 1/4
digi = '0123456789'
#punc = '@&"\'(§!)-$,;:=<#°_*%£?./+>{}€[]'
punc = '\'"()[]{}.,;:-@&'


def _is_alpha(char):
    """Filter handed to text_to_automaton: returns ``char.isalpha()``."""
    return char.isalpha()


# One automaton from the lower-cased text, one from the upper-cased text.
text = text.lower()
a1 = text_to_automaton(text, _is_alpha, 0)
text = text.upper()
a2 = text_to_automaton(text, _is_alpha, 0)

# Wordtree built from the regex-level choice between both automata.
regex = Choice(Automaton(a1), Automaton(a2))
wt3 = regex_to_wordtree(regex, 16)

# Same idea, but from a single automaton over both casings concatenated.
text = text.lower() + text.upper()
a4 = text_to_automaton(text, _is_alpha, 0)
a4 = remove_lambdas(a4)
a4 = determinize(a4)
wt4 = automaton_to_wordtree(a4, 16)

words = ['hello', 'this', 'can', 'world', '']
for word in words:
    print("a1 accepts '{0}' : {1}".format(word, a1.accepts(word)))

# Repeat of (a1 followed by two symbols taken from digi).
word_part = Automaton(a1)
digit_pair = Concat(Range(digi), Range(digi))
reg = Repeat(Concat(word_part, digit_pair))

aut = regex_to_automaton(reg)
aut = remove_lambdas(aut)
aut = determinize(aut)
wt = automaton_to_wordtree(aut, 16)
wt = remove_empty_subtrees(wt)
print("wt can produce {0} words.".format(wt.wordscount))

wt2 = automaton_to_wordtree(a1, 16)
print("wt2 can produce {0} words.".format(wt2.wordscount))
# Checking acceptance
tests = [
    'ab', 'ac', 'a', 'abcbc', 'accccc', 'abcc', 'cb', 'cac',
    'acca', 'abcbbcac', 'abcd', 'abce', 'ebbbb', '',
]
for test in tests:
    print("aut accepts '{0}' : {1}".format(test, aut.accepts(test)))

# Check copy: the copy is queried with the same words as the original.
autcopy = aut.copy()
for test in tests:
    print("autcopy accepts '{0}' : {1}".format(test, autcopy.accepts(test)))

# Same words again once the copy has gone through remove_lambdas.
autcopy = remove_lambdas(autcopy)
for test in tests:
    print("autcopy without lambdas accepts '{0}' : {1}".format(
        test, autcopy.accepts(test)))

# Automaton built from Range('a') alone.
a1 = remove_lambdas(regex_to_automaton(Range('a')))
for probe in ('a', 'aa', 'b', ''):
    print("a1 accepts '{0}' : {1}".format(probe, a1.accepts(probe)))

# Automaton built from Range('a') concatenated with Range('b').
a2 = remove_lambdas(regex_to_automaton(Concat(Range('a'), Range('b'))))
for probe in ('ab', 'a', 'aa', 'b'):
    print("a2 accepts '{0}' : {1}".format(probe, a2.accepts(probe)))
from mepgen.automaton.transformation import reject_short_words, remove_lambdas
# NOTE(review): wildcard import kept as-is because other code may rely on the
# names it provides; this script itself only uses Repeat and Range.
from mepgen.regex.regex import *
from mepgen.regex.transformation import regex_to_automaton

# Automaton built from the regex Repeat(Range('a')).
reg = Repeat(Range('a'))
aut = regex_to_automaton(reg)

tests = ['', 'a', 'b', 'aa', 'ab', 'ba', 'aaa', 'aaaa', 'aaaaa']

for test in tests:
    # A conditional expression replaces the fragile ``x and a or b`` idiom.
    verdict = "accepting" if aut.accepts(test) else "not accepting"
    print("aut '{0}' : {1}".format(test, verdict))

# Same words against the lambda-free automaton passed through
# reject_short_words with 4 (presumably the minimum accepted word
# length -- confirm against reject_short_words).
autrlw = reject_short_words(remove_lambdas(aut), 4)

for test in tests:
    verdict = "accepting" if autrlw.accepts(test) else "not accepting"
    print("autrlw '{0}' : {1}".format(test, verdict))
from mepgen.automaton.transformation import reject_short_words, remove_lambdas
# NOTE(review): wildcard import kept as-is because other code may rely on the
# names it provides; this script itself only uses Repeat and Range.
from mepgen.regex.regex import *
from mepgen.regex.transformation import regex_to_automaton

# Automaton built from the regex Repeat(Range('a')).
reg = Repeat(Range('a'))
aut = regex_to_automaton(reg)

tests = ['', 'a', 'b', 'aa', 'ab', 'ba', 'aaa', 'aaaa', 'aaaaa']

# First pass: the automaton exactly as produced by regex_to_automaton.
for test in tests:
    # Conditional expression instead of the ``cond and a or b`` pseudo-ternary.
    print("aut '{0}' : {1}".format(
        test, "accepting" if aut.accepts(test) else "not accepting"))

# Second pass: lambda transitions removed, then reject_short_words applied
# with 4 (presumably the minimum accepted word length -- confirm).
autrlw = reject_short_words(remove_lambdas(aut), 4)

for test in tests:
    print("autrlw '{0}' : {1}".format(
        test, "accepting" if autrlw.accepts(test) else "not accepting"))
from mepgen.regex.regex import Range, Repeat, Choice, Concat, Automaton
from mepgen.regex.transformation import regex_to_automaton
from mepgen.automaton.transformation import remove_lambdas, determinize

# Repeat of a Choice between the single-symbol ranges {'a'} and {'b'}.
letter_a = Range(frozenset(['a']))
letter_b = Range(frozenset(['b']))
regex = Repeat(Choice(letter_a, letter_b))

tests = ['', 'a', 'b', 'c', 'aa', 'ba', 'ca', 'aaa', 'aba', 'aab', 'abc']

# Report on the automaton three times: as built from the regex, after
# remove_lambdas, and after determinize.  ``None`` marks the first stage,
# where no transformation is applied.
aut = regex_to_automaton(regex)
for transform in (None, remove_lambdas, determinize):
    if transform is not None:
        aut = transform(aut)
    print("aut is deterministic :", aut.is_deterministic())
    for test in tests:
        print("aut accepts '{0}' :".format(test), str(aut.accepts(test)))
from mepgen.regex.transformation import regex_to_automaton, regex_to_wordtree

# Read the whole corpus.  The context manager closes the file even when
# read() raises, which the manual open/read/close sequence did not.
with open("lovecraft.txt", "r") as f:
    text = f.read()

digi = '0123456789'
punc = '\'"()[]{}.,;:-@&'


def alphafilter(x):
    """Filter handed to text_to_automaton: returns ``x.isalpha()``."""
    return x.isalpha()


# Third argument of text_to_automaton and second argument of
# reject_short_words (presumably a frequency cut-off and a minimum word
# length -- confirm against their definitions).
threshold = 0.05
minwordlen = 4

# Lower-case word automaton.
text = text.lower()
words = reject_short_words(
    remove_lambdas(text_to_automaton(text, alphafilter, threshold)),
    minwordlen)

# Upper-case word automaton, built the same way from the upper-cased text.
text = text.upper()
WORDS = reject_short_words(
    remove_lambdas(text_to_automaton(text, alphafilter, threshold)),
    minwordlen)

allWords = Choice(Automaton(words), Automaton(WORDS))

# Separator: one symbol from punc, or one or two symbols from digi.
sep = Choice(Range(punc),
             Choice(Range(digi), Concat(Range(digi), Range(digi))))

# A word followed by a Repeat of (separator, word) pairs.
regex = Concat(allWords, Repeat(Concat(sep, allWords)))

wordtree = regex_to_wordtree(regex, 16)
print("wordtree can produce " + str(wordtree.wordscount) + " words.")

# Sorted list of the wordtree's alphabet.
alph = sorted(wordtree.get_alphabet())
'acca', 'abcbbcac', 'abcd', 'abce', 'ebbbb', '' ] for test in tests: print('aut accepts \'' + test + '\' : ' + str(aut.accepts(test))) # Check copy autcopy = aut.copy() for test in tests: print('autcopy accepts \'' + test + '\' : ' + str(autcopy.accepts(test))) autcopy = remove_lambdas(autcopy) for test in tests: print('autcopy without lambdas accepts \'' + test + '\' : ' + str(autcopy.accepts(test))) a1 = remove_lambdas(regex_to_automaton(Range('a'))) print("a1 accepts 'a' : " + str(a1.accepts('a'))) print("a1 accepts 'aa' : " + str(a1.accepts('aa'))) print("a1 accepts 'b' : " + str(a1.accepts('b'))) print("a1 accepts '' : " + str(a1.accepts(''))) a2 = remove_lambdas(regex_to_automaton(Concat(Range('a'),Range('b')))) print("a2 accepts 'ab' : " + str(a2.accepts('ab'))) print("a2 accepts 'a' : " + str(a2.accepts('a'))) print("a2 accepts 'aa' : " + str(a2.accepts('aa'))) print("a2 accepts 'b' : " + str(a2.accepts('b'))) print("a2 accepts '' : " + str(a2.accepts('')))
from mepgen.regex.regex import Range, Repeat, Choice, Concat, Automaton
from mepgen.regex.transformation import regex_to_automaton
from mepgen.automaton.transformation import remove_lambdas, determinize

# Repeat of a Choice between the single-symbol ranges {"a"} and {"b"}.
either_letter = Choice(Range(frozenset(["a"])), Range(frozenset(["b"])))
regex = Repeat(either_letter)

tests = ["", "a", "b", "c", "aa", "ba", "ca", "aaa", "aba", "aab", "abc"]

# The same report is printed for the automaton as built from the regex,
# then after remove_lambdas, then after determinize.  ``None`` stands for
# the initial stage, where nothing is applied.
aut = regex_to_automaton(regex)
for step in (None, remove_lambdas, determinize):
    if step is not None:
        aut = step(aut)
    print("aut is deterministic :", aut.is_deterministic())
    for test in tests:
        print("aut accepts '{0}' :".format(test), str(aut.accepts(test)))
# Read the whole corpus.  The context manager closes the file even when
# read() raises, which the manual open/read/close sequence did not.
with open("lovecraft.txt", "r") as f:
    text = f.read()

digi = '0123456789'
punc = '\'"()[]{}.,;:-@&'


def alphafilter(x):
    """Filter handed to text_to_automaton: returns ``x.isalpha()``."""
    return x.isalpha()


# Third argument of text_to_automaton and second argument of
# reject_short_words (presumably a frequency cut-off and a minimum word
# length -- confirm against their definitions).
threshold = 0.05
minwordlen = 4

# Lower-case word automaton.
text = text.lower()
words = reject_short_words(
    remove_lambdas(text_to_automaton(text, alphafilter, threshold)),
    minwordlen)

# Upper-case word automaton, built the same way from the upper-cased text.
text = text.upper()
WORDS = reject_short_words(
    remove_lambdas(text_to_automaton(text, alphafilter, threshold)),
    minwordlen)

allWords = Choice(Automaton(words), Automaton(WORDS))

# Separator: one symbol from punc, or one or two symbols from digi.
sep = Choice(Range(punc),
             Choice(Range(digi), Concat(Range(digi), Range(digi))))

# A word followed by a Repeat of (separator, word) pairs.
regex = Concat(allWords, Repeat(Concat(sep, allWords)))
#punc = '@&"\'(§!)-$,;:=<#°_*%£?./+>{}€[]'
punc = '\'"()[]{}.,;:-@&'


def _is_alpha(char):
    """Filter handed to text_to_automaton: returns ``char.isalpha()``."""
    return char.isalpha()


# One automaton from the lower-cased text, one from the upper-cased text.
text = text.lower()
a1 = text_to_automaton(text, _is_alpha, 0)
text = text.upper()
a2 = text_to_automaton(text, _is_alpha, 0)

# Wordtree built from the regex-level choice between both automata.
regex = Choice(Automaton(a1), Automaton(a2))
wt3 = regex_to_wordtree(regex, 16)

# Same idea, but from a single automaton over both casings concatenated.
text = text.lower() + text.upper()
a4 = text_to_automaton(text, _is_alpha, 0)
a4 = remove_lambdas(a4)
a4 = determinize(a4)
wt4 = automaton_to_wordtree(a4, 16)

words = ['hello', 'this', 'can', 'world', '']
for word in words:
    print("a1 accepts '{0}' : {1}".format(word, a1.accepts(word)))

# Repeat of (a1 followed by two symbols taken from digi); digi is defined
# earlier in the script.
word_part = Automaton(a1)
digit_pair = Concat(Range(digi), Range(digi))
reg = Repeat(Concat(word_part, digit_pair))

aut = regex_to_automaton(reg)
aut = remove_lambdas(aut)
aut = determinize(aut)
wt = automaton_to_wordtree(aut, 16)
wt = remove_empty_subtrees(wt)
print("wt can produce {0} words.".format(wt.wordscount))