Example #1
0
# Every edge of the hand-built automaton, written as a
# (source state, symbol, target state) triple and added in this order.
for _src, _symbol, _dst in (
        (s0, 'a', s1),
        (s0, 'c', s1),
        (s0, 'b', s3),
        (s0, 'd', s5),
        (s1, 'a', s2),
        (s3, 'a', s4),
        (s3, 'b', s4),
        (s5, 'd', s6),
):
    _src.add_successor(_symbol, _dst)

# Build the automaton from s0 and the state set {s4, s6}, then pass it
# through remove_lambdas and determinize.
aut = determinize(remove_lambdas(Automaton(s0, {s4, s6})))

# Word tree built from the automaton; the second argument is 2.
wt = automaton_to_wordtree(aut, 2)
#wt = remove_empty_subtrees(wt)

# NOTE(review): the diagram below parenthesises s2 and s4 and has no
# s0 -d> s5 -d> s6 branch, whereas the code above hands {s4, s6} to
# Automaton -- confirm the diagram still matches the code.
#
#   >s0 -a,c>   s1  -a>     (s2)
#   4           1           1
#       a:1/4       a:1
#       c:1/4
#       -b>     s3  -a,b>   (s4)
#               2           1
#       b:1/2       a:1/2
#                   b:1/2

# aa -> 1/4
# ca -> 1/4
Example #2
0
# Character classes used by the regexes below.
digi = '0123456789'
#punc = '@&"\'(§!)-$,;:=<#°_*%£?./+>{}€[]'
punc = '\'"()[]{}.,;:-@&'

# One automaton from the lower-cased text, one from the upper-cased text.
text = text.lower()
a1 = text_to_automaton(text, lambda x: x.isalpha(), 0)
text = text.upper()
a2 = text_to_automaton(text, lambda x: x.isalpha(), 0)

# Word tree for the choice between the two automata.
regex = Choice(
    Automaton(a1),
    Automaton(a2),
)
wt3 = regex_to_wordtree(regex, 16)

# Word tree for a single automaton built from lower-cased + upper-cased text.
text = text.lower() + text.upper()
a4 = text_to_automaton(text, lambda x: x.isalpha(), 0)
a4 = determinize(remove_lambdas(a4))
wt4 = automaton_to_wordtree(a4, 16)

# Report which sample words a1 accepts.
words = ['hello', 'this', 'can', 'world', '']
for word in words:
    print("a1 accepts '{0}' : {1}".format(word, a1.accepts(word)))

# Repetition of: something matched by a1, followed by two characters of digi.
reg = Repeat(
    Concat(
        Automaton(a1),
        Concat(Range(digi), Range(digi)),
    )
)
aut = determinize(remove_lambdas(regex_to_automaton(reg)))

wt = automaton_to_wordtree(aut, 16)
wt = remove_empty_subtrees(wt)
print("wt can produce {0} words.".format(wt.wordscount))

wt2 = automaton_to_wordtree(a1, 16)
print("wt2 can produce {0} words.".format(wt2.wordscount))
Example #3
0
# Checking acceptance
tests = [
    'ab',
    'ac',
    'a',
    'abcbc',
    'accccc',
    'abcc',
    'cb',
    'cac',
    'acca',
    'abcbbcac',
    'abcd',
    'abce',
    'ebbbb',
    '',
]

for test in tests:
    print("aut accepts '{0}' : {1}".format(test, aut.accepts(test)))

# Check copy: the copy is queried with the same inputs as the original.
autcopy = aut.copy()
for test in tests:
    print("autcopy accepts '{0}' : {1}".format(test, autcopy.accepts(test)))

# Same inputs again after passing the copy through remove_lambdas.
autcopy = remove_lambdas(autcopy)
for test in tests:
    print("autcopy without lambdas accepts '{0}' : {1}".format(
        test, autcopy.accepts(test)))

# Automaton for Range('a').
a1 = remove_lambdas(regex_to_automaton(Range('a')))
for _probe in ('a', 'aa', 'b', ''):
    print("a1 accepts '{0}' : {1}".format(_probe, a1.accepts(_probe)))

# Automaton for Range('a') followed by Range('b').
a2 = remove_lambdas(regex_to_automaton(Concat(Range('a'), Range('b'))))
for _probe in ('ab', 'a', 'aa', 'b'):
    print("a2 accepts '{0}' : {1}".format(_probe, a2.accepts(_probe)))
Example #4
0
from mepgen.automaton.transformation import reject_short_words, remove_lambdas
from mepgen.regex.regex import *
from mepgen.regex.transformation import regex_to_automaton

# Automaton for the regex Repeat(Range('a')).
reg = Repeat(Range('a'))
aut = regex_to_automaton(reg)

# Inputs checked against both automata below.
tests = ['', 'a', 'b', 'aa', 'ab', 'ba', 'aaa', 'aaaa', 'aaaaa']
for test in tests:
    # Conditional expression instead of the fragile `cond and a or b` idiom.
    verdict = "accepting" if aut.accepts(test) else "not accepting"
    print("aut '{0}' : {1}".format(test, verdict))

# Same automaton after remove_lambdas and reject_short_words(..., 4).
autrlw = reject_short_words(remove_lambdas(aut), 4)
for test in tests:
    verdict = "accepting" if autrlw.accepts(test) else "not accepting"
    print("autrlw '{0}' : {1}".format(test, verdict))
Example #5
0
from mepgen.automaton.transformation import reject_short_words, remove_lambdas
from mepgen.regex.regex import *
from mepgen.regex.transformation import regex_to_automaton

# Automaton for the regex Repeat(Range('a')).
reg = Repeat(Range('a'))
aut = regex_to_automaton(reg)

# Inputs checked against both automata below.
tests = ['', 'a', 'b', 'aa', 'ab', 'ba', 'aaa', 'aaaa', 'aaaaa']
for test in tests:
    # Conditional expression instead of the fragile `cond and a or b` idiom.
    print("aut '{0}' : {1}".format(
        test,
        "accepting" if aut.accepts(test) else "not accepting"))

# Same automaton after remove_lambdas and reject_short_words(..., 4).
autrlw = reject_short_words(remove_lambdas(aut), 4)
for test in tests:
    print("autrlw '{0}' : {1}".format(
        test,
        "accepting" if autrlw.accepts(test) else "not accepting"))
Example #6
0
from mepgen.regex.regex import Range, Repeat, Choice, Concat, Automaton
from mepgen.regex.transformation import regex_to_automaton
from mepgen.automaton.transformation import remove_lambdas, determinize

# Repetition of a choice between Range({'a'}) and Range({'b'}).
regex = Repeat(Choice(Range(frozenset(['a'])), Range(frozenset(['b']))))
aut = regex_to_automaton(regex)

# Print the same report three times: for the automaton as built, after
# remove_lambdas, and after determinize.  None marks the pass that applies
# no transformation.
for _transform in (None, remove_lambdas, determinize):
    if _transform is not None:
        aut = _transform(aut)
    print("aut is deterministic :", aut.is_deterministic())

    tests = ['', 'a', 'b', 'c', 'aa', 'ba', 'ca', 'aaa', 'aba', 'aab', 'abc']
    for test in tests:
        print("aut accepts '{0}' :".format(test), aut.accepts(test))
Example #7
0
from mepgen.regex.transformation import regex_to_automaton, regex_to_wordtree

# Read the whole corpus.  The with-statement closes the file even when
# read() raises, which the manual open/close pair did not guarantee.
with open("lovecraft.txt", "r") as f:
    text = f.read()

# Character classes used to build the separator regex below.
digi = '0123456789'
punc = '\'"()[]{}.,;:-@&'

# Arguments handed to text_to_automaton / reject_short_words below.
alphafilter = lambda x: x.isalpha()
threshold = 0.05
minwordlen = 4

# Automaton built from the lower-cased text ...
text = text.lower()
words = reject_short_words(
    remove_lambdas(text_to_automaton(text, alphafilter, threshold)),
    minwordlen)
# ... and the same pipeline applied to the upper-cased text.
text = text.upper()
WORDS = reject_short_words(
    remove_lambdas(text_to_automaton(text, alphafilter, threshold)),
    minwordlen)

# A word from either automaton, then any number of (separator, word) pairs,
# where a separator is one punc character or one or two digi characters.
allWords = Choice(Automaton(words), Automaton(WORDS))
sep = Choice(Range(punc), Choice(Range(digi), Concat(Range(digi),
                                                     Range(digi))))
regex = Concat(allWords, Repeat(Concat(sep, allWords)))
wordtree = regex_to_wordtree(regex, 16)

print("wordtree can produce " + str(wordtree.wordscount) + " words.")
# Sorted list of the word tree's alphabet.
alph = sorted(wordtree.get_alphabet())
Example #8
0
            'acca',
            'abcbbcac',
            'abcd',
            'abce',
            'ebbbb',
            ''  ]
            
# Query the automaton with every test input.
for test in tests:
    print("aut accepts '{0}' : {1}".format(test, aut.accepts(test)))

# Check copy: the copy is queried with the same inputs as the original.
autcopy = aut.copy()
for test in tests:
    print("autcopy accepts '{0}' : {1}".format(test, autcopy.accepts(test)))

# Same inputs again after passing the copy through remove_lambdas.
autcopy = remove_lambdas(autcopy)
for test in tests:
    print("autcopy without lambdas accepts '{0}' : {1}".format(
        test, autcopy.accepts(test)))

# Automaton for Range('a').
a1 = remove_lambdas(regex_to_automaton(Range('a')))
for _probe in ('a', 'aa', 'b', ''):
    print("a1 accepts '{0}' : {1}".format(_probe, a1.accepts(_probe)))

# Automaton for Range('a') followed by Range('b').
a2 = remove_lambdas(regex_to_automaton(Concat(Range('a'), Range('b'))))
for _probe in ('ab', 'a', 'aa', 'b', ''):
    print("a2 accepts '{0}' : {1}".format(_probe, a2.accepts(_probe)))
Example #9
0
from mepgen.regex.regex import Range, Repeat, Choice, Concat, Automaton
from mepgen.regex.transformation import regex_to_automaton
from mepgen.automaton.transformation import remove_lambdas, determinize

# Repetition of a choice between Range({"a"}) and Range({"b"}).
regex = Repeat(
    Choice(
        Range(frozenset(["a"])),
        Range(frozenset(["b"])),
    )
)

# Stage 1: the automaton exactly as produced from the regex.
aut = regex_to_automaton(regex)
print("aut is deterministic :", aut.is_deterministic())

tests = ["", "a", "b", "c", "aa", "ba", "ca", "aaa", "aba", "aab", "abc"]
for test in tests:
    print("aut accepts '{0}' :".format(test), aut.accepts(test))

# Stage 2: after remove_lambdas.
aut = remove_lambdas(aut)
print("aut is deterministic :", aut.is_deterministic())

tests = ["", "a", "b", "c", "aa", "ba", "ca", "aaa", "aba", "aab", "abc"]
for test in tests:
    print("aut accepts '{0}' :".format(test), aut.accepts(test))

# Stage 3: after determinize.
aut = determinize(aut)
print("aut is deterministic :", aut.is_deterministic())

tests = ["", "a", "b", "c", "aa", "ba", "ca", "aaa", "aba", "aab", "abc"]
for test in tests:
    print("aut accepts '{0}' :".format(test), aut.accepts(test))
Example #10
0
# Read the whole corpus.  The with-statement closes the file even when
# read() raises, which the manual open/close pair did not guarantee.
with open("lovecraft.txt", "r") as f:
    text = f.read()

# Character classes used to build the separator regex below.
digi = '0123456789'
punc = '\'"()[]{}.,;:-@&'

# Arguments handed to text_to_automaton / reject_short_words below.
alphafilter = lambda x : x.isalpha()
threshold = 0.05
minwordlen = 4

# Automaton built from the lower-cased text ...
text = text.lower()
words = reject_short_words(
    remove_lambdas(text_to_automaton(text, alphafilter, threshold)),
    minwordlen,
)
# ... and the same pipeline applied to the upper-cased text.
text = text.upper()
WORDS = reject_short_words(
    remove_lambdas(text_to_automaton(text, alphafilter, threshold)),
    minwordlen,
)


# A word from either automaton, then any number of (separator, word) pairs,
# where a separator is one punc character or one or two digi characters.
allWords = Choice(Automaton(words), Automaton(WORDS))
sep = Choice(Range(punc), Choice(Range(digi), Concat(Range(digi), Range(digi))))
regex = Concat(allWords, Repeat(Concat(sep, allWords)))
Example #11
0
# Punctuation class; the wider set is kept commented out.
#punc = '@&"\'(§!)-$,;:=<#°_*%£?./+>{}€[]'
punc = '\'"()[]{}.,;:-@&'

# One automaton from the lower-cased text, one from the upper-cased text.
text = text.lower()
a1 = text_to_automaton(text, lambda x : x.isalpha(), 0)
text = text.upper()
a2 = text_to_automaton(text, lambda x : x.isalpha(), 0)

# Word tree for the choice between the two automata.
regex = Choice(
    Automaton(a1),
    Automaton(a2),
)
wt3 = regex_to_wordtree(regex, 16)

# Word tree for a single automaton built from lower-cased + upper-cased text.
text = text.lower() + text.upper()
a4 = text_to_automaton(text, lambda x : x.isalpha(), 0)
a4 = determinize(remove_lambdas(a4))
wt4 = automaton_to_wordtree(a4, 16)

# Report which sample words a1 accepts.
words = ['hello', 'this', 'can', 'world', '']
for word in words:
    print("a1 accepts '{0}' : {1}".format(word, a1.accepts(word)))

# Repetition of: something matched by a1, followed by two characters of digi.
reg = Repeat(
    Concat(
        Automaton(a1),
        Concat(Range(digi), Range(digi)),
    )
)
aut = determinize(remove_lambdas(regex_to_automaton(reg)))

wt = automaton_to_wordtree(aut, 16)
wt = remove_empty_subtrees(wt)
print("wt can produce {0} words.".format(wt.wordscount))