This repository has been archived by the owner on Oct 30, 2021. It is now read-only.
/
regex_to_grammar.py
122 lines (104 loc) · 4.12 KB
/
regex_to_grammar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from grammar import Grammar
class RegexToGrammar(object):
    """Translate a postfix regular expression into grammar production rules.

    The expression is evaluated with a stack: every ordinary character is
    pushed as an operand, while the operator characters consume operands
    from the top of the stack:

    * ``*`` and ``+`` are unary and create a fresh ``X<n>`` non-terminal,
    * ``|`` is binary and creates a fresh ``Y<n>`` non-terminal,
    * ``.`` is binary and simply concatenates its two operands.

    Rules are kept in ``self.dictionary``, which maps each non-terminal name
    to a list of ``(non_terminal, right_hand_side)`` tuples.

    NOTE: non-terminals are plain strings (``S``, ``X1``, ``Y2`` ...) spliced
    into right-hand sides, so an expression that itself contains those
    characters cannot be told apart from generated names.
    """

    def __init__(self, re):
        """Store the expression and pre-compute the non-terminal numbering.

        Args:
            re: the regular expression, a string of single-character tokens
                in postfix order.
        """
        self.re = re
        self.stack = []
        self.dictionary = {}
        # One numeric suffix per operator occurrence.  These must be real
        # lists (not lazy ``map`` objects) because create_new_rules()
        # consumes them with ``pop(0)``.
        closure_count = self.re.count('*') + self.re.count('+')
        self.X_no = [str(n) for n in range(1, closure_count + 1)]
        self.Y_no = [str(n) for n in range(1, self.re.count('|') + 1)]

    def get_grammar(self):
        """Build the grammar for ``self.re`` and return it as a ``Grammar``.

        The start symbol ``S`` receives a single rule whose right-hand side
        is the operand left on top of the stack once the whole expression
        has been processed (anything below it on the stack is ignored).
        Progress banners and the resulting grammar are printed to standard
        output.

        Returns:
            The ``Grammar`` object constructed from the non-terminal names,
            the terminal characters, the rule dictionary and the start
            symbol ``'S'``.

        Raises:
            IndexError: if an operator is met without enough operands on
                the stack, or if the expression is empty.
        """
        # Local import: only needed for the newline-free label further down.
        import sys

        print('----------------- BEGIN regex_to_grammar -----------------\n')
        binary_op = ('|', '.')
        # Every character that is not one of the four operators is a terminal.
        terminals = set(self.re).difference('+*|.')
        for symbol in self.re:
            if symbol in terminals:
                self.stack.append(symbol)
            elif symbol in binary_op:
                # Build the result from the two topmost operands first and
                # only then replace them, so a failure leaves the stack as
                # it was.
                combined = self.create_new_rules(symbol, self.stack[-2:])
                del self.stack[-2:]
                self.stack.append(combined)
            else:
                # Remaining operators ('*' and '+') act on a single operand.
                self.stack.append(
                    self.create_new_rules(symbol, self.stack.pop()))
        self.dictionary['S'] = [('S', ''.join(self.stack.pop()))]
        grammar_ = Grammar(
            set(self.dictionary.keys()), terminals, self.dictionary, 'S')
        # Written without a trailing newline so the grammar dump follows the
        # label on the same line, as the former ``print "Grammar: ",`` did
        # (the second space stands in for the separator that statement
        # placed before the next printed item).
        sys.stdout.write('Grammar:  ')
        grammar_.print_grammar()
        # NOTE(review): element 2 of ``grammar_.grammar`` is presumably the
        # rule table -- confirm against the Grammar class.
        print(grammar_.grammar[2])
        print('\n----------------- END regex_to_grammar -----------------\n\n')
        return grammar_

    def get_terminals(self):
        """Collect the terminal text found on the right-hand sides of rules.

        Every known non-terminal name is stripped from each right-hand side
        in ``self.dictionary``; whatever text remains is reported.

        Returns:
            A set of the non-empty leftover strings (a leftover may be
            longer than one character, e.g. ``'ab'`` for the rule
            ``S -> abX1``).
        """
        # Strip longer names first so that e.g. 'X1' cannot eat the prefix
        # of 'X10' and leave a stray '0' behind.
        names = sorted(self.dictionary, key=len, reverse=True)
        found = set()
        for rules in self.dictionary.values():
            for rule in rules:
                remainder = rule[-1]
                for name in names:
                    remainder = remainder.replace(name, '')
                found.add(remainder)
        found.discard('')
        return found

    def create_new_rules(self, operator, operands):
        """Register the rules for one operator application.

        Args:
            operator: ``'*'``, ``'+'`` or ``'.'``; any other value is
                handled as alternation (``'|'``).
            operands: for ``'*'`` / ``'+'`` the single operand string; for
                the binary operators a sequence holding the two operands.

        Returns:
            The name of the non-terminal created for the operator, or, for
            ``'.'``, the concatenated operand text (no rule is created).

        Raises:
            IndexError: if the pre-computed numbering is exhausted or a
                binary operator receives fewer than two operands.
        """
        if operator == '.':
            return self.concat(operands)
        if operator in ('*', '+'):
            non_terminal = 'X' + self.X_no.pop(0)
            # X -> operand X, plus the terminating alternative: the empty
            # string for '*' (zero or more), the bare operand for '+'.
            last = '' if operator == '*' else operands
            self.dictionary[non_terminal] = [
                (non_terminal, operands + non_terminal),
                (non_terminal, last),
            ]
        else:
            non_terminal = 'Y' + self.Y_no.pop(0)
            # Y -> left | right
            self.dictionary[non_terminal] = [
                (non_terminal, ''.join(operands[0])),
                (non_terminal, ''.join(operands[1])),
            ]
        return non_terminal

    def concat(self, operands):
        """Join the first two operands into a single string.

        Each operand may be a string or a list of strings; lists are
        flattened in order before joining.

        Raises:
            IndexError: if fewer than two operands are supplied.
        """
        pieces = []
        for operand in (operands[0], operands[1]):
            if isinstance(operand, list):
                pieces.extend(operand)
            else:
                pieces.append(operand)
        return ''.join(pieces)
# Example usage (the expression is in postfix form, '.' being concatenation):
# dummy = 'abcd*ef|*..|*.'
# dummyreg = RegexToGrammar(dummy)
# dummyreg.get_grammar()
# dummyreg.get_terminals()