def test_pair(self):
    """'-f -x' parses as an option-list: first option, space separator,
    second option — verified against a fully expanded expect tree."""
    text = '-f -x'
    expect = NonTerminal(document(), [
        NonTerminal(body(), [
            NonTerminal(element(), [
                NonTerminal(option_line(), [
                    NonTerminal(option_list(), [
                        # first option has no separator in front of it
                        NonTerminal(ol_first_option(), [
                            NonTerminal(option(), [
                                Terminal(short_no_arg(), 0, '-f'),
                            ]),
                        ]),
                        # each subsequent term carries its own separator
                        NonTerminal(ol_term_with_separator(), [
                            NonTerminal(ol_separator(), [
                                NonTerminal(ol_space(), [
                                    Terminal(StrMatch(' '), 0, ' '),
                                ]),
                            ]),
                            NonTerminal(ol_term(), [
                                NonTerminal(option(), [
                                    Terminal(short_no_arg(), 0, '-x'),
                                ]),
                            ]),
                        ]),
                    ]),
                    Terminal(EOF(), 0, ''),
                ]),
            ]),
        ]),
        Terminal(EOF(), 0, ''),
    ])
    self.parse_and_verify(text, expect)
def test_004__n1_eq_t1__with_eq(self):
    """With structural equality enabled, a NonTerminal never equals a
    Terminal, equals a structurally identical NonTerminal (including a
    deepcopy), and differs when the underlying rule differs.

    Fixes: the original contained
    ``self.assertTrue('Internal error, AssertionError not raised !!!')``
    — assertTrue on a non-empty string always passes, so it could never
    signal anything; ``assertRaises`` itself fails the test when no
    exception is raised, so the line is removed. An exact duplicate of
    the t2/n2 construction + assert is also removed.
    """
    if False:  # debug aids, intentionally disabled
        from prettyprinter import cpprint as pp
        import p
        print(f": test_004__n1_eq_t1__with_eq")
        print(f": n1 :")
        pp(self.n1)
        print(f": t1 :")
        pp(self.t1)
    NonTerminal_enable_structural_eq()
    # Comparing NonTerminal to Terminal must raise; assertRaises fails
    # the test on its own if nothing is raised.
    with self.assertRaises(AssertionError) as context:
        assert self.n1 == self.t1   # now it fails
    assert self.n1 != self.t1
    assert self.n1 == deepcopy(self.n1)
    # A structurally identical tree built from scratch compares equal.
    t2 = Terminal(self.dot, 0, 'one')
    n2 = NonTerminal(self.dot, [t2])
    assert self.n1 == n2
    # A different rule makes structurally similar trees unequal.
    bang = StrMatch('!', rule_name='bang')
    t3 = Terminal(bang, 0, 'one')
    n3 = NonTerminal(bang, [t3])
    assert self.n1 != n3
def builder(self, texts):
    """Assemble a (text, expected-parse-tree) pair from *texts*.

    Each atom is either a bare newline (becomes a newline element) or a
    block of prose (becomes a paragraph of line nodes). Returns the
    concatenated input text together with the document tree the parser
    is expected to produce for it.
    """
    nl_terminal = Terminal(newline(), 0, '\n')
    accumulated = ""
    elements = []
    for atom in texts:
        if atom == '\n':
            accumulated += atom
            elements.append(NonTerminal(element(), [nl_terminal]))
        else:
            # Fiddling necessary since 'line' does not support fragments
            # and paragraph does not support leading newlines.
            stripped = atom.strip()
            accumulated += stripped + '\n'
            line_nodes = [
                NonTerminal(line(),
                            [Terminal(text(), 0, fragment), nl_terminal])
                for fragment in stripped.split('\n')
            ]
            elements.append(
                NonTerminal(element(),
                            [NonTerminal(paragraph(), line_nodes)]))
    expect = NonTerminal(document(), [
        NonTerminal(body(), elements),
        Terminal(EOF(), 0, ''),
    ])
    return (accumulated, expect)
def setUp(self):
    """Create the shared rule / Terminal / NonTerminal fixtures used by
    the equality tests in this class."""
    self.dot = StrMatch('.', rule_name='self.dot')
    self.s1 = 's1 : string'
    self.s2 = 's2 : string'
    self.s3 = 's3 : string'
    # rule, position, value
    self.t1 = Terminal(self.dot, 0, 'one')
    self.t2 = Terminal(self.dot, 0, 'two')
    self.t3 = Terminal(self.dot, 0, 'three')
    # Terminals are not list-like ...
    assert not isinstance(self.t1, list)
    assert not isinstance(self.t2, list)
    assert not isinstance(self.t3, list)
    # rule, value : a list where the first element is a node
    # self.n1 = NonTerminal(self.dot, self.t1)
    #   TypeError: 'Terminal' object is not subscriptable
    self.n2 = NonTerminal(self.dot, [self.t1])
    self.n3 = NonTerminal(self.dot, self.n2)
    self.n4 = NonTerminal(self.dot, [self.n2])
    # ... but NonTerminals are (they subclass list)
    assert isinstance(self.n2, list)
    assert isinstance(self.n3, list)
    assert isinstance(self.n4, list)
    # assorted value fixtures: node, string list, terminal, string, tuple
    self.v0 = self.n2
    self.v1 = [self.s1, self.s2]
    self.v2 = self.t1
    self.v3s = self.s3
    self.v3t = (self.s1, self.s2)
def test_with_some_words_2(self):
    """A words line followed by two newlines parses as words element,
    newline element, blank-line element."""
    parser = ParserPython(document, skipws=False)
    text = f"{self.words1}\n\n"
    parsed = parser.parse(text)
    # print('\n: parsed') ; pp(parsed)
    expect = NonTerminal(document(), [
        NonTerminal(body(), [
            NonTerminal(element(), [
                Terminal(words(), 0, self.words1),
            ]),
            NonTerminal(element(), [
                Terminal(newline(), 0, '\n'),
            ]),
            NonTerminal(element(), [
                Terminal(blank_line(), 0, '\n'),
            ]),
        ]),
        Terminal(EOF(), 0, ''),
    ])
    # print('\n: expect') ; pp(expect)
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def test_02_text_line_pair(self):
    """Two consecutive text lines parse into a single body of two
    text_line nodes, followed by EOF."""
    def body():
        return OneOrMore([text_line], rule_name='body')

    def document():
        return Sequence((body, EOF), rule_name='document')

    parser = ParserPython(document, skipws=False)
    text = self.line1 + self.line2
    parsed = parser.parse(text)

    nl = Terminal(newline(), 0, '\n')
    first = NonTerminal(text_line(), [
        Terminal(words(), 0, self.line1[:-1]), nl])
    second = NonTerminal(text_line(), [
        Terminal(words(), 0, self.line2[:-1]), nl])
    expect = NonTerminal(document(), [
        NonTerminal(body(), [first, second]),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def test_3_explicit_blank_line(self):
    """Blank lines are recognized both standing alone and following a
    phrase; the expect list is grown incrementally across two checks."""
    newline = common.newline
    blank_line = common.blank_line
    words = get_words('\n')
    t_newline = Terminal(newline(), 0, '\n')
    text = ""
    expect_list = []
    s_blank_line_empty = '\n'
    t_blank_line_empty = Terminal(blank_line(), 0, s_blank_line_empty)
    # -- 1: a bare newline followed by an empty blank line -----------------
    text += '\n' + s_blank_line_empty
    expect_list.extend((t_newline, t_blank_line_empty))
    self.check_parse('test_blank_line : 1', blank_line, blank_line(),
                     words, text, expect_list)
    # -- 2: a phrase, then newline, then an empty blank line ---------------
    phrase = fake_spaces_etc(' ', 'Testing 1, 2, 3')
    t_phrase = Terminal(words(), 0, phrase)
    # t_blank_line_empty = Terminal(blank_line(), 0, s_blank_line_empty)
    text += phrase + '\n' + s_blank_line_empty
    expect_list.extend((t_phrase, t_newline, t_blank_line_empty))
    self.check_parse('test_blank_line : 2', blank_line, blank_line(),
                     words, text, expect_list)
def test_05_paragraph_multiple(self):
    """Three repetitions of a two-line paragraph plus a blank newline
    parse as (paragraph, newline) * 3 inside the body."""
    def body():
        return OneOrMore(OrderedChoice([paragraph, newline]),
                         rule_name='body')
    def document():
        return Sequence((body, EOF), rule_name='document')
    # print('\n: document') ; pp(document())
    parser = ParserPython(document, skipws=False)
    text = self.line1 + self.line2 + '\n'
    text = text * 3
    parsed = parser.parse(text)
    # print('\n: parsed') ; pp(parsed)
    p_newline = Terminal(newline(), 0, '\n')
    p_l1_words = Terminal(words(), 0, self.line1[:-1])
    p_l1_text_line = NonTerminal(text_line(), [p_l1_words, p_newline])
    p_l2_words = Terminal(words(), 0, self.line2[:-1])
    p_l2_text_line = NonTerminal(text_line(), [p_l2_words, p_newline])
    p_paragraph = NonTerminal(paragraph(),
                              [p_l1_text_line, p_l2_text_line])
    # the same paragraph node object may be reused: comparison is by value
    p_body = NonTerminal(body(), [
        p_paragraph, p_newline,
        p_paragraph, p_newline,
        p_paragraph, p_newline,
    ])
    p_eof = Terminal(EOF(), 0, '')
    expect = NonTerminal(document(), [p_body, p_eof])
    # print('\n: expect') ; pp(expect)
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def option_line_expect(*terminals, eof=False, sep=None, indent=None,
                       gap=None, help_=None, extra=0):
    """Build the expected parse tree for a single option line.

    terminals : option/operand terminals; first becomes ol_first_option,
                the rest become ol_term_with_separator nodes.
    eof       : append a trailing EOF terminal when True.
    sep       : separator spec passed to expect_separator().
    indent    : leading whitespace prefix, emitted as a 'wx' terminal.
    gap/help_ : help-column gap and help text; NOTE(review): when help_
                is given, gap must be a non-None string — ``gap += ...``
                raises TypeError otherwise. Confirm callers always pass
                gap with help_.
    extra     : extra spaces appended to gap (negative clamped to 0).
    """
    if len(terminals) <= 0:
        raise ValueError(
            "No terminals provided. Please provide at least one.")
    separator = expect_separator(sep)
    sep_space = expect_separator(' ')  # required for operands
    # print("[sep-space]")
    # pp(sep_space)
    members = [
        NonTerminal(
            option_list(),
            [
                NonTerminal(ol_first_option(), [
                    terminals[0],
                ]),
                *[
                    NonTerminal(
                        ol_term_with_separator(),
                        [
                            # operands always get a plain space separator
                            (sep_space if term.rule_name == 'operand'
                             else separator),
                            NonTerminal(ol_term(), [term]),
                        ])
                    for term in terminals[1:]
                ],
            ]),
        Terminal(newline(), 0, '\n'),
    ]
    if indent and len(indent) > 0:
        members.insert(0,
                       Terminal(StrMatch(' ', rule_name='wx'), 0, indent))
    if help_ and len(help_) > 0:
        if extra < 0:
            extra = 0
        # print(f": extra = {extra}")
        gap += ' ' * extra
        # gap and help text slot in just before the trailing newline
        members.insert(
            -1,
            Terminal(StrMatch(gap, rule_name='option_line_gap'), 0, gap))
        members.insert(
            -1,
            Terminal(StrMatch('.', rule_name='option_line_help'), 0,
                     help_))
    expect = NonTerminal(option_line(), [*members])
    if eof:
        expect.append(Terminal(EOF(), 0, ''))
    return expect
def test_long_eq_arg(self):
    """'--file=<a-file>' parses as long option, '=', angle operand."""
    value = '--file=<a-file>'
    opt = Terminal(long_no_arg(), 0, '--file')
    arg = NonTerminal(operand(),
                      [Terminal(operand_angled(), 0, '<a-file>')])
    eq_node = NonTerminal(long_eq_arg(), [opt, t_equals, arg])
    expect = NonTerminal(body(),
                         [NonTerminal(option(), [eq_node]), t_eof])
    super().single(body, value, expect)
def test_short_adj_arg(self):
    """'-fFILE' : short option immediately followed by an all-caps
    operand (no separator)."""
    value = '-fFILE'
    # NOTE(review): '-f' is built with long_no_arg() here while sibling
    # tests build '-f' with short_no_arg() / short_adj_arg__option() —
    # confirm this matches what the grammar actually emits for this rule.
    option_ = Terminal(long_no_arg(), 0, '-f')
    operand_ = Terminal(operand_all_caps(), 0, 'FILE')
    operand_ = NonTerminal(operand(), [operand_])
    expect = NonTerminal(short_adj_arg(), [option_, operand_])
    expect = NonTerminal(option(), [expect])
    expect = NonTerminal(body(), [expect, t_eof])
    super().single(body, value, expect)
def test_long_gap_arg(self):
    """'--file <a-file>' : long option, whitespace gap, angle operand."""
    value = '--file <a-file>'
    opt = NonTerminal(option(), [Terminal(long_no_arg(), 0, '--file')])
    gap = p_ws(' ')
    arg = NonTerminal(operand(),
                      [Terminal(operand_angled(), 0, '<a-file>')])
    expect = NonTerminal(body(), [opt, gap.expect, arg, t_eof])
    super().single(body, value, expect)
def test_short_gap_arg(self):
    """'-f FILE' : short option, whitespace gap, all-caps operand."""
    value = '-f FILE'
    opt = NonTerminal(option(), [Terminal(short_no_arg(), 0, '-f')])
    gap = p_ws(' ')
    arg = NonTerminal(operand(),
                      [Terminal(operand_all_caps(), 0, 'FILE')])
    expect = NonTerminal(body(), [opt, gap.expect, arg, t_eof])
    super().single(body, value, expect)
def in_the_middle():
    """Case: separator between two phrases — expect words, sep, words."""
    name = 'in_the_middle_between_two_phrases'
    lhs = fake_spaces_etc(s, 'for all good men')
    rhs = fake_spaces_etc(s, 'to rise up')
    assert s not in lhs
    assert s not in rhs
    _words = get_words(s)
    text = lhs + s + rhs
    expect = (Terminal(_words(), 0, lhs), t_s, Terminal(_words(), 0, rhs))
    return name, grammar(_words), text, expect
def test_single_short_w_arg(self):
    """'-fNORM' : short option with adjacent all-caps argument."""
    text = '-fNORM'
    expect = create_expect(
        NonTerminal(option(), [
            NonTerminal(short_adj_arg(), [
                Terminal(short_adj_arg__option(), 0, '-f'),
                NonTerminal(operand(), [
                    Terminal(operand_all_caps(), 0, 'NORM'),
                ]),
            ]),
        ]),
        eof=(text[-1] != '\n'),
    )
    self.parse_and_verify(text, expect)
def test_single_short_with_one_arg(self):
    """'-fNORM' : short option with adjacent all-caps argument.

    Fixes: the original built the expected tree but never compared it to
    the parsed result — the test asserted nothing and always passed.
    """
    text = '-fNORM'
    parsed = self.parser.parse(text)
    # tprint("[parsed]") ; pp(parsed)
    expect = create_expect(
        NonTerminal(option(), [
            NonTerminal(short_adj_arg(), [
                Terminal(short_adj_arg__option(), 0, '-f'),
                NonTerminal(operand(), [
                    Terminal(operand_all_caps(), 0, 'NORM'),
                ]),
            ]),
        ]),
    )
    # The missing verification step: without it this test is a no-op.
    assert parsed == expect, (
        f"text = '{text}' :\n[expect]\n{expect}\n[parsed]\n{parsed}")
def paragraph():
    """Case: many separator occurrences scattered through a paragraph,
    including zero-length phrases at the start, the end, and one in the
    middle; adjacent separator terminals are collapsed when the rule is
    a Zero/One-Or-Many character class."""
    name = 'several_occurances_in_a_paragraph'
    text = """<s> <s>The essence of America — that which really unites us — <s>is not ethnicity, <s>or<s>nationality, or religion. It is an <s> idea—and what an <s> idea it is : that you can come <s><s> from humble circumstances and do great things.<s> - Condoleezza Rice <s>"""
    # zero length phrases at start, end and one more in the middle
    n_empty = 3
    # Shield the '<s>' placeholders (via BEL chars) while fake_spaces_etc
    # rewrites real whitespace, then restore them.
    text = text.replace('<s>', chr(7))
    text = fake_spaces_etc(s, text)
    text = text.replace(chr(7), '<s>')
    assert s not in text
    _words = get_words(s)
    phrases = re.split('<s>', text)
    assert len(phrases[0]) == 0
    assert len(phrases[-1]) == 0
    t_s = Terminal(rule, 0, s)
    tw = lambda p: Terminal(_words(), 0, p)
    # Interleave word terminals with separator terminals; empty phrases
    # contribute only the separator (the () flattens away below).
    terms = [((tw(p) if len(p) > 0 else ()), t_s) for p in phrases]
    terms = flatten(terms)
    del terms[-1]
    assert len(terms) == 2 * len(phrases) - n_empty - 1
    # Handle the simplest Zero/One Or Many rules on a character class
    # if isinstance(rule, RegExMatch) and rule.to_match[-1] in '*+':
    # collapse any series of 't_s' elements into a single ts element
    limit = len(terms) - 1
    idx = 0
    while idx < limit:
        if (terms[idx].rule_name == t_s.rule_name
                and terms[idx + 1].rule_name == t_s.rule_name):
            # merge two adjacent separator terminals into one
            value = terms[idx].value + terms[idx + 1].value
            terms[idx] = Terminal(rule, 0, value)
            del terms[idx + 1]
            limit -= 1
        else:
            idx += 1
    return name, grammar(_words), s.join(phrases), tuple(terms)
def create_expect(*terminals, sep=None):
    """Expected document tree for an option list built from *terminals*:
    the first terminal becomes ol_first_option, the rest become
    separator-prefixed ol_term nodes (operands always get a space)."""
    separator = expect_separator(sep)
    sep_space = expect_separator(' ')  # FIXME: create global for 'SPACE'
    if len(terminals) <= 0:
        raise ValueError("No terminals provided. Please provide at least one.")
    first = NonTerminal(ol_first_option(), [terminals[0]])
    rest = [
        NonTerminal(ol_term_with_separator(), [
            (sep_space if term.rule_name == 'operand' else separator),
            NonTerminal(ol_term(), [term]),
        ])
        for term in terminals[1:]
    ]
    opt_list = NonTerminal(option_list(), [first, *rest])
    return NonTerminal(document(), [
        NonTerminal(body(), [
            NonTerminal(element(), [opt_list]),
        ]),
        Terminal(EOF(), 0, ''),
    ])
def usage_prepare_intro(text: str, *newlines):
    """ <newlines> """
    # BRITTLE: depends on internal implementation of usage_intro and newline
    intro = usage_intro()
    intro_expr = intro.elements[0].to_match
    if not re.match(intro_expr, text):
        # BUG FIX: the message previously referenced 'expr', which is not
        # assigned until later in the function — raising here produced a
        # NameError instead of the intended ValueError.
        raise ValueError(
            f"Usage intro text '{text}' does not match regex "
            f"'{intro_expr}' such "
            f"that it will not be accepted in the parse. Please address.")
    # trailing[0] holds raw strings, trailing[1] the matching terminals
    trailing = [[], []]
    # peal off any trailing whitespace
    expr = f".*[^{WHITESPACE_RAW_STR}]({WS_REGEX})$"
    match = re.match(expr, text)
    if match:
        trailing_ = match.group(1)
        text = text[:len(text) - len(trailing_)]
        trailing[0].append(trailing_)
        trailing[1].append(Terminal(wx(), 0, trailing_))
    # peal off all trailing newline (i.e. ws + linefeed) segments
    expr = f".*[^{WHITESPACE_RAW_STR}]({NEWLINE_REGEX})$"
    nl = re.compile(expr)
    while (match := nl.match(text)):
        trailing_ = match.group(1)
        text = text[:len(text) - len(trailing_)]
        trailing[0].insert(0, trailing_)
        trailing[1].insert(0, t_wx_newline(trailing_))
def execute_passes(self, name, input, start=None):
    """Parse *input* with the grammar named *name* (start rule defaults
    to the grammar name) and run both simplify passes over the tree."""
    if start is None:
        start = name
    self.grammar_file = GRAMMAR_PATTERN.replace('{name}', name)
    self.parser = DocOptParserPEG(grammar_file=self.grammar_file,
                                  start=start, memoization=True,
                                  debug=False)
    if self.show or self.debug is not False:
        print(f"\n: input = '{input}'")
    # input is padded with whitespace on both sides before the raw parse
    tree = self.perform_pass(0, 'raw', self.parser.parse,
                             self.ws + input + self.ws)
    tree = self.perform_pass(1, 'simplify-1', Simplify_Pass1().visit, tree)
    tree = self.perform_pass(2, 'simplify-2', Simplify_Pass2().visit, tree)
    if False:  # disabled whitespace-normalization check
        # remove leading and trailing whitespace including newlines
        output = input.strip()
        # reduce each expanse of whitespace to a single space
        output = re.compile('[ \t]+').sub(' ', output)
        # remove spaces before of after a newline
        output = re.compile(' ?\n ?').sub('\n', output)
        expect = Terminal(self.parser.parser.parser_model, 0,
                          value=output)
        assert tree == expect
        # Now this does seem to be using the Terminal's text eq
    return tree
def visit(self, node, depth=0):
    """Recursively simplify a parse-tree node.

    Non-container objects pass through (single-element lists unwrap).
    Container nodes have their children visited first, then either a
    rule-specific ``visit_<rule>`` method is dispatched, or a fresh
    Terminal/NonTerminal is rebuilt from the simplified children.

    Fixes: the rebuild fallback used a bare ``except:``, which also
    swallows SystemExit/KeyboardInterrupt; narrowed to ``Exception``.
    """
    i = ' ' * 3 * depth  # debug indent (used only by commented prints)
    if not hasattr(node, 'rule_name'):
        # Not a parse-tree container: unwrap a singleton list, otherwise
        # return the object unchanged.
        if isinstance(node, list) and len(node) == 1:
            return node[0]
        return node
    #----------------------------------------------------------------------
    children = []
    if isinstance(node, (NonTerminal, SemanticActionResults)):
        # each of these object types is a list — visit every child and
        # keep only truthy results
        for child in node:
            response = self.visit(child, 1 + depth)
            if response:
                children.append(response)
    #----------------------------------------------------------------------
    # rule name specific visitor ?
    rule_name = str(node.rule_name)
    method = f"visit_{rule_name}"
    if hasattr(self, method):
        return getattr(self, method)(node, children, 1 + depth)
    #----------------------------------------------------------------------
    if len(children) <= 0:
        # no surviving children: collapse to a Terminal of this rule
        return Terminal(node.rule, 0, node.value)
    if len(children) == 1:
        children = children[0]
    try:
        out = NonTerminal(node.rule, children)
    except Exception:
        # children was not an acceptable node list — wrap, then unwrap
        # the placeholder back to its raw value
        out = NonTerminal(node.rule, [Unwrap(children)])
        out[0] = out[0].value  # automatically unwrap
    return out
def execute_passes(self, name, input, start=None):
    """Parse *input* with grammar *name*, run simplify pass 1, and check
    the simplified tree equals the whitespace-normalized input."""
    if start is None:
        start = name
    self.grammar_file = GRAMMAR_PATTERN.replace('{name}', name)
    self.parser = DocOptParserPEG(grammar_file=self.grammar_file,
                                  start=start)
    # redirect pass output to the terminal so pytest capture does not
    # swallow it
    with open("/dev/tty", "w") as tty, redirect_stdout(tty):
        tree = self.perform_pass(0, 'raw', self.parser.parse, input)
        tree = self.perform_pass(1, 'simplify-1', Simplify_Pass1().visit,
                                 tree)
    # remove leading and trailing whitespace including newlines
    output = input.strip()
    # reduce each expanse of whitespace to a single space
    output = re.compile('[ \t]+').sub(' ', output)
    # remove spaces before of after a newline
    output = re.compile(' ?\n ?').sub('\n', output)
    expect = Terminal(self.parser.parser.parser_model, 0, value=output)
    assert tree == expect
    return tree
def test_single_short_no_arg(self):
    """A lone short flag '-f' parses as a single option."""
    text = '-f'
    only_option = NonTerminal(option(),
                              [Terminal(short_no_arg(), 0, '-f')])
    expect = create_expect(only_option, eof=(text[-1] != '\n'))
    self.parse_and_verify(text, expect)
def at_start():
    """Case: separator at the start of input, then a phrase."""
    name = 'at_start_followed_by_phrase'
    phrase = fake_spaces_etc(s, 'now is the time')
    assert s not in phrase
    _words = get_words(s)
    text = s + phrase
    expect = (t_s, Terminal(_words(), 0, phrase))
    return name, grammar(_words), text, expect
def as_lines(self, node, children, depth=0):
    """Collapse *children* (or the node's own value when there are none)
    into a single Terminal carrying newline-joined text."""
    if children:
        joined = '\n'.join(child.value for child in children)
    else:
        joined = node.value
    return Terminal(StrMatch('', node.rule_name), 0, joined)
def test_newline_elements_only(self):
    """Three bare newlines parse as three newline elements plus EOF."""
    parser = ParserPython(document, skipws=False)
    text = '\n\n\n'
    parsed = parser.parse(text)

    nl_element = NonTerminal(element(),
                             [Terminal(newline(), 0, '\n')])
    expect = NonTerminal(document(), [
        NonTerminal(body(), [nl_element, nl_element, nl_element]),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def at_start_two_lines():
    """Case: separator + phrase (ending in newline), repeated twice."""
    name = 'at_start_followed_by_phrase_two_lines'
    phrase = fake_spaces_etc(s, 'now is the time' + '\n')
    assert s not in phrase
    _words = get_words(s)
    text = (s + phrase) * 2
    t_phrase = Terminal(_words(), 0, phrase)
    expect = (t_s, t_phrase, t_s, t_phrase)
    return name, grammar(_words), text, expect
def _visit_option_help(self, node, children):
    """Collapse an option-help node's children into one Terminal whose
    value is the newline-joined help text. (Leading underscore: this
    visitor appears to be disabled — confirm before enabling.)"""
    print(f": visit_option_help : {node.name}")
    pp(children)
    # Flatten any trailing nested lists up into the top-level list.
    while isinstance(children[-1], list):
        additional = children[-1]
        children = children[:-1]
        children.extend(additional)
    # NOTE(review): only the last element is converted from Terminal to
    # its value — the join below assumes every other element is already
    # a str; confirm against the callers.
    if isinstance(children[-1], Terminal):
        children[-1] = children[-1].value
    return Terminal(node.rule, 0, '\n'.join(children))
def test_01_text_line_single(self):
    """A single text line followed by EOF parses as a document."""
    def document():
        return Sequence((text_line, EOF), rule_name='document')

    parser = ParserPython(document, skipws=False)
    text = self.line1
    parsed = parser.parse(text)

    nl = Terminal(newline(), 0, '\n')
    line_node = NonTerminal(text_line(), [
        Terminal(words(), 0, self.line1[:-1]), nl])
    expect = NonTerminal(document(),
                         [line_node, Terminal(EOF(), 0, '')])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def term__operand(op):
    """Wrap an operand token in the matching operand parse node;
    rejects anything that is neither '<angled>' nor ALL_CAPS form."""
    if re_operand_angled.fullmatch(op):
        kind = operand_angled
    elif re_operand_all_caps.fullmatch(op):
        kind = operand_all_caps
    else:
        raise ValueError(
            f"Invalid optdef operand '{op}'. Expected either an "
            f"angle operand, '<foo>', or all caps, 'FOO'. Please address.")
    return NonTerminal(operand(), [Terminal(kind(), 0, op)])