def test_pair(self):
    """'-f -x' must parse as an option-list with two short options."""
    source = '-f -x'

    # first list entry: the bare '-f' option
    first = NonTerminal(ol_first_option(), [
        NonTerminal(option(), [Terminal(short_no_arg(), 0, '-f')]),
    ])
    # second entry: a single-space separator followed by '-x'
    space_sep = NonTerminal(ol_separator(), [
        NonTerminal(ol_space(), [Terminal(StrMatch(' '), 0, ' ')]),
    ])
    second = NonTerminal(ol_term_with_separator(), [
        space_sep,
        NonTerminal(ol_term(), [
            NonTerminal(option(), [Terminal(short_no_arg(), 0, '-x')]),
        ]),
    ])
    opt_line = NonTerminal(option_line(), [
        NonTerminal(option_list(), [first, second]),
        Terminal(EOF(), 0, ''),
    ])
    expect = NonTerminal(document(), [
        NonTerminal(body(), [NonTerminal(element(), [opt_line])]),
        Terminal(EOF(), 0, ''),
    ])
    self.parse_and_verify(source, expect)
def create_expect(*terminals, sep=None):
    """Build the expected parse tree for an option-list of *terminals*.

    The first terminal becomes the ``ol_first_option``; every following
    terminal is wrapped in an ``ol_term_with_separator``.  Operands always
    get a single-space separator, all other terms use *sep*.

    :param terminals: parse-tree terminals, at least one required
    :param sep: separator spec passed to expect_separator() for non-operands
    :returns: NonTerminal rooted at document()
    :raises ValueError: when no terminals are supplied
    """
    if not terminals:
        raise ValueError("No terminals provided. Please provide at least one.")

    separator = expect_separator(sep)
    sep_space = expect_separator(' ')  # FIXME: create global for 'SPACE'

    expect = NonTerminal(document(), [
        NonTerminal(body(), [
            NonTerminal(element(), [
                NonTerminal(option_list(), [
                    NonTerminal(ol_first_option(), [terminals[0]]),
                    *[
                        NonTerminal(ol_term_with_separator(), [
                            # operands are always joined by a plain space
                            (sep_space if term.rule_name == 'operand'
                             else separator),
                            NonTerminal(ol_term(), [term]),
                        ])
                        for term in terminals[1:]
                    ],
                ]),
            ]),
        ]),
        Terminal(EOF(), 0, ''),
    ])
    return expect
def builder(self, texts):
    """Build ``(text, expect)`` — the input string and its expected parse
    tree — from a list of atoms, where each atom is either '\n' or a
    paragraph fragment.

    BUG FIX: the original accumulated the input into a local named ``text``,
    which shadowed the grammar rule ``text`` and made the later call
    ``Terminal(text(), 0, ...)`` invoke a *str* (TypeError).  The
    accumulator is renamed ``source`` so the rule stays reachable.
    """
    t_newline = Terminal(newline(), 0, '\n')
    source = ""
    body_ = []
    for atom in texts:
        if atom == '\n':
            source += atom
            body_.append(NonTerminal(element(), [t_newline]))
        else:
            # Fiddling necessary since 'line' does not support fragments
            # and paragraph does not support leading newlines
            atom = atom.strip()
            source += atom + '\n'
            paragraph_ = [
                NonTerminal(line(), [Terminal(text(), 0, fragment),
                                     t_newline])
                for fragment in atom.split('\n')
            ]
            body_.append(NonTerminal(element(), [
                NonTerminal(paragraph(), paragraph_),
            ]))
    expect = NonTerminal(document(), [
        NonTerminal(body(), body_),
        Terminal(EOF(), 0, ''),
    ])
    return (source, expect)
def test_02_text_line_pair(self):
    """Two consecutive text lines parse into a body of two text_line nodes."""
    def body():
        return OneOrMore([text_line], rule_name='body')

    def document():
        return Sequence((body, EOF), rule_name='document')

    parser = ParserPython(document, skipws=False)
    text = self.line1 + self.line2
    parsed = parser.parse(text)

    nl = Terminal(newline(), 0, '\n')
    # each expected text_line: the words without their trailing newline, then
    # the newline terminal
    line_nodes = [
        NonTerminal(text_line(), [Terminal(words(), 0, raw[:-1]), nl])
        for raw in (self.line1, self.line2)
    ]
    expect = NonTerminal(document(), [
        NonTerminal(body(), line_nodes),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}"
    )
def test_with_some_words_2(self):
    """A word run followed by a blank line parses into three elements."""
    parser = ParserPython(document, skipws=False)
    text = f"{self.words1}\n\n"
    parsed = parser.parse(text)

    elements = [
        NonTerminal(element(), [Terminal(words(), 0, self.words1)]),
        NonTerminal(element(), [Terminal(newline(), 0, '\n')]),
        NonTerminal(element(), [Terminal(blank_line(), 0, '\n')]),
    ]
    expect = NonTerminal(document(), [
        NonTerminal(body(), elements),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def test_05_paragraph_multiple(self):
    """Three repeats of a two-line paragraph + blank line parse identically."""
    def body():
        return OneOrMore(OrderedChoice([paragraph, newline]), rule_name='body')

    def document():
        return Sequence((body, EOF), rule_name='document')

    parser = ParserPython(document, skipws=False)
    text = (self.line1 + self.line2 + '\n') * 3
    parsed = parser.parse(text)

    nl = Terminal(newline(), 0, '\n')
    # a paragraph is the two text lines, each words-minus-'\n' plus newline
    para = NonTerminal(paragraph(), [
        NonTerminal(text_line(), [Terminal(words(), 0, self.line1[:-1]), nl]),
        NonTerminal(text_line(), [Terminal(words(), 0, self.line2[:-1]), nl]),
    ])
    expect = NonTerminal(document(), [
        NonTerminal(body(), [para, nl] * 3),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}"
    )
def option_line_expect(*terminals, eof=False, sep=None, indent=None, gap=None,
                       help_=None, extra=0):
    """Build the expected parse tree for one option line.

    :param terminals: option/operand terminals, first becomes ol_first_option
    :param eof: append an EOF terminal to the option_line when True
    :param sep: separator spec for non-operand terms (operands always get ' ')
    :param indent: optional leading whitespace terminal content
    :param gap: whitespace between the option list and the help text
    :param help_: optional help text appended after *gap*
    :param extra: extra spaces added to *gap* (negative values clamped to 0)
    :returns: NonTerminal rooted at option_line()
    :raises ValueError: when no terminals are supplied
    """
    if not terminals:
        raise ValueError(
            "No terminals provided. Please provide at least one.")

    separator = expect_separator(sep)
    sep_space = expect_separator(' ')  # required for operands

    members = [
        NonTerminal(option_list(), [
            NonTerminal(ol_first_option(), [terminals[0]]),
            *[
                NonTerminal(ol_term_with_separator(), [
                    (sep_space if term.rule_name == 'operand' else separator),
                    NonTerminal(ol_term(), [term]),
                ])
                for term in terminals[1:]
            ],
        ]),
        Terminal(newline(), 0, '\n'),
    ]

    if indent:
        members.insert(0, Terminal(StrMatch(' ', rule_name='wx'), 0, indent))

    if help_:
        if extra < 0:
            extra = 0
        # Guard: callers may pass help_ without gap; the original crashed
        # here with TypeError on None += str.
        gap = (gap or '') + ' ' * extra
        # both gap and help are inserted just before the trailing newline
        members.insert(
            -1, Terminal(StrMatch(gap, rule_name='option_line_gap'), 0, gap))
        members.insert(
            -1, Terminal(StrMatch('.', rule_name='option_line_help'), 0, help_))

    expect = NonTerminal(option_line(), [*members])
    if eof:
        expect.append(Terminal(EOF(), 0, ''))
    return expect
def expect_document(sections):
    """Wrap *sections* in the standard document > body > element scaffolding.

    :param sections: non-empty list of parse-tree nodes for a single element
    :returns: NonTerminal rooted at document(), terminated by EOF
    :raises ValueError: when sections is empty
    """
    if not sections:
        raise ValueError("No sections provided. Please provide at least one.")
    return NonTerminal(document(), [
        NonTerminal(body(), [NonTerminal(element(), sections)]),
        Terminal(EOF(), 0, ''),
    ])
def test_single(self):
    """A lone '-f' parses as a one-entry option list."""
    source = '-f'
    flag = NonTerminal(option(), [Terminal(short_no_arg(), 0, '-f')])
    opt_line = NonTerminal(option_line(), [
        NonTerminal(option_list(), [
            NonTerminal(ol_first_option(), [flag]),
        ]),
        Terminal(EOF(), 0, ''),
    ])
    expect = NonTerminal(document(), [
        NonTerminal(body(), [NonTerminal(element(), [opt_line])]),
        Terminal(EOF(), 0, ''),
    ])
    self.parse_and_verify(source, expect)
def create_expect(*terminals, eof=False, sep=None):
    """Build the expected document tree for an option-list of *terminals*.

    Operand terms are separated by a single space, all others by *sep*.
    When *eof* is True the document's EOF terminal is also appended inside
    the option_line node.
    """
    if not terminals:
        raise ValueError(
            "No terminals provided. Please provide at least one.")

    separator = expect_separator(sep)
    sep_space = expect_separator(' ')  # required for operands

    trailing_terms = [
        NonTerminal(ol_term_with_separator(), [
            (sep_space if term.rule_name == 'operand' else separator),
            NonTerminal(ol_term(), [term]),
        ])
        for term in terminals[1:]
    ]
    opt_list = NonTerminal(option_list(), [
        NonTerminal(ol_first_option(), [terminals[0]]),
        *trailing_terms,
    ])
    opt_line = NonTerminal(option_line(), [opt_list])
    expect = NonTerminal(document(), [
        NonTerminal(body(), [NonTerminal(element(), [opt_line])]),
        Terminal(EOF(), 0, ''),
    ])
    if eof:
        # document > body > element > option_line : give it the EOF terminal
        expect[0][0][0].append(expect[-1])
    return expect
def __init__(self, *args, **kwargs): super(TextXModelParser, self).__init__(*args, **kwargs) # By default first rule is starting rule # and must be followed by the EOF self.parser_model = Sequence(nodes=[top_rule, EOF()], rule_name='Model', root=True) self.comments_model = comments_model # Stack for metaclass instances self._inst_stack = [] # Dict for cross-ref resolving # { id(class): { obj.name: obj}} self._instances = {}
def test_with_some_paragraphs(self):
    """Two identical paragraphs separated by a blank line."""
    parser = ParserPython(document, skipws=False)
    para_text = f"{self.words1}\n{self.words2}\n{self.words3}\n"
    text = para_text + '\n' + para_text
    parsed = parser.parse(text)

    nl_element = NonTerminal(element(), [Terminal(newline(), 0, '\n')])
    para_nodes = []
    for chunk in (self.words1, self.words2, self.words3):
        para_nodes.append(NonTerminal(element(), [Terminal(words(), 0, chunk)]))
        para_nodes.append(nl_element)

    expect = NonTerminal(document(), [
        NonTerminal(body(), [
            *para_nodes,
            NonTerminal(element(), [Terminal(blank_line(), 0, '\n')]),
            *para_nodes,
        ]),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def test_newline_elements_only(self):
    """Three bare newlines parse as three newline elements."""
    parser = ParserPython(document, skipws=False)
    text = '\n\n\n'
    parsed = parser.parse(text)

    nl_element = NonTerminal(element(), [Terminal(newline(), 0, '\n')])
    expect = NonTerminal(document(), [
        NonTerminal(body(), [nl_element] * 3),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def test_01_text_line_single(self):
    """A single text line followed by EOF."""
    def document():
        return Sequence((text_line, EOF), rule_name='document')

    parser = ParserPython(document, skipws=False)
    text = self.line1
    parsed = parser.parse(text)

    expect = NonTerminal(document(), [
        NonTerminal(text_line(), [
            Terminal(words(), 0, self.line1[:-1]),  # words without the '\n'
            Terminal(newline(), 0, '\n'),
        ]),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}"
    )
def __init__(self, *args, **kwargs): super(TextXModelParser, self).__init__(*args, **kwargs) # By default first rule is starting rule # and must be followed by the EOF self.parser_model = Sequence( nodes=[top_rule, EOF()], rule_name='Model', root=True) self.comments_model = comments_model # Stack for metaclass instances self._inst_stack = [] # Dict for cross-ref resolving # { id(class): { obj.name: obj}} self._instances = {} # List to keep track of all cross-ref that need to be resolved # Contained elements are tuples: (instance, metaattr, cross-ref) self._crossrefs = []
def test_with_some_words_1(self):
    """Words + newline + blank line, built with inline element nodes."""
    parser = ParserPython(document, skipws=False)
    text = f"{self.words1}\n\n"
    parsed = parser.parse(text)

    expect = NonTerminal(document(), [
        NonTerminal(body(), [
            NonTerminal(element(), [Terminal(words(), 0, self.words1)]),
            NonTerminal(element(), [Terminal(newline(), 0, '\n')]),
            NonTerminal(element(), [Terminal(blank_line(), 0, '\n')]),
        ]),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def builder(self, texts):
    """Build ``(text, expect)`` from a list of atoms, each being either a
    newline or a run of words; the text is the concatenation of all atoms.
    """
    nl = Terminal(newline(), 0, '\n')
    elements = [
        NonTerminal(element(), [nl]) if atom == '\n'
        else NonTerminal(element(), [Terminal(words(), 0, atom)])
        for atom in texts
    ]
    expect = NonTerminal(document(), [
        NonTerminal(body(), elements),
        Terminal(EOF(), 0, ''),
    ])
    return (''.join(flatten(texts)), expect)
def test_00_simply_three_newlines(self):
    """Baseline: three bare newlines with locally defined grammar rules."""
    def element():
        return OrderedChoice(newline, rule_name='element')

    def body():
        return OneOrMore(element, rule_name='body')

    def document():
        return Sequence(body, EOF, rule_name='document')

    parser = ParserPython(document, skipws=False)
    text = '\n\n\n'
    parsed = parser.parse(text)

    nl_element = NonTerminal(element(), [Terminal(newline(), 0, '\n')])
    expect = NonTerminal(document(), [
        NonTerminal(body(), [nl_element, nl_element, nl_element]),
        Terminal(EOF(), 0, ''),
    ])
    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}"
    )
BLANK_LINE_REGEX = r'(?<=\n)' + NEWLINE_REGEX def blank_line(): """Two newlines with optional whitespace in between""" return _(BLANK_LINE_REGEX, rule_name='blank_line', skipws=False) #------------------------------------------------------------------------------ t_newline = Terminal(newline(), 0, '\n') p_newline = ParseSpec(LINEFEED, newline, t_newline) t_eof = Terminal(EOF(), 0, '') p_eof = ParseSpec('', EOF, t_eof) #------------------------------------------------------------------------------ def linefeed_eol_only(text): n_linefeeds = text.count(LINEFEED) # print(f"\n: n_linefeeds = {n_linefeeds}") if n_linefeeds <= 0: raise ValueError(f"No linefeeds in <text>, one is required " "at the end. Please address.") if n_linefeeds > 1:
def scenarios(rule, s):  # -> grammar, text, expect
    """Yield (name, grammar, text, expect) test scenarios that exercise
    *rule* matching the literal string *s* at various document positions:
    alone, at the start, repeated, in the middle, at the end, and scattered
    through a paragraph.

    NOTE(review): code left byte-identical; comments only.
    """
    assert FAKE_SPACE not in s, "INTERNAL ERROR, chosen 'fake space' in string to be tested !"
    # deep-copy so the caller's rule object is never mutated by the parser
    rule = deepcopy(rule)
    catchall = RegExMatch(r'.*', rule_name='catch_all')
    newline = RegExMatch(r'[\n]', rule_name='newline')
    # t_newline = Terminal(newline, 0, '\n')
    t_eof = Terminal(EOF(), 0, '')  # NOTE(review): appears unused below — confirm
    t_s = Terminal(rule, 0, s)      # expected terminal for one match of 's'

    def grammar(_words):
        # document body: any mix of the rule under test, word runs,
        # catch-all and newlines, terminated by EOF
        body = OneOrMore(OrderedChoice([rule, _words, catchall, newline]))
        return Sequence((body, EOF))

    def itself():
        # 's' alone must yield exactly one terminal
        name = f"by_itself"
        return name, Sequence((rule, EOF)), s, (t_s, )

    def at_start():
        name = 'at_start_followed_by_phrase'
        phrase = fake_spaces_etc(s, 'now is the time')
        assert s not in phrase
        _words = get_words(s)
        text = ''.join([s, phrase])
        expect = (t_s, Terminal(_words(), 0, phrase))
        return name, grammar(_words), text, expect

    def at_start_twice():
        name = 'at_start_followed_by_phrase_twice'
        # 's' at start followed by a phrase, TWICE
        phrase = fake_spaces_etc(s, 'now is the time')
        assert s not in phrase
        _words = get_words(s)
        text = ''.join([*((s, phrase) * 2)])
        t_phrase = Terminal(_words(), 0, phrase)
        expect = (*((t_s, t_phrase) * 2), )
        return name, grammar(_words), text, expect

    def at_start_two_lines():
        # same as at_start_twice but each phrase ends with a newline,
        # so the two occurrences sit on separate lines
        name = 'at_start_followed_by_phrase_two_lines'
        phrase = fake_spaces_etc(s, 'now is the time' + '\n')
        assert s not in phrase
        _words = get_words(s)
        text = ''.join([*((s, phrase) * 2)])
        t_phrase = Terminal(_words(), 0, phrase)
        expect = (*((t_s, t_phrase) * 2), )
        return name, grammar(_words), text, expect

    # !@#
    def in_the_middle():
        name = 'in_the_middle_between_two_phrases'
        left_phrase = fake_spaces_etc(s, 'for all good men')
        right_phrase = fake_spaces_etc(s, 'to rise up')
        assert s not in left_phrase
        assert s not in right_phrase
        _words = get_words(s)
        text = ''.join([left_phrase, s, right_phrase])
        t_left = Terminal(_words(), 0, left_phrase)
        t_right = Terminal(_words(), 0, right_phrase)
        expect = (t_left, t_s, t_right)
        return name, grammar(_words), text, expect

    def at_end():
        # NOTE(review): despite the name, 's' is placed FIRST here
        # (''.join([s, phrase])) just like at_start — looks copied from
        # at_start; confirm the intended order before relying on it.
        name = 'at_end_preceeded_by_a_phrase'
        phrase = fake_spaces_etc(s, 'in defense of freedom')
        assert s not in phrase
        _words = get_words(s)
        t_phrase = Terminal(_words(), 0, phrase)
        text = ''.join([s, phrase])
        expect = ( t_s, t_phrase, )
        return name, grammar(_words), text, expect

    #--------------------------------------------------------------------------
    def paragraph():
        name = 'several_occurances_in_a_paragraph'
        # '<s>' marks where 's' will be spliced between phrases
        text = """<s> <s>The essence of America — that which really unites us — <s>is not ethnicity, <s>or<s>nationality, or religion. It is an <s> idea—and what an <s> idea it is : that you can come <s><s> from humble circumstances and do great things.<s>  - Condoleezza Rice <s>"""
        # zero length phrases at start, end and one more in the middle
        n_empty = 3
        # temporarily hide the '<s>' markers (as BEL chars) so
        # fake_spaces_etc cannot disturb them
        text = text.replace('<s>', chr(7))
        text = fake_spaces_etc(s, text)
        text = text.replace(chr(7), '<s>')
        assert s not in text
        _words = get_words(s)
        phrases = re.split('<s>', text)
        assert len(phrases[0]) == 0
        assert len(phrases[-1]) == 0
        t_s = Terminal(rule, 0, s)
        tw = lambda p: Terminal(_words(), 0, p)
        # expected terminals: (phrase?, s) pairs; empty phrases contribute
        # nothing, flatten() then drops the nesting
        terms = [((tw(p) if len(p) > 0 else ()), t_s) for p in phrases]
        terms = flatten(terms)
        del terms[-1]  # no 's' after the final (empty) phrase
        assert len(terms) == 2 * len(phrases) - n_empty - 1
        # Handle the simplest Zero/One Or Many rules on a character class
        # if isinstance(rule, RegExMatch) and rule.to_match[-1] in '*+':
        # collapse any series of 't_s' elements into a single ts element
        limit = len(terms) - 1
        idx = 0
        while idx < limit:
            if (terms[idx].rule_name == t_s.rule_name
                    and terms[idx + 1].rule_name == t_s.rule_name):
                # adjacent matches of the rule merge into one terminal
                # whose value is the concatenation
                value = terms[idx].value + terms[idx + 1].value
                terms[idx] = Terminal(rule, 0, value)
                del terms[idx + 1]
                limit -= 1
            else:
                idx += 1
        return name, grammar(_words), s.join(phrases), tuple(terms)

    #--------------------------------------------------------------------------
    # !@#
    tests = [
        itself,
        at_start,
        at_start_twice,
        at_start_two_lines,
        in_the_middle,
        at_end,
        paragraph,
    ]
    for test in tests:
        yield test()