def verify(self, text, expect, parsed, quiet=QUIET_NONE):
    """Remember the inputs on the instance and assert parsed == expect.

    ``self.text``, ``self.expect`` and ``self.parsed`` are (re)assigned only
    when the attribute is missing or differs from the incoming value
    (``!=`` for the text, ``nodes_equal`` for the two trees).

    When ``self.record`` is truthy, the grammar, text, expected tree,
    flattened expected tree and parser model are passed to
    ``write_scratch``.  When ``self.analyzing`` is truthy,
    ``self.analyze()`` is invoked.

    ``quiet`` is accepted but not consulted by this method.

    Raises:
        AssertionError: if ``nodes_equal(parsed, expect)`` is false.  The
            message contains the text and both trees rendered by ``pp_str``.
    """
    text_changed = not hasattr(self, 'text') or self.text != text
    if text_changed:
        self.text = text

    if not (hasattr(self, 'expect') and nodes_equal(expect, self.expect)):
        self.expect = expect

    if not (hasattr(self, 'parsed') and nodes_equal(parsed, self.parsed)):
        self.parsed = parsed

    if self.record:
        # NOTE(review): 'parsed' is not handed to write_scratch here, unlike
        # in parse() -- confirm whether that is intentional.
        snapshot = dict(
            grammar=self.grammar,
            text=self.text,
            expect=self.expect,
            expect_f=flatten(self.expect),
            model=self.parser.parser_model,
        )
        write_scratch(**snapshot)

    if self.analyzing:
        self.analyze()

    assert nodes_equal(parsed, expect), (
        f"text = '{text}' :\n[expect]\n{pp_str(expect)}\n"
        f"[parsed]\n{pp_str(parsed)}"
    )
def parse(self, text, expect, quiet=QUIET_NONE):
    """Parse *text* with ``self.parser`` and return the resulting tree.

    ``self.text`` and ``self.expect`` are (re)assigned only when the
    attribute is missing or differs from the incoming value.  The parse
    result is always stored in ``self.parsed`` and returned.

    When ``self.record`` is truthy, the inputs are passed to
    ``write_scratch`` before parsing and the parsed tree afterwards.

    If parsing (or recording the parsed tree) raises, a report showing the
    expected tree, the text and the error is printed unless ``quiet.parse``
    is truthy, and the exception is re-raised unchanged.
    """
    text_changed = not hasattr(self, 'text') or self.text != text
    if text_changed:
        self.text = text

    if not (hasattr(self, 'expect') and nodes_equal(expect, self.expect)):
        self.expect = expect

    # Recorded here as well as in verify(), since either method may be
    # called without the other.
    if self.record:
        snapshot = dict(
            grammar=self.grammar,
            text=self.text,
            expect=self.expect,
            expect_f=flatten(self.expect),
            model=self.parser.parser_model,
        )
        write_scratch(**snapshot)

    try:
        self.parsed = self.parser.parse(text)
        if self.record:
            write_scratch(parsed=self.parsed)
    except Exception as exc:
        if not quiet.parse:
            report = (
                f"\n[expect]\n{pp_str(expect)}\n\n"
                f"text = '{text}' :\n\n"
                f"Parse FAILED :\n{exc!s}"
            )
            print(report)
        raise

    return self.parsed
def paragraph():
    """Build a paragraph test case with many '<s>' separator markers.

    Relies on the free variables ``s`` (the separator text) and ``rule``
    (the separator rule) from the enclosing scope.

    Returns a 4-tuple: the case name, ``grammar(_words)``, the input text
    (phrases joined by ``s``) and the expected terminals as a tuple, with
    adjacent separator terminals merged into one.
    """
    name = 'several_occurances_in_a_paragraph'

    text = """<s> <s>The essence of America — that which really unites us — <s>is not ethnicity, <s>or<s>nationality, or religion. It is an <s> idea—and what an <s> idea it is : that you can come <s><s> from humble circumstances and do great things.<s> - Condoleezza Rice <s>"""

    # zero length phrases at start, end and one more in the middle
    n_empty = 3

    # Swap the markers for a placeholder character while
    # fake_spaces_etc() processes the text, then restore them.
    placeholder = chr(7)
    text = fake_spaces_etc(s, text.replace('<s>', placeholder))
    text = text.replace(placeholder, '<s>')
    assert s not in text

    _words = get_words(s)

    phrases = text.split('<s>')
    assert len(phrases[0]) == 0
    assert len(phrases[-1]) == 0

    t_s = Terminal(rule, 0, s)

    def word_terminal(phrase):
        return Terminal(_words(), 0, phrase)

    # One (words, separator) pair per phrase; an empty phrase contributes
    # an empty tuple in place of its words terminal.
    pairs = [
        ((word_terminal(phrase) if phrase else ()), t_s)
        for phrase in phrases
    ]
    terms = flatten(pairs)
    del terms[-1]  # no separator after the final phrase
    assert len(terms) == 2 * len(phrases) - n_empty - 1

    # Collapse any series of separator terminals into a single terminal
    # whose value is the concatenation of the run.
    sep_name = t_s.rule_name
    merged = []
    for term in terms:
        if (merged
                and merged[-1].rule_name == sep_name
                and term.rule_name == sep_name):
            merged[-1] = Terminal(rule, 0, merged[-1].value + term.value)
        else:
            merged.append(term)

    return name, grammar(_words), s.join(phrases), tuple(merged)
def usage_prepare_choice(children: List[Tuple[str, ParseTreeNode]],
                         gap: int = 1):
    """Combine several ( text, expect ) pairs into one BAR-separated choice.

    <children> : list of ( text, expect )
    <gap>      : number of spaces before and after each BAR, '|'
                 [DEFAULT: 1].  Negative values are treated as 0.

    No separate "use BAR" flag is needed: with a single child no separator
    appears, and multiple children are always separated by BAR.

    Returns ( text, expect ) : the children's texts joined by the padded
    BAR, and a NonTerminal for choice() built from the children's expect
    nodes each followed by t_bar, with the last element removed.
    """
    padding = ' ' * max(gap, 0)
    separator = padding + BAR + padding

    text = separator.join([child[0] for child in children])

    with_bars = [(child[1], t_bar) for child in children]
    expect = NonTerminal(choice(), flatten(with_bars))
    del expect[-1]  # drop the final element (the trailing t_bar)

    return (text, expect)
def optlst_permutations(*words, n_opt_max=3):
    """Yield permutations of option spellings derived from *words*.

    For every word (after ``flatten(words)``) ten spellings are generated,
    each as a tuple:

      * the bare short option ``-w`` (first character of the word),
      * ``-w`` + gap + operand, for operand in (WORD, <word>) and
        gap in ('', ' '),
      * the bare long option ``--word``,
      * ``--word`` + gap + operand, for the same operands and
        gap in ('=', ' ').

    Permutations of those spellings are then yielded for each length in
    ``range(1, min(n_opt_max, len(options) + 1))``.

    NOTE(review): as written, ``n_opt_max`` is an exclusive bound (the
    default 3 yields lengths 1 and 2) while the number of options is an
    inclusive one -- confirm this is intended.
    """
    options = []
    for word in flatten(words):
        short_flag = f"-{word[0]}"
        operands = (word.upper(), f"<{word}>")
        long_flag = f"--{word}"

        for flag, gaps in ((short_flag, ('', ' ')), (long_flag, ('=', ' '))):
            options.append((flag,))
            options.extend(
                (flag, gap, operand)
                for operand in operands
                for gap in gaps
            )

    stop = min(n_opt_max, len(options) + 1)
    for length in range(1, stop):
        yield from itertools.permutations(options, length)
def builder(self, texts):
    """Build the input text and the expected parse tree for *texts*.

    The input text is ``flatten(texts)`` joined without a separator.  Each
    item of *texts* becomes one ``element`` NonTerminal wrapping either the
    shared newline Terminal (when the item equals '\\n') or a ``words``
    Terminal holding the item.  The elements are wrapped in a ``body``
    NonTerminal which, followed by an EOF Terminal, forms the ``document``.

    Returns:
        ( text, expect )
    """
    p_newline = Terminal(newline(), 0, '\n')
    text = ''.join(flatten(texts))

    elements = [
        NonTerminal(
            element(),
            [p_newline if atom == '\n' else Terminal(words(), 0, atom)],
        )
        for atom in texts
    ]

    p_body = NonTerminal(body(), elements)
    p_eof = Terminal(EOF(), 0, '')
    return (text, NonTerminal(document(), [p_body, p_eof]))
def usage_prepare_argument_optdef(optdef: OptionDef):
    """Return ( text, expect ) for *optdef* wrapped as an argument node.

    The terms produced by ``create_terms_obj(olst(optdef))`` are each
    paired with ``t_space`` and flattened, the last element is removed,
    and the result becomes the children of a NonTerminal for argument().
    """
    text, pieces = create_terms_obj(olst(optdef))

    spaced = flatten([(piece, t_space) for piece in pieces])
    del spaced[-1]  # drop the final element (the trailing t_space)

    return (text, NonTerminal(argument(), list(spaced)))
def usage_prepare_option(optdef: OptionDef):
    """Return ( text, terms ) for *optdef*.

    The terms produced by ``create_terms_obj(olst(optdef))`` are each
    paired with ``t_space`` and flattened, and the last element is removed
    before the sequence is returned alongside the text.
    """
    text, pieces = create_terms_obj(olst(optdef))

    spaced = flatten([(piece, t_space) for piece in pieces])
    del spaced[-1]  # drop the final element (the trailing t_space)

    return (text, spaced)