def test_nonbreaking_space_issue_99():
    # https://github.com/goodmami/penman/issues/99
    # ASCII whitespace separates tokens; non-breaking characters do not.
    def token_types(s):
        return [tok.type for tok in lexer.lex(s)]

    # each ASCII separator splits '1' and '2' into two symbols
    for sep in (' ', '\t', '\n', '\r'):
        assert token_types('1' + sep + '2') == ['SYMBOL', 'SYMBOL']
    # a non-breaking space is part of the symbol, not a separator
    assert token_types('1\u00a02') == ['SYMBOL']
    assert token_types('あ い') == ['SYMBOL']
def parse(s: str) -> Tree:
    """
    Parse PENMAN-notation string *s* into its tree structure.

    Args:
        s: a string containing a single PENMAN-serialized graph
    Returns:
        The tree structure described by *s*.
    Example:
        >>> import penman
        >>> penman.parse('(b / bark-01 :ARG0 (d / dog))')  # noqa
        Tree(('b', [('/', 'bark-01'), (':ARG0', ('d', [('/', 'dog')]))]))

    """
    # Tokenize with the PENMAN grammar, then build the tree in one step.
    return _parse(lex(s, pattern=PENMAN_RE))
def parse_triples(s: str) -> List[BasicTriple]:
    """
    Parse a triple conjunction from *s*.

    Example:
        >>> import penman
        >>> for triple in penman.parse_triples('''
        ...         instance(b, bark) ^
        ...         ARG0(b, d) ^
        ...         instance(d, dog)'''):
        ...     print(triple)
        ('b', ':instance', 'bark')
        ('b', ':ARG0', 'd')
        ('d', ':instance', 'dog')

    """
    # Tokenize with the triple grammar, then build the triple list.
    return _parse_triples(lex(s, pattern=TRIPLE_RE))
def iterparse(lines: Union[Iterable[str], str]) -> Iterator[Tree]:
    """
    Yield trees parsed from *lines*.

    Args:
        lines: a string or open file with PENMAN-serialized graphs
    Returns:
        The :class:`~penman.tree.Tree` object described in *lines*.
    Example:
        >>> import penman
        >>> for t in penman.iterparse('(a / alpha) (b / beta)'):
        ...     print(repr(t))
        ...
        Tree(('a', [('/', 'alpha')]))
        Tree(('b', [('/', 'beta')]))

    """
    stream = lex(lines, pattern=PENMAN_RE)
    # A graph may only start with a comment or an opening parenthesis;
    # stop once the remaining tokens cannot begin another graph.
    while stream and stream.peek().type in ('COMMENT', 'LPAREN'):
        yield _parse(stream)
def _lex(s):
    # Lex *s* with the default pattern and return just the token types.
    types = []
    for token in lexer.lex(s):
        types.append(token.type)
    return types
def _lex(s):
    # Lex *s* with the triple grammar and return just the token types.
    types = []
    for token in lexer.lex(s, pattern=lexer.TRIPLE_RE):
        types.append(token.type)
    return types
def test_lexing_issue_50():
    # https://github.com/goodmami/penman/issues/50
    # A tilde inside a quoted string must not be mistaken for an alignment
    # marker; only the '~1' after the closing quote is one.
    tokens = lexer.lex('(a :ROLE "a~b"~1)')
    expected = ['LPAREN', 'SYMBOL', 'ROLE', 'STRING', 'ALIGNMENT', 'RPAREN']
    assert [tok.type for tok in tokens] == expected