# Example 1
def test_nonbreaking_space_issue_99():
    # https://github.com/goodmami/penman/issues/99
    # ASCII whitespace separates tokens, while a non-breaking space
    # (U+00A0) or other non-ASCII space is treated as part of a symbol.
    two_symbols = ['SYMBOL', 'SYMBOL']
    for separator in (' ', '\t', '\n', '\r'):
        result = [tok.type for tok in lexer.lex('1' + separator + '2')]
        assert result == two_symbols
    assert [tok.type for tok in lexer.lex('1\u00a02')] == ['SYMBOL']
    assert [tok.type for tok in lexer.lex('あ い')] == ['SYMBOL']
# Example 2
def parse(s: str) -> Tree:
    """
    Parse PENMAN-notation string *s* into its tree structure.

    Args:
        s: a string containing a single PENMAN-serialized graph
    Returns:
        The tree structure described by *s*.
    Example:
        >>> import penman
        >>> penman.parse('(b / bark-01 :ARG0 (d / dog))')  # noqa
        Tree(('b', [('/', 'bark-01'), (':ARG0', ('d', [('/', 'dog')]))]))

    """
    # Tokenize with the full PENMAN pattern, then build the tree.
    return _parse(lex(s, pattern=PENMAN_RE))
# Example 3
def parse_triples(s: str) -> List[BasicTriple]:
    """
    Parse a triple conjunction from *s*.

    Example:
        >>> import penman
        >>> for triple in penman.parse_triples('''
        ...         instance(b, bark) ^
        ...         ARG0(b, d) ^
        ...         instance(d, dog)'''):
        ...     print(triple)
        ('b', ':instance', 'bark')
        ('b', ':ARG0', 'd')
        ('d', ':instance', 'dog')

    """
    # Tokenize with the triple-conjunction pattern, then parse.
    return _parse_triples(lex(s, pattern=TRIPLE_RE))
# Example 4
def iterparse(lines: Union[Iterable[str], str]) -> Iterator[Tree]:
    """
    Yield trees parsed from *lines*.

    Args:
        lines: a string or open file with PENMAN-serialized graphs
    Returns:
        The :class:`~penman.tree.Tree` object described in *lines*.
    Example:
        >>> import penman
        >>> for t in penman.iterparse('(a / alpha) (b / beta)'):
        ...     print(repr(t))
        ...
        Tree(('a', [('/', 'alpha')]))
        Tree(('b', [('/', 'beta')]))

    """
    # Token types that can begin a new serialized graph.
    starters = ('COMMENT', 'LPAREN')
    tokens = lex(lines, pattern=PENMAN_RE)
    # Keep yielding trees while the stream still starts a new graph.
    while tokens and tokens.peek().type in starters:
        yield _parse(tokens)
# Example 5
 def _lex(s):
     """Return the token types produced by lexing *s* with the default pattern."""
     return [token.type for token in lexer.lex(s)]
# Example 6
 def _lex(s):
     """Return the token types produced by lexing *s* as a triple conjunction."""
     return [token.type for token in lexer.lex(s, pattern=lexer.TRIPLE_RE)]
# Example 7
def test_lexing_issue_50():
    # https://github.com/goodmami/penman/issues/50
    # A '~' alignment marker directly after a quoted string must lex as
    # a separate ALIGNMENT token, even though '~' also appears inside it.
    expected = ['LPAREN', 'SYMBOL', 'ROLE', 'STRING', 'ALIGNMENT', 'RPAREN']
    assert [tok.type for tok in lexer.lex('(a :ROLE "a~b"~1)')] == expected