QuotedString(quoteChar='(', endQuoteChar=')').setResultsName("subgroup") +
    Suppress("]")
).setParseAction(lambda m: tokens.Context(
    [None, 'Subjgrp:' + subjgrp_label(m.subgroup, [])], bool(m.certain)))

# A curly-quoted phrase such as '“Nonimmigrant visa”' is replaced by a
# paragraph label: "p" followed by hash_for_paragraph() of the quoted text
# (e.g. 'p12345678')
_double_quote_label = QuotedString(quoteChar=u'“', endQuoteChar=u'”')
_double_quote_label.setParseAction(
    lambda match: "p{0}".format(hash_for_paragraph(match[0])))
# A phrase such as "definition for the term “Nonimmigrant visa”" yields a
# paragraph token whose only paragraph label is the one produced for the
# quoted term; "of"/"for" are both accepted and "the term" is optional
definition = (
    Marker("definition")
    + (Marker("of") | Marker("for"))
    + Optional(Marker("the") + Marker("term"))
    + _double_quote_label.copy().setResultsName("paragraph")
)
definition.setParseAction(
    lambda match: tokens.Paragraph.make(paragraphs=[match.paragraph]))

#   grammar which captures all of these possibilities
token_patterns = QuickSearchable(
    put_active | put_passive | post_active | post_passive |
    delete_active | delete_passive | move_active | move_passive |
    designate_active | reserve_active |
    insert_in_order |

    interp | marker_subpart | appendix |
    comment_context_with_section | comment_context_without_section |
    comment_context_under_with_section |
    paragraph_heading_of | section_heading_of |
    multiple_intro_text_of | intro_text_of |
    appendix_section_heading_of |
    QuotedString(quoteChar='(', endQuoteChar=')').setResultsName("subgroup") +
    Suppress("]")
).setParseAction(lambda m: tokens.Context(
    [None, 'Subjgrp:' + subjgrp_label(m.subgroup, [])], bool(m.certain)))

# A curly-quoted phrase such as '“Nonimmigrant visa”' is replaced by a
# paragraph label: "p" followed by hash_for_paragraph() of the quoted text
# (e.g. 'p12345678')
_double_quote_label = QuotedString(quoteChar=u'“', endQuoteChar=u'”')
_double_quote_label.setParseAction(
    lambda match: "p{}".format(hash_for_paragraph(match[0])))
# A phrase such as "definition for the term “Nonimmigrant visa”" yields a
# paragraph token whose only paragraph label is the one produced for the
# quoted term; "of"/"for" are both accepted and "the term" is optional
definition = (
    Marker("definition")
    + (Marker("of") | Marker("for"))
    + Optional(Marker("the") + Marker("term"))
    + _double_quote_label.copy().setResultsName("paragraph")
)
definition.setParseAction(
    lambda match: tokens.Paragraph(paragraphs=[match.paragraph]))

#   grammar which captures all of these possibilities
token_patterns = QuickSearchable(
    put_active | put_passive | post_active | post_passive |
    delete_active | delete_passive | move_active | move_passive |
    designate_active | reserve_active |
    insert_in_order |

    interp | marker_subpart | appendix |
    comment_context_with_section | comment_context_without_section |
    comment_context_under_with_section |
    paragraph_heading_of | section_heading_of |
    multiple_intro_text_of | intro_text_of |
    appendix_section_heading_of |
Esempio n. 3
0
    elif palavra in _PALAVRAS_PADRAO:
        palavra = _PALAVRAS_PADRAO[palavra]

    else:
        palavra = tira_acentos(palavra)

    return palavra


# A word is any run of alphanumerics, 8-bit letters, or the characters
# '_', '=' and '!'; every match is handed to _converte_ingles
palavras_pt_BR = Word(alphanums + alphas8bit + '_=!').setParseAction(
    _converte_ingles)

# Triple-quoted string literals in either quote style; they may span
# several lines and keep their delimiters in the matched text
tripleQuote = (
    QuotedString('"""', multiline=True, unquoteResults=False)
    | QuotedString("'''", multiline=True, unquoteResults=False)
)

# The same literal with a leading ``u`` prefix, combined into one token
unicodeTripleQuote = Combine(Literal('u') + tripleQuote.copy())
unicodeTripleQuote.setName("unicode string triple quoted")

# Top-level grammar: alternatives are tried in the order listed and the
# first one that matches wins, so string literals and comments are tried
# before the generic word pattern
python_brasil = (
    unicodeTripleQuote
    | tripleQuote
    | unicodeString
    | quotedString
    | pythonStyleComment
    | palavras_pt_BR
)


def python_pt_BR(texto, dicionario=None):
    """Run the ``python_brasil`` grammar over *texto* and return the result.

    Args:
        texto: Text to transform. Anything that is not a ``unicode``
            instance is first coerced with ``unicode(texto)``.
        dicionario: Optional extra entries merged into the module-level
            ``PALAVRAS_BRASIL`` via its ``update`` method before the text
            is transformed. The merge is not undone afterwards, so the
            entries remain registered for later calls. ``None`` (the
            default) merges nothing.

    Returns:
        Whatever ``python_brasil.transformString`` returns for the text.
    """
    # PALAVRAS_BRASIL is only mutated in place, never rebound, so no
    # ``global`` declaration is required here.
    if dicionario is not None:
        PALAVRAS_BRASIL.update(dicionario)

    # NOTE(review): ``unicode`` is the Python 2 builtin; this module
    # appears to target Python 2 -- confirm before porting.
    # isinstance() also accepts unicode subclasses without re-wrapping them.
    if not isinstance(texto, unicode):
        texto = unicode(texto)

    return python_brasil.transformString(texto)