# Example no. 1
# 0
class BibTeXGrammar(Grammar):
    r"""Grammar of BibTeX bibliography files.

    The root parser ``bibliography`` accepts any number of ``@Preamble{...}``,
    ``@Comment{...}`` and ``@type{key, field = content, ...}`` items, followed
    by optional whitespace and the end of the input.

    Every class attribute that holds a parser is a symbol of the grammar, so
    the attribute names and their order of definition are part of the
    grammar and must not be changed by hand.
    """
    # `text` is recursive (brace groups may nest), hence the forward
    # declaration; its definition is supplied further down via `text.set()`.
    text = Forward()

    # --- bookkeeping emitted by the parser generator -----------------------
    source_hash__ = "f070f9a8eaff76cdd1669dcb63d8b8f3"
    # Two characters followed by a look-behind for the start of the string:
    # this pattern cannot match anything.
    disposable__ = re.compile('..(?<=^)')
    static_analysis_pending__ = []  # type: List[bool]
    parser_initialization__ = ["upon instantiation"]

    # --- comments and insignificant whitespace -----------------------------
    # A comment runs from `%` up to and including the next line feed.
    COMMENT__ = r'(?i)%[^\n]*\n'
    comment_rx__ = re.compile(COMMENT__)
    WHITESPACE__ = r'\s*'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)

    # --- lexical building blocks -------------------------------------------
    # End of input: no further character may follow.
    EOF = NegativeLookahead(RegExp('(?i).'))
    # Blanks, including a comment if the blanks are followed by `%`.
    WS = Alternative(
        Series(Lookahead(RegExp('(?i)[ \\t]*%')), wsp__),
        RegExp('(?i)[ \\t]+'),
    )
    # One of `%`, `&`, `_` that comes directly after a backslash.
    ESC = Series(
        Lookbehind(RegExp('(?i)\\\\')),
        RegExp('(?i)[%&_]'),
    )
    CONTENT_STRING = OneOrMore(
        Alternative(RegExp('(?i)[^{}%&_ \\t]+'), ESC, WS)
    )
    COMMA_TERMINATED_STRING = ZeroOrMore(
        Alternative(RegExp('(?i)[^,%&_ \\t]+'), ESC, WS)
    )
    NO_BLANK_STRING = Series(
        OneOrMore(Alternative(RegExp('(?i)[^ \\t\\n,%&_]+'), ESC)),
        wsp__,
    )
    WORD = Series(RegExp('(?i)\\w+'), wsp__)

    # --- field contents ----------------------------------------------------
    # Plain content strings interleaved with arbitrarily nested `{...}`.
    text.set(
        ZeroOrMore(
            Alternative(
                CONTENT_STRING,
                Series(
                    Series(Text("{"), wsp__),
                    text,
                    Series(Text("}"), wsp__),
                ),
            )
        )
    )
    plain_content = Synonym(COMMA_TERMINATED_STRING)
    content = Alternative(
        Series(
            Series(Text("{"), wsp__),
            text,
            Series(Text("}"), wsp__),
        ),
        plain_content,
    )

    # --- entries -----------------------------------------------------------
    field = Synonym(WORD)
    key = Synonym(NO_BLANK_STRING)
    type = Synonym(WORD)
    # `@type{key` followed by `, field = content` items, an optional
    # trailing comma and the closing brace.
    entry = Series(
        RegExp('(?i)@'),
        type,
        Series(Text("{"), wsp__),
        key,
        ZeroOrMore(
            Series(
                Series(Text(","), wsp__),
                field,
                Series(Text("="), wsp__),
                content,
                mandatory=2,
            )
        ),
        Option(Series(Text(","), wsp__)),
        Series(Text("}"), wsp__),
        mandatory=6,
    )
    comment = Series(
        Series(Text("@Comment{"), wsp__),
        text,
        Series(Text("}"), wsp__),
        mandatory=2,
    )
    # Body of a preamble string: anything but `"` and `%`, or a `%` comment
    # running to the end of its line.
    pre_code = ZeroOrMore(
        Alternative(RegExp('(?i)[^"%]+'), RegExp('(?i)%.*\\n'))
    )
    preamble = Series(
        Series(Text("@Preamble{"), wsp__),
        RegExp('(?i)"'),
        pre_code,
        RegExp('(?i)"'),
        wsp__,
        Series(Text("}"), wsp__),
        mandatory=5,
    )

    # --- start symbol ------------------------------------------------------
    bibliography = Series(
        ZeroOrMore(Alternative(preamble, comment, entry)),
        wsp__,
        EOF,
    )
    root__ = bibliography
# Example no. 2
# 0
class FixedEBNFGrammar(Grammar):
    r"""Parser for a FixedEBNF source file.

    The grammar describes an EBNF dialect with a fixed set of delimiters:
    ``=`` introduces a definition (``DEF``), ``|`` separates alternatives
    (``OR``), and both the sequence delimiter (``AND``) and the
    end-of-definition marker (``ENDL``) are the empty string.

    The root parser ``syntax`` accepts optional leading whitespace followed
    by any number of definitions and ``@``-directives up to the end of the
    input.

    Class attributes holding parsers are the symbols of the grammar; their
    names and the position of the ``.set()`` calls for the forward-declared
    symbols should therefore be left untouched.
    """
    # Forward declarations for the mutually recursive symbols; they are
    # defined via `.set()` near the end of the class body.
    countable = Forward()
    element = Forward()
    expression = Forward()
    source_hash__ = "8dbc09df6de2f2758e43fc351a3671c7"
    # Names matching this pattern are treated as disposable symbols.
    # NOTE(review): the exact effect on the resulting tree is defined by the
    # Grammar base class, which is not visible here.
    disposable__ = re.compile(
        'component$|pure_elem$|countable$|FOLLOW_UP$|SYM_REGEX$|ANY_SUFFIX$|EOF$'
    )
    static_analysis_pending__ = []  # type: List[bool]
    parser_initialization__ = ["upon instantiation"]
    # Custom error message for 'definition', selected when the text at the
    # error location matches a comma.
    error_messages__ = {
        'definition':
        [(re.compile(r','),
          'Delimiter "," not expected in definition!\\nEither this was meant to be a directive and the directive symbol @ is missing\\nor the error is due to inconsistent use of the comma as a delimiter\\nfor the elements of a sequence.'
          )]
    }
    # Resume patterns for 'definition' and 'directive': a line break
    # followed by something that looks like the start of the next directive
    # (`@`) or definition (`name =`).
    resume_rules__ = {
        'definition': [re.compile(r'\n\s*(?=@|\w+\w*\s*=)')],
        'directive': [re.compile(r'\n\s*(?=@|\w+\w*\s*=)')]
    }
    # Comments: `#` to end of line (unless it starts a `#x..` hex code),
    # `/* ... */` or `(* ... *)`.
    COMMENT__ = r'(?!#x[A-Fa-f0-9])#.*(?:\n|$)|\/\*(?:.|\n)*?\*\/|\(\*(?:.|\n)*?\*\)'
    comment_rx__ = re.compile(COMMENT__)
    WHITESPACE__ = r'\s*'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)
    # Same whitespace parser as `wsp__`, wrapped in `Drop`.
    dwsp__ = Drop(Whitespace(WSP_RE__))

    # --- lexical primitives ------------------------------------------------
    HEXCODE = RegExp('[A-Fa-f0-9]{1,8}')
    # Symbol names: word characters, not starting with a digit.
    SYM_REGEX = RegExp('(?!\\d)\\w+')
    # Body of a /regular expression/ literal; an escaped slash does not end it.
    RE_CORE = RegExp('(?:(?<!\\\\)\\\\(?:/)|[^/])*')
    # Heuristics used to tell regular expressions, literals and character
    # ranges apart. `regex_heuristics` is not referenced by any other parser
    # in this class.
    regex_heuristics = Alternative(RegExp('[^ ]'),
                                   RegExp('[^/\\n*?+\\\\]*[*?+\\\\][^/\\n]/'))
    literal_heuristics = Alternative(
        RegExp('~?\\s*"(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^"]*)*"'),
        RegExp("~?\\s*'(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^']*)*'"),
        RegExp('~?\\s*`(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^`]*)*`'),
        RegExp('~?\\s*´(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^´]*)*´'),
        RegExp('~?\\s*/(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^/]*)*/'))
    # A `[` starts a character range only if it is NOT followed by blank
    # space, by something that looks like a literal, or by a (possibly
    # retrieve-prefixed) symbol and a closing `]`.
    char_range_heuristics = NegativeLookahead(
        Alternative(
            RegExp('[\\n\\t ]'), Series(dwsp__, literal_heuristics),
            Series(Option(Alternative(Text("::"), Text(":?"), Text(":"))),
                   SYM_REGEX, RegExp('\\s*\\]'))))

    # --- the fixed delimiters of this EBNF dialect -------------------------
    CH_LEADIN = Text("0x")
    RE_LEADOUT = Text("/")
    RE_LEADIN = Text("/")
    TIMES = Text("*")
    RNG_DELIM = Text(",")
    RNG_CLOSE = Text("}")
    RNG_OPEN = Text("{")
    # Empty delimiters: no end-of-definition marker and no explicit
    # delimiter between the elements of a sequence.
    ENDL = Text("")
    AND = Text("")
    OR = Text("|")
    DEF = Text("=")
    # End of input: no further character may follow.
    EOF = Drop(NegativeLookahead(RegExp('.')))

    # --- atoms -------------------------------------------------------------
    whitespace = Series(RegExp('~'), dwsp__)
    any_char = Series(Text("."), dwsp__)
    # A single character inside a character range, either plain or escaped.
    free_char = Alternative(RegExp('[^\\n\\[\\]\\\\]'),
                            RegExp('\\\\[nrt`´\'"(){}\\[\\]/\\\\]'))
    # Character given by its hexadecimal code, e.g. `0x20`.
    character = Series(CH_LEADIN, HEXCODE)
    char_range = Series(
        Text("["), Lookahead(char_range_heuristics), Option(Text("^")),
        Alternative(character, free_char),
        ZeroOrMore(Alternative(Series(Option(Text("-")), character),
                               free_char)), Series(Text("]"), dwsp__))
    regexp = Series(RE_LEADIN, RE_CORE, RE_LEADOUT, dwsp__)
    # Plain text in backticks or acute accents.
    plaintext = Alternative(
        Series(RegExp('`(?:(?<!\\\\)\\\\`|[^`])*?`'), dwsp__),
        Series(RegExp('´(?:(?<!\\\\)\\\\´|[^´])*?´'), dwsp__))
    # String literal in double or single quotes.
    literal = Alternative(
        Series(RegExp('"(?:(?<!\\\\)\\\\"|[^"])*?"'), dwsp__),
        Series(RegExp("'(?:(?<!\\\\)\\\\'|[^'])*?'"), dwsp__))
    symbol = Series(SYM_REGEX, dwsp__)

    # --- counted repetition ------------------------------------------------
    multiplier = Series(RegExp('[1-9]\\d*'), dwsp__)
    # Guard used after an opening bracket: succeeds unless a multiplier
    # follows that is not immediately followed by `*`.
    no_range = Alternative(NegativeLookahead(multiplier),
                           Series(Lookahead(multiplier), TIMES))
    # `{n}` or `{n, m}`
    range = Series(RNG_OPEN, dwsp__, multiplier,
                   Option(Series(RNG_DELIM, dwsp__, multiplier)), RNG_CLOSE,
                   dwsp__)
    # `x{n,m}`, `x * n` or `n * x`
    counted = Alternative(
        Series(countable, range), Series(countable, TIMES, dwsp__, multiplier),
        Series(multiplier, TIMES, dwsp__, countable, mandatory=3))

    # --- optional, repeated and grouped expressions -------------------------
    # `[ expr ]` or `elem?`
    option = Alternative(
        Series(Series(Text("["), dwsp__),
               expression,
               Series(Text("]"), dwsp__),
               mandatory=1), Series(element, Series(Text("?"), dwsp__)))
    # `{ expr }` or `elem*`
    repetition = Alternative(
        Series(Series(Text("{"), dwsp__),
               no_range,
               expression,
               Series(Text("}"), dwsp__),
               mandatory=2),
        Series(element, Series(Text("*"), dwsp__), no_range))
    # `{ expr }+` or `elem+`
    oneormore = Alternative(
        Series(Series(Text("{"), dwsp__), no_range, expression,
               Series(Text("}+"), dwsp__)),
        Series(element, Series(Text("+"), dwsp__)))
    # `( expr )`
    group = Series(Series(Text("("), dwsp__),
                   no_range,
                   expression,
                   Series(Text(")"), dwsp__),
                   mandatory=2)

    # --- prefix operators ----------------------------------------------------
    retrieveop = Alternative(Series(Text("::"), dwsp__),
                             Series(Text(":?"), dwsp__),
                             Series(Text(":"), dwsp__))
    flowmarker = Alternative(Series(Text("!"), dwsp__),
                             Series(Text("&"), dwsp__),
                             Series(Text("<-!"), dwsp__),
                             Series(Text("<-&"), dwsp__))

    # --- terms, sequences, definitions ---------------------------------------
    ANY_SUFFIX = RegExp('[?*+]')
    literals = OneOrMore(literal)
    # An element that is not followed by one of the suffixes `?`, `*`, `+`.
    pure_elem = Series(element, NegativeLookahead(ANY_SUFFIX), mandatory=1)
    # `name()`
    procedure = Series(SYM_REGEX, Series(Text("()"), dwsp__))
    term = Alternative(oneormore, counted, repetition, option, pure_elem)
    # `term - term`
    difference = Series(
        term,
        Option(
            Series(Series(Text("-"), dwsp__),
                   Alternative(oneormore, pure_elem),
                   mandatory=1)))
    # A flow marker applied to an element, e.g. `!elem` or `&elem`.
    lookaround = Series(flowmarker,
                        Alternative(oneormore, pure_elem),
                        mandatory=1)
    # `a ° b ° c`, each further operand optionally preceded by `§`.
    interleave = Series(
        difference,
        ZeroOrMore(
            Series(Series(Text("°"), dwsp__),
                   Option(Series(Text("§"), dwsp__)), difference)))
    # Juxtaposed items (AND is empty), each optionally preceded by `§`.
    sequence = Series(
        Option(Series(Text("§"), dwsp__)), Alternative(interleave, lookaround),
        ZeroOrMore(
            Series(AND, dwsp__, Option(Series(Text("§"), dwsp__)),
                   Alternative(interleave, lookaround))))
    # What may come after a definition or directive: the start of a
    # directive, another symbol, or the end of the input.
    FOLLOW_UP = Alternative(Text("@"), symbol, EOF)
    # `symbol = [|] expression`
    definition = Series(symbol,
                        DEF,
                        dwsp__,
                        Option(Series(OR, dwsp__)),
                        expression,
                        ENDL,
                        dwsp__,
                        Lookahead(FOLLOW_UP),
                        mandatory=1)
    # Value of a directive; a symbol counts only if it is not the start of
    # the next definition.
    component = Alternative(regexp, literals, procedure,
                            Series(symbol, NegativeLookahead(DEF)))
    # `@ symbol = component, component, ...`
    directive = Series(Series(Text("@"), dwsp__),
                       symbol,
                       Series(Text("="), dwsp__),
                       component,
                       ZeroOrMore(Series(Series(Text(","), dwsp__),
                                         component)),
                       Lookahead(FOLLOW_UP),
                       mandatory=1)

    # --- definitions of the forward-declared symbols -------------------------
    element.set(
        Alternative(Series(Option(retrieveop), symbol,
                           NegativeLookahead(DEF)), literal, plaintext, regexp,
                    Series(character, dwsp__), any_char, whitespace, group))
    countable.set(Alternative(option, oneormore, element))
    # Sequences separated by `|`.
    expression.set(Series(sequence, ZeroOrMore(Series(OR, dwsp__, sequence))))

    # --- start symbol --------------------------------------------------------
    syntax = Series(dwsp__, ZeroOrMore(Alternative(definition, directive)),
                    EOF)
    root__ = syntax
# Example no. 3
# 0
class LaTeXGrammar(Grammar):
    r"""Parser for a LaTeX source file.

    The start symbol ``latexdoc`` is a ``preamble`` (a run of commands)
    followed by a ``document`` (``\begin{document}`` ... ``\end{document}``).
    The root parser wraps it in a ``TreeReduction`` with
    ``CombinedParser.MERGE_TREETOPS``.

    Class attributes holding parsers are the symbols of the grammar.
    Attribute names, their order and the position of the ``.set()`` calls for
    the forward-declared symbols are part of the grammar and should not be
    modified by hand.
    """
    # Forward declarations of recursive symbols; they are defined via
    # `.set()` at the end of the class body.
    _block_environment = Forward()
    _text_element = Forward()
    block = Forward()
    paragraph = Forward()
    param_block = Forward()
    tabular_config = Forward()
    source_hash__ = "d443c74c1540aca5ee7ed767a0da896e"
    # Symbols whose name starts with an underscore are disposable.
    # NOTE(review): the exact treatment of disposable symbols is defined by
    # the Grammar base class, which is not visible here.
    disposable__ = re.compile('_\\w+')
    static_analysis_pending__ = []  # type: List[bool]
    parser_initialization__ = ["upon instantiation"]
    # Custom error messages; the empty look-ahead `(?=)` matches at any
    # position, so each message applies unconditionally to its symbol.
    error_messages__ = {
        'end_generic_block':
        [(re.compile(r'(?=)'),
          "A block environment must be followed by a linefeed, not by: {1}")],
        'item':
        [(re.compile(r'(?=)'), '\\item without proper content, found: {1}')]
    }

    # --- comments and whitespace ---------------------------------------------
    # A comment runs from `%` to the end of the line.
    COMMENT__ = r'%.*'
    comment_rx__ = re.compile(COMMENT__)
    comment__ = RegExp(comment_rx__)
    # Insignificant whitespace: blanks plus at most one line break that is
    # not followed by an empty line (an empty line separates paragraphs).
    WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
    whitespace__ = Whitespace(WHITESPACE__)
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)
    dwsp__ = Drop(Whitespace(WSP_RE__))

    # --- lexical primitives --------------------------------------------------
    # End of input: no further character may follow.
    EOF = RegExp('(?!.)')
    _BACKSLASH = Drop(RegExp('[\\\\]'))
    # Pattern used inside look-behinds (see `_begin_inline_env`,
    # `begin_generic_block`) to test for a preceding line break or the
    # beginning of the text.
    # NOTE(review): how look-behind parsers apply their pattern is defined by
    # DHParser and not visible here.
    _LB = Drop(RegExp('\\s*?\\n|$'))
    # Rest of a line: blanks, an optional comment and the line feed.
    NEW_LINE = Series(Drop(RegExp('[ \\t]*')), Option(comment__),
                      Drop(RegExp('\\n')))
    # At least one empty line.
    _GAP = Drop(Series(RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n'), dwsp__))
    # Any mixture of whitespace and comments.
    _WSPC = Drop(OneOrMore(Drop(Alternative(comment__, Drop(RegExp('\\s+'))))))
    # Paragraph separator: a gap, possibly preceded by comment lines and
    # followed by further whitespace or comments.
    _PARSEP = Drop(
        Series(Drop(ZeroOrMore(Drop(Series(whitespace__, comment__)))), _GAP,
               Drop(Option(_WSPC))))
    # Significant whitespace within a paragraph: something blank or a
    # comment follows, but not a paragraph gap.
    S = Series(Lookahead(Drop(RegExp('[% \\t\\n]'))), NegativeLookahead(_GAP),
               wsp__)
    # Line feed (plus trailing whitespace/comments) or end of input.
    LFF = Alternative(Series(NEW_LINE, Option(_WSPC)), EOF)
    _LETTERS = RegExp('\\w+')
    CHARS = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n\'`"]+')
    # Running text that neither contains LaTeX special characters nor
    # crosses an empty line; the NOPAR variant additionally excludes
    # parentheses but allows quotation marks.
    _TEXT_NOPAR = RegExp(
        '(?:[^\\\\%$&\\{\\}\\[\\]\\(\\)\\n]+(?:\\n(?![ \\t]*\\n))?)+')
    _TEXT = RegExp('(?:[^\\\\%$&\\{\\}\\[\\]\\n\'`"]+(?:\\n(?![ \\t]*\\n))?)+')
    # Building blocks for URLs and paths (see `target`, `path`, `urlstring`).
    _TAG = RegExp('[\\w=?.:\\-%&\\[\\] /]+')
    _COLON = Text(":")
    _HASH = Text("#")
    _PATHSEP = RegExp('/(?!\\*)')
    _PATH = RegExp('[\\w=~?.,%&\\[\\]-]+')
    # Numbers and units.
    UNIT = RegExp('(?!\\d)\\w+')
    _FRAC = RegExp('\\.[0-9]+')
    _INTEGER = RegExp('-?(?:(?:[1-9][0-9]+)|[0-9])')
    # Names: word characters not starting with a digit, optionally starred.
    _NAME = RegExp('(?!\\d)\\w+\\*?')
    # `NAME` captures the environment name in `begin_environment`;
    # `end_environment` retrieves it again with `Pop(NAME)`.
    NAME = Capture(Synonym(_NAME))
    IDENTIFIER = Synonym(_NAME)
    # Identifiers joined by `:`, `.` or `-`.
    _QUALIFIED = Series(
        IDENTIFIER,
        ZeroOrMore(
            Series(NegativeLookbehind(_BACKSLASH), Drop(RegExp('[:.-]')),
                   IDENTIFIER)))
    # A double backslash, i.e. a forced line break.
    LINEFEED = RegExp('[\\\\][\\\\]')
    BRACKETS = RegExp('[\\[\\]]')
    SPECIAL = RegExp('[$&_/\\\\\\\\]')
    QUOTEMARK = RegExp('"[`\']?|``?|\'\'?')
    # Backslash-accent sequences such as `\"a`, `\'e`, `` \`o `` or `\^u`.
    UMLAUT = RegExp(
        '\\\\(?:(?:"[AOUaou])|(?:\'[aeiou])|(?:`[aeiou])|(?:[\\^][aeiou]))')
    ESCAPED = RegExp('\\\\(?:(?:[#%$&_/{} \\n])|(?:~\\{\\s*\\}))')
    TXTCOMMAND = RegExp('\\\\text\\w+')
    # Command name: backslash, optional `@`, then letters (no digits or
    # underscores).
    CMDNAME = Series(RegExp('\\\\@?(?:(?![\\d_])\\w)+'), dwsp__)
    # Trailing comma at the end of a parameter list (see `parameters`).
    WARN_Komma = Series(Text(","), dwsp__)
    esc_char = Text(",")

    # --- numbers, key/value parameters and info blocks -------------------------
    number = Series(_INTEGER, Option(_FRAC))
    magnitude = Series(number, Option(UNIT))
    info_value = Series(_TEXT_NOPAR, ZeroOrMore(Series(S, _TEXT_NOPAR)))
    info_key = Series(Drop(Text("/")), _NAME)
    # `/Key (value)`
    info_assoc = Series(
        info_key, dwsp__,
        Option(
            Series(Series(Drop(Text("(")), dwsp__),
                   info_value,
                   Series(Drop(Text(")")), dwsp__),
                   mandatory=1)))
    _info_block = Series(Series(Drop(Text("{")), dwsp__),
                         ZeroOrMore(info_assoc),
                         Series(Drop(Text("}")), dwsp__),
                         mandatory=1)
    value = Alternative(magnitude, _LETTERS, CMDNAME, param_block, block)
    key = Synonym(_QUALIFIED)
    flag = Alternative(_QUALIFIED, magnitude)
    # `key = value`
    association = Series(key, dwsp__, Series(Drop(Text("=")), dwsp__), value,
                         dwsp__)
    # Comma-separated associations or flags; a trailing comma is accepted
    # via `WARN_Komma`.
    parameters = Series(
        Alternative(association, flag),
        ZeroOrMore(
            Series(NegativeLookbehind(_BACKSLASH),
                   Series(Drop(Text(",")), dwsp__),
                   Alternative(association, flag))), Option(WARN_Komma))

    # --- sequences of paragraphs and block environments ------------------------
    sequence = Series(
        Option(_WSPC),
        OneOrMore(
            Series(Alternative(paragraph, _block_environment),
                   Option(Alternative(_PARSEP, S)))))
    block_of_paragraphs = Series(Series(Drop(Text("{")), dwsp__),
                                 Option(sequence),
                                 Series(Drop(Text("}")), dwsp__),
                                 mandatory=2)
    special = Alternative(Drop(Text("\\-")),
                          Series(Drop(RegExp('\\\\')), esc_char), UMLAUT,
                          QUOTEMARK)
    # Names of structuring commands and of the known environments; both are
    # used to recognise block-level commands (`blockcmd`, `no_command`).
    _structure_name = Drop(
        Alternative(Drop(Text("subsection")), Drop(Text("section")),
                    Drop(Text("chapter")), Drop(Text("subsubsection")),
                    Drop(Text("paragraph")), Drop(Text("subparagraph")),
                    Drop(Text("item"))))
    _env_name = Drop(
        Alternative(
            Drop(Text("enumerate")), Drop(Text("itemize")),
            Drop(Text("description")),
            Drop(Text("figure")), Drop(Text("quote")), Drop(Text("quotation")),
            Drop(Text("tabular")),
            Drop(
                Series(Drop(Text("displaymath")),
                       Drop(Option(Drop(Text("*")))))),
            Drop(Series(Drop(Text("equation")),
                        Drop(Option(Drop(Text("*")))))),
            Drop(Series(Drop(Text("eqnarray")),
                        Drop(Option(Drop(Text("*"))))))))
    # Commands that end a paragraph (used as a negative look-ahead in
    # `paragraph`): begin/end of a known environment, a structuring command,
    # or `\[` / `\]`.
    blockcmd = Series(
        _BACKSLASH,
        Alternative(
            Series(
                Alternative(Series(Drop(Text("begin{")), dwsp__),
                            Series(Drop(Text("end{")), dwsp__)), _env_name,
                Series(Drop(Text("}")), dwsp__)),
            Series(_structure_name, Lookahead(Drop(Text("{")))),
            Drop(Text("[")), Drop(Text("]"))))
    # Things that must not be parsed as a generic command or assignment.
    no_command = Alternative(
        Series(Drop(Text("\\begin{")), dwsp__),
        Series(Drop(Text("\\end{")), dwsp__),
        Series(_BACKSLASH, _structure_name, Lookahead(Drop(Text("{")))))
    text = Series(
        OneOrMore(Alternative(_TEXT, special)),
        ZeroOrMore(Series(S, OneOrMore(Alternative(_TEXT, special)))))
    cfg_text = ZeroOrMore(
        Alternative(Series(dwsp__, text), CMDNAME, SPECIAL, block))
    # Optional argument in square brackets: either a parameter list that
    # reaches up to the closing bracket, or free configuration text.
    config = Series(Series(Drop(Text("[")), dwsp__),
                    Alternative(
                        Series(parameters,
                               Lookahead(Series(Drop(Text("]")), dwsp__))),
                        cfg_text),
                    Series(Drop(Text("]")), dwsp__),
                    mandatory=1)
    item = Series(Series(Drop(Text("\\item")), dwsp__),
                  Option(config),
                  sequence,
                  mandatory=2)
    _block_content = Series(
        Option(Alternative(_PARSEP, S)),
        ZeroOrMore(
            Series(Alternative(_block_environment, _text_element, paragraph),
                   Option(Alternative(_PARSEP, S)))))
    heading = Synonym(block)

    # --- URLs and references ---------------------------------------------------
    _pth = OneOrMore(Alternative(_PATH, ESCAPED))
    # NOTE(review): the look-behind pattern 's?ptth' reads as "https?"
    # backwards, so look-behind patterns are presumably matched against the
    # reversed preceding text - confirm against the DHParser documentation.
    target = Series(
        _pth,
        ZeroOrMore(
            Series(NegativeLookbehind(Drop(RegExp('s?ptth'))), _COLON, _pth)),
        Option(
            Series(
                Alternative(
                    Series(Option(_BACKSLASH), _HASH),
                    Series(NegativeLookbehind(Drop(RegExp('s?ptth'))),
                           _COLON)), _TAG)))
    path = Series(_pth, _PATHSEP)
    protocol = RegExp('\\w+://(?!\\*)')
    urlstring = Series(Option(protocol), ZeroOrMore(path), Option(target))
    href = Series(Series(Drop(Text("\\href{")), dwsp__), urlstring,
                  Series(Drop(Text("}")), dwsp__), block)
    url = Series(Series(Drop(Text("\\url{")), dwsp__), urlstring,
                 Series(Drop(Text("}")), dwsp__))
    ref = Series(
        Alternative(Series(Drop(Text("\\ref{")), dwsp__),
                    Series(Drop(Text("\\pageref{")), dwsp__)), CHARS,
        Series(Drop(Text("}")), dwsp__))
    label = Series(Series(Drop(Text("\\label{")), dwsp__), CHARS,
                   Series(Drop(Text("}")), dwsp__))

    # --- known commands --------------------------------------------------------
    hypersetup = Series(Series(Drop(Text("\\hypersetup")), dwsp__),
                        param_block)
    pdfinfo = Series(Series(Drop(Text("\\pdfinfo")), dwsp__), _info_block)
    documentclass = Series(Series(Drop(Text("\\documentclass")), dwsp__),
                           Option(config), block)
    column_nr = Synonym(_INTEGER)
    cline = Series(Series(Drop(Text("\\cline{")), dwsp__), column_nr,
                   Series(Drop(Text("-")), dwsp__), column_nr,
                   Series(Drop(Text("}")), dwsp__))
    hline = Series(Text("\\hline"), dwsp__)
    multicolumn = Series(Series(Drop(Text("\\multicolumn")), dwsp__),
                         Series(Drop(Text("{")), dwsp__), column_nr,
                         Series(Drop(Text("}")), dwsp__), tabular_config,
                         block_of_paragraphs)
    caption = Series(Series(Drop(Text("\\caption")), dwsp__), block)
    includegraphics = Series(Series(Drop(Text("\\includegraphics")), dwsp__),
                             Option(config), block)
    footnote = Series(Series(Drop(Text("\\footnote")), dwsp__),
                      block_of_paragraphs)
    # `\citep` and plain `\cite` share one symbol; `\citep` is tried first.
    citep = Series(
        Alternative(Series(Drop(Text("\\citep")), dwsp__),
                    Series(Drop(Text("\\cite")), dwsp__)), Option(config),
        block)
    citet = Series(Series(Drop(Text("\\citet")), dwsp__), Option(config),
                   block)
    # Any other command with optional `[config]` / `{block}` arguments, or
    # the form `{\command content}`.
    generic_command = Alternative(
        Series(NegativeLookahead(no_command), CMDNAME,
               ZeroOrMore(Series(dwsp__, Alternative(config, block)))),
        Series(Drop(Text("{")),
               CMDNAME,
               _block_content,
               Drop(Text("}")),
               mandatory=3))
    # `\command = value`
    assignment = Series(
        NegativeLookahead(no_command), CMDNAME, Series(Drop(Text("=")),
                                                       dwsp__),
        Alternative(Series(number, Option(UNIT)), block, CHARS))
    text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
    _known_command = Alternative(citet, citep, footnote, includegraphics,
                                 caption, multicolumn, hline, cline,
                                 documentclass, pdfinfo, hypersetup, label,
                                 ref, href, url, item)
    _command = Alternative(_known_command, text_command, assignment,
                           generic_command)

    # --- inline math and inline environments -----------------------------------
    _inline_math_text = RegExp('[^$]*')
    _im_bracket = Series(Drop(Text("\\(")),
                         _inline_math_text,
                         Drop(Text("\\)")),
                         mandatory=1)
    _im_dollar = Series(Drop(Text("$")),
                        _inline_math_text,
                        Drop(Text("$")),
                        mandatory=1)
    inline_math = Alternative(_im_dollar, _im_bracket)
    # `\end{NAME}` retrieves the captured name with `Pop(NAME)`, presumably
    # so that it has to match the name captured by `\begin{NAME}`.
    end_environment = Series(Drop(RegExp('\\\\end{')),
                             Pop(NAME),
                             Drop(RegExp('}')),
                             mandatory=1)
    begin_environment = Series(Drop(RegExp('\\\\begin{')),
                               NAME,
                               Drop(RegExp('}')),
                               mandatory=1)
    _end_inline_env = Synonym(end_environment)
    # A `\begin{...}` counts as inline if it is not preceded by what `_LB`
    # describes or not followed by a line feed.
    _begin_inline_env = Alternative(
        Series(NegativeLookbehind(_LB), begin_environment),
        Series(begin_environment, NegativeLookahead(LFF)))
    generic_inline_env = Series(_begin_inline_env,
                                dwsp__,
                                paragraph,
                                NegativeLookahead(_PARSEP),
                                _end_inline_env,
                                mandatory=4)
    _known_inline_env = Synonym(inline_math)
    _inline_environment = Alternative(_known_inline_env, generic_inline_env)
    _line_element = Alternative(text, _inline_environment, _command, block)

    # --- \paragraph and \subparagraph ------------------------------------------
    SubParagraph = Series(Series(Drop(Text("\\subparagraph")), dwsp__),
                          heading, Option(sequence))
    # The `*` after a sectioning command.
    hide_from_toc = Series(Text("*"), dwsp__)
    SubParagraphs = OneOrMore(Series(Option(_WSPC), SubParagraph))
    Paragraph = Series(Series(Drop(Text("\\paragraph")), dwsp__), heading,
                       ZeroOrMore(Alternative(sequence, SubParagraphs)))

    # --- tables ----------------------------------------------------------------
    cfg_separator = Alternative(Drop(Text("|")),
                                Series(Drop(Text("!")), block))
    cfg_unit = Series(Drop(Text("{")), number, UNIT, Drop(Text("}")))
    cfg_celltype = RegExp('[lcrp]')
    frontpages = Synonym(sequence)
    # `rb_down` and `rb_up` have identical definitions; in `raisebox` the
    # first bracketed argument is parsed as `rb_up`, the second as `rb_down`.
    rb_down = Series(Series(Drop(Text("[")), dwsp__), number, UNIT, dwsp__,
                     Series(Drop(Text("]")), dwsp__))
    rb_up = Series(Series(Drop(Text("[")), dwsp__), number, UNIT, dwsp__,
                   Series(Drop(Text("]")), dwsp__))
    rb_offset = Series(Series(Drop(Text("{")), dwsp__), number, UNIT, dwsp__,
                       Series(Drop(Text("}")), dwsp__))
    raisebox = Series(Series(Drop(Text("\\raisebox")), dwsp__), rb_offset,
                      Option(rb_up), Option(rb_down), block)
    tabular_cell = Alternative(
        Series(raisebox, Option(Alternative(S, _PARSEP))),
        ZeroOrMore(Series(_line_element, Option(Alternative(S, _PARSEP)))))
    # Cells separated by `&`; a row ends with `\\` (optionally followed by
    # `\hline` or `\cline`s) or directly before `\end{tabular}`.
    tabular_row = Series(
        Alternative(multicolumn, tabular_cell),
        ZeroOrMore(
            Series(Series(Drop(Text("&")), dwsp__),
                   Alternative(multicolumn, tabular_cell))),
        Alternative(
            Series(Series(Drop(Text("\\\\")), dwsp__),
                   Alternative(hline, ZeroOrMore(cline)), Option(_PARSEP)),
            Lookahead(Drop(Text("\\end{tabular}")))))
    tabular = Series(Series(Drop(Text("\\begin{tabular}")), dwsp__),
                     tabular_config,
                     ZeroOrMore(Alternative(tabular_row, _WSPC)),
                     Series(Drop(Text("\\end{tabular}")), dwsp__),
                     mandatory=3)

    # --- math environments -----------------------------------------------------
    no_numbering = Text("*")
    # Content of a math block: everything up to the `\end{...}` of a math
    # environment or a closing `\]`.
    _block_math = RegExp(
        '(?:[^\\\\]*[\\\\]?(?!end\\{(?:eqnarray|equation|displaymath)\\*?\\}|\\])\\s*)*'
    )
    eqnarray = Series(Drop(Text("\\begin{eqnarray")),
                      Option(no_numbering),
                      Series(Drop(Text("}")), dwsp__),
                      _block_math,
                      Drop(Text("\\end{eqnarray")),
                      Option(Drop(Text("*"))),
                      Series(Drop(Text("}")), dwsp__),
                      mandatory=3)
    equation = Series(Drop(Text("\\begin{equation")),
                      Option(no_numbering),
                      Series(Drop(Text("}")), dwsp__),
                      _block_math,
                      Drop(Text("\\end{equation")),
                      Option(Drop(Text("*"))),
                      Series(Drop(Text("}")), dwsp__),
                      mandatory=3)
    # `\[ ... \]`
    _dmath_short_form = Series(Series(Drop(Text("\\[")), dwsp__),
                               _block_math,
                               Series(Drop(Text("\\]")), dwsp__),
                               mandatory=1)
    _dmath_long_form = Series(Drop(Text("\\begin{displaymath")),
                              Option(no_numbering),
                              Series(Drop(Text("}")), dwsp__),
                              _block_math,
                              Drop(Text("\\end{displaymath")),
                              Option(Drop(Text("*"))),
                              Series(Drop(Text("}")), dwsp__),
                              mandatory=3)
    displaymath = Alternative(_dmath_long_form, _dmath_short_form)

    # --- other known block environments ----------------------------------------
    # Everything up to `\end{verbatim}`.
    verbatim_text = RegExp('(?:(?!\\\\end{verbatim})[\\\\]?[^\\\\]*)*')
    verbatim = Series(Series(Drop(Text("\\begin{verbatim}")), dwsp__),
                      verbatim_text,
                      Series(Drop(Text("\\end{verbatim}")), dwsp__),
                      mandatory=2)
    # Covers both the `quotation` and the `quote` environment.
    quotation = Alternative(
        Series(Series(Drop(Text("\\begin{quotation}")), dwsp__),
               sequence,
               Series(Drop(Text("\\end{quotation}")), dwsp__),
               mandatory=2),
        Series(Series(Drop(Text("\\begin{quote}")), dwsp__),
               sequence,
               Series(Drop(Text("\\end{quote}")), dwsp__),
               mandatory=2))
    figure = Series(Series(Drop(Text("\\begin{figure}")), dwsp__),
                    sequence,
                    Series(Drop(Text("\\end{figure}")), dwsp__),
                    mandatory=2)
    Paragraphs = OneOrMore(Series(Option(_WSPC), Paragraph))
    # Content of list environments: items or other commands.
    _itemsequence = Series(
        Option(_WSPC),
        ZeroOrMore(Series(Alternative(item, _command), Option(_WSPC))))
    description = Series(Series(Drop(Text("\\begin{description}")), dwsp__),
                         _itemsequence,
                         Series(Drop(Text("\\end{description}")), dwsp__),
                         mandatory=2)
    enumerate = Series(Series(Drop(Text("\\begin{enumerate}")), dwsp__),
                       _itemsequence,
                       Series(Drop(Text("\\end{enumerate}")), dwsp__),
                       mandatory=2)
    itemize = Series(Series(Drop(Text("\\begin{itemize}")), dwsp__),
                     _itemsequence,
                     Series(Drop(Text("\\end{itemize}")), dwsp__),
                     mandatory=2)

    # --- generic block environments --------------------------------------------
    # `\end{...}` of a block must be followed by a line feed, the end of the
    # input, or (after whitespace) a closing brace.
    end_generic_block = Series(end_environment,
                               Alternative(
                                   LFF,
                                   Series(dwsp__, Lookahead(Drop(Text("}"))))),
                               mandatory=1)
    begin_generic_block = Series(Lookbehind(_LB), begin_environment)
    generic_block = Series(begin_generic_block,
                           ZeroOrMore(Alternative(sequence, item)),
                           end_generic_block,
                           mandatory=2)
    math_block = Alternative(equation, eqnarray, displaymath)
    _known_environment = Alternative(itemize, enumerate, description, figure,
                                     tabular, quotation, verbatim, math_block)
    _has_block_start = Drop(
        Alternative(Drop(Text("\\begin{")), Drop(Text("\\["))))

    # --- document structure ----------------------------------------------------
    preamble = OneOrMore(Series(Option(_WSPC), _command))
    # Sectioning levels nest from Chapter down to SubSubSection; each level
    # may contain plain sequences, the next lower levels and Paragraphs.
    SubSubSection = Series(Drop(Text("\\subsubsection")),
                           Option(hide_from_toc), heading,
                           ZeroOrMore(Alternative(sequence, Paragraphs)))
    Index = Series(Option(_WSPC), Series(Drop(Text("\\printindex")), dwsp__))
    Bibliography = Series(Option(_WSPC),
                          Series(Drop(Text("\\bibliography")), dwsp__),
                          heading)
    SubSubSections = OneOrMore(Series(Option(_WSPC), SubSubSection))
    SubSection = Series(
        Drop(Text("\\subsection")), Option(hide_from_toc), heading,
        ZeroOrMore(Alternative(sequence, SubSubSections, Paragraphs)))
    SubSections = OneOrMore(Series(Option(_WSPC), SubSection))
    Section = Series(
        Drop(Text("\\section")), Option(hide_from_toc), heading,
        ZeroOrMore(Alternative(sequence, SubSections, Paragraphs)))
    Sections = OneOrMore(Series(Option(_WSPC), Section))
    Chapter = Series(Drop(Text("\\chapter")), Option(hide_from_toc), heading,
                     ZeroOrMore(Alternative(sequence, Sections, Paragraphs)))
    Chapters = OneOrMore(Series(Option(_WSPC), Chapter))
    # `\begin{document}`, front pages, chapters or sections, optional
    # bibliography and index, `\end{document}`, end of input.
    document = Series(Option(_WSPC),
                      Series(Drop(Text("\\begin{document}")), dwsp__),
                      frontpages,
                      Alternative(Chapters, Sections),
                      Option(Bibliography),
                      Option(Index),
                      Option(_WSPC),
                      Series(Drop(Text("\\end{document}")), dwsp__),
                      Option(_WSPC),
                      EOF,
                      mandatory=2)

    # --- definitions of the forward-declared symbols ---------------------------
    # `{ parameters }`
    param_block.set(
        Series(Series(Drop(Text("{")), dwsp__), Option(parameters),
               Series(Drop(Text("}")), dwsp__)))
    # `{ content }`; unlike `param_block`, no whitespace is consumed after
    # the closing brace.
    block.set(
        Series(Series(Drop(Text("{")), dwsp__),
               _block_content,
               Drop(Text("}")),
               mandatory=2))
    _text_element.set(Alternative(_line_element, LINEFEED))
    # A paragraph is a run of text elements that stops before any block
    # command.
    paragraph.set(
        OneOrMore(Series(NegativeLookahead(blockcmd), _text_element,
                         Option(S))))
    # Column specification of a tabular, e.g. `{l|c|p{3cm}}`.
    tabular_config.set(
        Series(Series(Drop(Text("{")), dwsp__),
               OneOrMore(
                   Alternative(Series(cfg_celltype, Option(cfg_unit)),
                               cfg_separator, Drop(RegExp(' +')))),
               Series(Drop(Text("}")), dwsp__),
               mandatory=2))
    # Known environments are only tried if `\begin{` or `\[` follows;
    # otherwise a generic block is attempted.
    _block_environment.set(
        Alternative(Series(Lookahead(_has_block_start), _known_environment),
                    generic_block))

    # --- start symbol ----------------------------------------------------------
    latexdoc = Series(preamble, document, mandatory=1)
    root__ = TreeReduction(latexdoc, CombinedParser.MERGE_TREETOPS)
# Example no. 4
# 0
class json_fail_tolerantGrammar(Grammar):
    r"""Parser for a json_fail_tolerant source file.

    A JSON grammar with error reporting and recovery: custom error messages
    (``error_messages__``) and skip/resume patterns (``skip_rules__``,
    ``resume_rules__``) are attached to the symbols ``object``, ``array``,
    ``member``, ``string`` and the two separator symbols.

    In addition to standard JSON, ``//`` and ``#`` line comments are treated
    as insignificant whitespace.

    The root parser ``json`` accepts optional whitespace, one element and
    the end of the input.

    Class attributes holding parsers are the symbols of the grammar; their
    names and order are part of the grammar.
    """
    # `_element` is recursive (arrays and objects contain elements), hence
    # the forward declaration; it is defined via `.set()` further down.
    _element = Forward()
    source_hash__ = "42cb00a4f8192986733859d4709c5b37"
    # Two characters followed by a look-behind for the start of the string:
    # this pattern cannot match anything.
    disposable__ = re.compile('..(?<=^)')
    static_analysis_pending__ = []  # type: List[bool]
    parser_initialization__ = ["upon instantiation"]
    # Custom error messages per symbol. Each entry pairs a pattern for the
    # text at the error location with a message; `(?=)` matches anywhere and
    # thus acts as the fallback.
    error_messages__ = {
        'member':
        [(re.compile(r'[\'`´]'),
          'String values must be enclosed by double-quotation marks: "..."!')],
        'string':
        [(re.compile(r'\\'),
          'Illegal escape sequence "{1}" Allowed values are \\\\/, \\\\\\\\, \\\\b, \\\\n, \\\\r, \\\\t, or \\\\u.'
          ), (re.compile(r'(?=)'), 'Illegal character "{1}" in string.')],
        '_OBJECT_SEPARATOR': [(re.compile(r'(?!,)'), 'Missing separator ","')],
        '_ARRAY_SEPARATOR': [(re.compile(r'(?!,)'), 'Missing separator ","')]
    }
    # After an error inside a string, continue at the next double quote.
    skip_rules__ = {'string': [re.compile(r'(?=")')]}
    # Patterns describing where parsing picks up again after an error in
    # the respective symbol.
    resume_rules__ = {
        'object': [re.compile(r'(?:[^{}]|(?:\{.*\}))*\}\s*')],
        'array': [re.compile(r'(?:[^\[\]]|(?:\[.*\]))*\]\s*')],
        'member': [re.compile(r'(?=(?:"[^"\n]+"\s*:)|\}|,)')],
        '_OBJECT_SEPARATOR': [re.compile(r'(?=)')],
        '_ARRAY_SEPARATOR': [re.compile(r'(?=)')]
    }

    # --- comments and whitespace ---------------------------------------------
    COMMENT__ = r'(?:\/\/|#).*'
    comment_rx__ = re.compile(COMMENT__)
    WHITESPACE__ = r'\s*'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)
    dwsp__ = Drop(Whitespace(WSP_RE__))

    # --- separators ----------------------------------------------------------
    # A separator is only attempted if the closing bracket does not follow.
    # From then on a comma is required (`mandatory=1`); the look-ahead
    # checks for it and the `Option` consumes it.
    _ARRAY_SEPARATOR = Series(NegativeLookahead(Text("]")),
                              Lookahead(Text(",")),
                              Option(Series(Drop(Text(",")), dwsp__)),
                              mandatory=1)
    _OBJECT_SEPARATOR = Series(NegativeLookahead(Text("}")),
                               Lookahead(Text(",")),
                               Option(Series(Drop(Text(",")), dwsp__)),
                               mandatory=1)
    # End of input: no further character may follow.
    _EOF = NegativeLookahead(RegExp('.'))

    # --- numbers ---------------------------------------------------------------
    EXP = Option(
        Series(Alternative(Text("E"), Text("e")),
               Option(Alternative(Text("+"), Text("-"))), RegExp('[0-9]+')))
    DOT = Text(".")
    FRAC = Option(Series(DOT, RegExp('[0-9]+')))
    NEG = Text("-")
    # `Alternative` is an ordered choice that commits to the first branch
    # that matches. The multi-digit pattern therefore has to be tried before
    # the single-digit one; in the reverse order `[0-9]` would always win
    # and only the first digit of e.g. "123" would be consumed.
    # NOTE(review): the EBNF source this class was generated from presumably
    # lists the alternatives in the old order - fix it there as well, or the
    # problem returns when the parser is regenerated.
    INT = Series(Option(NEG),
                 Alternative(RegExp('[1-9][0-9]+'), RegExp('[0-9]')))

    # --- strings ---------------------------------------------------------------
    HEX = RegExp('[0-9a-fA-F][0-9a-fA-F]')
    # NOTE(review): `dwsp__` after `\u` means whitespace between `\u` and
    # the hex digits is accepted inside a string - confirm this is intended.
    UNICODE = Series(Series(Drop(Text("\\u")), dwsp__), HEX, HEX)
    ESCAPE = Alternative(RegExp('\\\\[/bnrt\\\\]'), UNICODE)
    PLAIN = RegExp('[^"\\\\]+')
    _CHARACTERS = ZeroOrMore(Alternative(PLAIN, ESCAPE))

    # --- values ----------------------------------------------------------------
    null = Series(Text("null"), dwsp__)
    bool = Alternative(Series(RegExp('true'), dwsp__),
                       Series(RegExp('false'), dwsp__))
    number = Series(INT, FRAC, EXP, dwsp__)
    string = Series(Text('"'), _CHARACTERS, Text('"'), dwsp__, mandatory=1)
    # `[ element, element, ... ]`; may be empty.
    array = Series(
        Series(Drop(Text("[")), dwsp__),
        Option(
            Series(_element,
                   ZeroOrMore(Series(_ARRAY_SEPARATOR, _element,
                                     mandatory=1)))),
        Series(Drop(Text("]")), dwsp__))
    # `"key": element`
    member = Series(string,
                    Series(Drop(Text(":")), dwsp__),
                    _element,
                    mandatory=1)
    # `{ member, member, ... }`; at least one member is required.
    object = Series(Series(Drop(Text("{")), dwsp__),
                    member,
                    ZeroOrMore(Series(_OBJECT_SEPARATOR, member, mandatory=1)),
                    Series(Drop(Text("}")), dwsp__),
                    mandatory=3)
    _element.set(Alternative(object, array, string, number, bool, null))

    # --- start symbol ------------------------------------------------------------
    json = Series(dwsp__, _element, _EOF)
    root__ = json