def __init__(self):
        """Build the response parser and store it on ``self.p``.

        Three alternatives are recognised:

        * ``indi <path> <name>=<value>, ...`` -- key/value pairs are
          folded into one dict under ``info``.
        * ``GPO``/``GPI <number> <hhhll>`` -- exactly five ``h``/``l``
          pin flags (``h`` -> False, ``l`` -> True).
        * ``ERROR <number> <message>``.
        """
        # TODO: finish this parser so it does more than just parse indi and GPO responses
        indi_operator = string_from("indi")
        gp_operator = string_from("GPO", "GPI")
        space = string(" ")
        obj = regex(r"[a-zA-Z0-9.#]*")
        name = regex(r"[a-zA-Z]*")
        simple_string = regex(r"[a-zA-Z ]*")
        equals = string("=")
        # A value is anything up to a comma, or a double-quoted string.
        value = regex(r"[^,]*") | regex(r'".*"')
        number = regex(r"[0-9]+")
        # NOTE(review): 'h' -> False and 'l' -> True looks inverted for
        # active-high logic -- confirm the intended polarity.
        gp_value = regex(r"[hl]").map(lambda v: {"h": False, "l": True}[v]) * 5

        # ``name=value`` pairs separated by ", "; each pair becomes a
        # one-entry dict, then .many() collects them and the final map
        # merges them into a single dict.
        indi_parser = seq(indi_operator << space).then(
            seq(
                path=obj << space.optional(),
                info=seq(
                    name=name << equals,
                    value=value <<
                    string(", ").optional()).map(lambda x: {
                        x["name"]: x["value"]
                    }).many().map(
                        lambda kv: {k: v
                                    for d in kv for k, v in d.items()}),
            ))
        gp_parser = seq(gp_operator << space).then(
            seq(number=number << space, pins=gp_value))
        error_parser = seq(string("ERROR") << space).then(
            seq(number << space, simple_string))

        # First matching alternative wins.
        self.p = indi_parser | gp_parser | error_parser
Beispiel #2
0
def make_element_parser(config, content, jinja):
    """Return a parser matching any supported element form.

    Alternatives are tried in order: raw-text ``style``/``script``
    elements, self-closing elements, then regular container elements.
    """
    # Raw-text elements whose bodies are not parsed as markup.
    style = make_raw_text_element_parser(config, 'style', jinja=jinja)
    script = make_raw_text_element_parser(config, 'script', jinja=jinja)

    # Elements that carry neither content nor a closing tag.
    opening_tag = make_opening_tag_parser(
        config,
        tag_name_parser=P.string_from(*SELF_CLOSING_ELEMENTS),
        allow_slash=True,
        jinja=jinja,
    )
    self_closing_element = locate(
        P.seq(
            opening_tag.skip(whitespace),
            P.success(None),  # no content
            P.success(None),  # no closing tag
        )
    ).combine(_combine_element)

    container_element = make_container_element_parser(
        config,
        content=content,
        jinja=jinja,
    )

    return style | script | self_closing_element | container_element
Beispiel #3
0
 def test_string_from_transform(self):
     """string_from with a transform matches case-insensitively but
     still yields the canonical (untransformed) alternative."""
     titles = string_from("Mr", "Mr.", "Mrs", "Mrs.",
                          transform=lambda s: s.lower())
     cases = [("mr", "Mr"), ("mr.", "Mr."), ("MR", "Mr"), ("MR.", "Mr.")]
     for given, expected in cases:
         self.assertEqual(titles.parse(given), expected)
Beispiel #4
0
def make_element_parser(config, content, jinja):
    """Return a parser for any supported element kind.

    Alternatives are tried in order: raw-text ``style``/``script``
    elements, void elements, SVG self-closing elements, and finally
    regular container elements.
    """

    def _contentless(opening_tag):
        # Shared shape for elements with no content and no closing tag.
        return locate(
            P.seq(
                opening_tag.skip(whitespace),
                P.success(None),  # No content
                P.success(None),  # No closing tag
            )
        ).combine(_combine_element)

    void_element = _contentless(
        make_opening_tag_parser(
            config,
            tag_name_parser=P.string_from(*VOID_ELEMENTS),
            allow_slash=True,
            jinja=jinja,
        )
    )

    svg_self_closing_element = _contentless(
        make_opening_tag_parser(
            config,
            tag_name_parser=P.string_from(*SVG_SELF_CLOSING_ELEMENTS),
            mandate_slash=True,
            jinja=jinja,
        )
    )

    style = make_raw_text_element_parser(config, "style", jinja=jinja)
    script = make_raw_text_element_parser(config, "script", jinja=jinja)
    container_element = make_container_element_parser(
        config, content=content, jinja=jinja
    )

    return (
        style
        | script
        | void_element
        | svg_self_closing_element
        | container_element
    )
Beispiel #5
0
def word_variable_complex():
    """Parse a ``${name}`` or ``${name<op>word}`` variable expansion.

    NOTE(review): presumably wrapped by parsy's ``@generate`` decorator
    at the definition site (not visible here) -- confirm.
    """
    yield string("${")
    # The variable name itself becomes a VarRef node.
    ref = yield variable_name.map(ConstantString).map(VarRef)
    # Optional parameter-expansion operator; longest forms first.
    op = yield string_from("##", "#", "%%", "%").optional()
    if op is not None:
        # Operator present: the following word is its parameter.
        param = yield word
        ref = VarOp(ref, op, param)
    yield string("}")
    return ref
Beispiel #6
0
    def test_string_from(self):
        """string_from matches any listed alternative, composes with
        ``+``, and fails with a descriptive error otherwise."""
        titles = string_from("Mr", "Mr.", "Mrs", "Mrs.")
        for text in ("Mr", "Mr."):
            self.assertEqual(titles.parse(text), text)
        combined = titles + string(" Hyde")
        self.assertEqual(combined.parse("Mr. Hyde"), "Mr. Hyde")
        with self.assertRaises(ParseError) as err:
            titles.parse('foo')

        self.assertEqual(
            str(err.exception),
            """expected one of 'Mr', 'Mr.', 'Mrs', 'Mrs.' at 0:0""")
Beispiel #7
0
def passport():
    """Accumulate ``key: value`` fields into a dict until the record ends.

    A record ends when a field is followed by no separator, or by a
    newline that is itself followed by another newline or end of input
    (i.e. a blank line terminates the record).
    """
    _fields = {}
    while True:
        fkey, fval = yield field
        _fields[fkey] = fval

        # Fields are separated by a single space or a single newline.
        m = yield string_from(" ", "\n").optional()
        if m == "\n":
            # Peek without consuming: a second newline or EOF ends the record.
            n = yield peek(string("\n").optional())
            e = yield peek(eof.result(True).optional())
            if n == "\n" or e:
                break
        elif m is None:
            break

    return _fields
Beispiel #8
0
# Arabic numerals: "1er" (any case) normalises to "1"; otherwise raw digits.
CHIFFRES_ARABES = case_insensitive_string("1er").result("1") | regex(r"\d+")

# Roman numerals: "Ier" (any case) or any run of Roman digit letters.
CHIFFRES_ROMAINS = case_insensitive_string("Ier") | regex(r"[IVXLCDM]+")

# Capital-letter identifiers.
LETTRES = regex(r"[A-Z]+")

# A division number in any accepted spelling; "liminaire" -> "0",
# "premier" -> "1".
NUMERO = (
    string("liminaire").result("0")
    | case_insensitive_string("premier").result("1")
    | CHIFFRES_ARABES
    | CHIFFRES_ROMAINS
    | LETTRES
)

# Multiplicative adjectives (contents defined by ADJECTIFS_MULTIPLICATIFS).
MULTIPLICATIF = string_from(*ADJECTIFS_MULTIPLICATIFS)

ADDITIONNEL = regex(r"[A-Z]+")  # alias "andouillette" (AAAAA)

# "mult add" joined by a space, or either component alone.
MULT_ADD = (
    seq(MULTIPLICATIF.skip(whitespace), ADDITIONNEL).map(" ".join)
    | MULTIPLICATIF
    | ADDITIONNEL
)


# Divisions uniques

INTITULE = string_from(
    "Intitulé de la proposition de loi",
    "Intitulé du projet de loi",
Beispiel #9
0
    '*': 7,
    '/': 7,
    '+': 6,
    '-': 6,
    '<': 5,
    '>': 5,
    '<=': 5,
    '>=': 5,
    '=': 4,
    '<>': 4,
    'and': 2,
    'or': 1,
}
def prec(op):
    """Return the binding precedence of *op* (higher binds tighter)."""
    # def instead of a lambda assignment (PEP 8 E731); behaviour unchanged.
    return op_prec_table[op]


# Iterating a dict yields its keys, so this matches any known operator.
operator = lexeme(parsy.string_from(*op_prec_table))

unary_op_list = ['-', 'not']
unary_op = lexeme(parsy.string_from(*unary_op_list))

symbol_op_list = [
    '[]', '.', '+', '-', '*', '/', '=', '<>', '>', '<', '>=', '<='
]
ascii_op_list = ['and', 'or', 'not']


# Parser helper combinators
def peek(parser):
    @parsy.Parser
    def helper(stream, index):
        try:
Beispiel #10
0
"""
Stripped down logo lexer, for tokenizing Turtle Logo programs like:

   fd 1
   bk 2
   rt 90

etc.
"""

from parsy import eof, regex, seq, string, string_from, whitespace

# Turtle Logo commands and their integer argument.
command = string_from("fd", "bk", "rt", "lt")
number = regex(r'[0-9]+').map(int)
optional_whitespace = regex(r'\s*')
eol = string("\n")
# One line: command, number, and a normalised "\n" terminator
# (real EOL, EOL after trailing whitespace, or end of input).
line = seq(optional_whitespace >> command, whitespace >> number,
           (eof | eol | (whitespace >> eol)).result("\n"))


def flatten_list(ls):
    """Concatenate a list of lists into one flat list.

    Replaces ``sum(ls, [])`` (quadratic) and the PEP 8 E731 lambda
    assignment with a linear comprehension; same result for all inputs.
    """
    return [item for sub in ls for item in sub]


lexer = line.many().map(flatten_list)
Beispiel #11
0
        else:
            return (yield parser)

    return a


# Single characters usable inside identifiers.
alpha_numeric = p.regex('[a-zA-Z0-9_]')

lower_alpha = p.regex('[a-z]')
upper_alpha = p.regex('[A-Z]')

# Words: a lower/upper letter followed by identifier characters.
lower_word = p.seq(lower_alpha, alpha_numeric.many().concat()).concat()
upper_word = p.seq(upper_alpha, alpha_numeric.many().concat()).concat()

# Optionally signed integer; a leading '-' negates the parsed digits.
integer = p.seq(
    p.string_from('+', '-').optional(),
    p.decimal_digit.at_least(1).concat().map(int)).combine(
        lambda sign, number: -number if sign == '-' else number)

# Characters allowed inside single-quoted strings (no escape support).
sq_char = p.regex('[a-zA-Z0-9 _\\-/~!@#$%^&*(),."]')
single_quote = p.string("'")
single_quoted = single_quote >> sq_char.at_least(1).concat() << single_quote

# Characters allowed inside double-quoted strings (no escape support).
dq_char = p.regex("[a-zA-Z0-9 _\\-/~!@#$%^&*(),.']")
double_quote = p.string('"')
double_quoted = double_quote >> dq_char.at_least(1).concat() << double_quote

atomic_word = lower_word | single_quoted

name = atomic_word | integer
Beispiel #12
0
    "dark red bags contain 2 dark orange bags.",
    "dark orange bags contain 2 dark yellow bags.",
    "dark yellow bags contain 2 dark green bags.",
    "dark green bags contain 2 dark blue bags.",
    "dark blue bags contain 2 dark violet bags.",
    "dark violet bags contain no other bags.",
])

# Parsing combinators for bag rules such as
# "dark red bags contain 2 dark orange bags."
optional_whitespace = regex(r"\s*")
word = regex(r"\w+")
number = regex(r"\d+")

color_att = word
color = word
bag_term = string_from("bag", "bags")

# A bag is the (attribute, color) tuple, followed by "bag"/"bags".
bag = seq(color_att, whitespace >> color).map(tuple) << whitespace << bag_term

# Contents: "no other bags" -> empty dict, otherwise "N <bag>" pairs
# (swapped to (bag, count)) joined by ", " and collected into a dict.
no_content = string("no other bags").map(lambda x: {})
content = (seq(number.map(int),
               whitespace >> bag).map(lambda a: (a[1], a[0])).sep_by(
                   string(", ")).map(dict))
bag_contents = no_content | content

# "<bag> contain <contents>." ; a rule set is newline-separated rules
# mapped to {bag: contents}.
rule = seq(bag, whitespace >> string("contain") >> whitespace >>
           bag_contents) << string(".")
rules = rule.sep_by(string("\n")).map(dict) << string("\n").optional()


def find_containers(mapping, bag):
Beispiel #13
0
def case_insensitive_string_from(*expected_strings: str) -> Any:
    """Match any of *expected_strings* regardless of input case."""
    # str.lower is equivalent to ``lambda s: s.lower()`` for str inputs.
    return string_from(*expected_strings, transform=str.lower)
Beispiel #14
0
# Arabic ordinal spellings of "first" normalise to "1"; otherwise digits.
CHIFFRES_ARABES = case_insensitive_string_from(
    "1e", "1er", "1ère").result("1") | regex(r"\d+")

# Roman numerals: "Ier" (any case) or a run of Roman digit letters.
CHIFFRES_ROMAINS = case_insensitive_string("Ier") | regex(r"[IVXLCDM]+")

LETTRES_CAPITALES = regex(r"[A-Zİ]+").map(
    remove_accents)  # allow dotted capital i

# A division number in any accepted spelling; "liminaire" -> "0",
# "premier"/"unique" -> "1".
NUMERO = (string("liminaire").result("0")
          | case_insensitive_string_from("premier", "unique").result("1")
          | string("PRÉLIMINAIRE")
          | CHIFFRES_ARABES
          | CHIFFRES_ROMAINS
          | LETTRES_CAPITALES)

# Multiplicative adjectives (contents defined by ADJECTIFS_MULTIPLICATIFS).
MULTIPLICATIF = string_from(*ADJECTIFS_MULTIPLICATIFS)

ADDITIONNEL = LETTRES_CAPITALES  # alias "andouillette" (AAAAA)

# "mult add" joined by a space, or either component alone.
MULT_ADD = (seq(MULTIPLICATIF << whitespace.optional(), ADDITIONNEL).map(
    " ".join)
            | MULTIPLICATIF
            | ADDITIONNEL)

# Single-occurrence divisions

# Bill/act titles: an optional "Intitulé de la/du" prefix, then the
# document kind, all normalised to the result "titre"; the trailing
# regex swallows the rest of the line.
INTITULE = ((case_insensitive_string("Intitulé") >> whitespace >>
             case_insensitive_string_from("de la", "de  la", "du") >>
             whitespace).optional() >> case_insensitive_string_from(
                 "proposition de loi", "projet de loi",
                 "texte").result("titre") << regex(".*"))
# Numeric literals; underscores are digit separators stripped before
# conversion, and a suffix selects the base (d optional, o, b).
decimal = regex(r'-?[0-9_]+(_[0-9]+)*d?').map(
    lambda s: int(re.sub(r'[_d]', '', s)))

octal = regex(r'-?[0-7_]+(_[0-7]+)*o').map(
    lambda s: int(re.sub(r'[_o]', '', s), 8))

binary = regex(r'-?[0-1_]+(_[0-1]+)*b').map(
    lambda s: int(re.sub(r'[_b]', '', s), 2))

# NOTE(review): ``hexadecimal`` is defined elsewhere in this module.
digit = binary | octal | decimal | hexadecimal

# Lower-case identifiers with interior - or _ separated segments.
identifier = regex(r'[a-z][a-z0-9]*([-_][a-z0-9]+)*')

mnemonic = string_from("halt", "noop", "push", "load", "store", "xchg", "dup",
                       "dupn", "swap", "pop", "jump", "link", "spawn", "add",
                       "sub", "mul", "div", "mod", "neg", "not", "and", "or",
                       "xor")

condition = string_from("always", "overflow", "zero", "non-zero", "positive",
                        "negative", "high", "safe")

# NOTE(review): the .map targets (``comment``, ``instruction``, ``label``)
# are constructors defined elsewhere; each name is then REBOUND to the
# parser here, shadowing the constructor -- confirm this is intended.
comment = (string(";") >> regex(r'[^\n\r]*') << (eol | eof)).map(comment)

# "[condition.]mnemonic [operand]" -- condition prefix and operand optional.
instruction = seq(condition=(condition << string(".")).optional(),
                  mnemonic=mnemonic,
                  operand=(ws1 >>
                           (digit | identifier)).optional()).map(instruction)

label = (identifier << string(":")).map(label)

statement = label | instruction | comment
Beispiel #16
0
# We don't support ' in strings or escaping for simplicity
string_literal = regex(r"'[^']*'").map(lambda s: String(s[1:-1]))

identifier = regex('[a-zA-Z][a-zA-Z0-9_]*')

field = identifier.map(Field)

table = identifier.map(Table)

space = regex(r'\s+')  # non-optional whitespace
padding = regex(r'\s*')  # optional whitespace

# NOTE(review): ``number_literal`` is defined elsewhere in this module.
column_expr = field | string_literal | number_literal

operator = string_from('=', '<', '>', '<=', '>=')

# "<expr> <op> <expr>" with optional whitespace around the operator;
# keyword names feed combine_dict(Comparison) directly.
comparison = seq(
    left=column_expr << padding,
    operator=operator,
    right=padding >> column_expr,
).combine_dict(Comparison)

# SQL keywords (case-sensitive).
SELECT = string('SELECT')
FROM = string('FROM')
WHERE = string('WHERE')

# Here we demonstrate use of leading underscore to discard parts we don't want,
# which is more readable and convenient than `<<` and `>>` sometimes.
select = seq(
    _select=SELECT + space,
Beispiel #17
0
}

# Keys a passport must carry to be considered valid.
_essential_keys = {
    "byr",
    "iyr",
    "eyr",
    "hgt",
    "hcl",
    "ecl",
    "pid",
    #'cid',  deliberately excluded from the required set
}

optional_whitespace = regex(r"\s*")

# "key:value" fields; valid keys come from the validation table.
field_key = string_from(*passport_validation.keys()).desc("field key")
field_value = regex(r"[A-Za-z0-9#]+").desc("field value")
field = seq(field_key, string(":") >> field_value).map(tuple)
# A passport ends at EOF or a blank/terminating newline.
passport_end = eof | string_from("\n", "\n\n")


@generate
def passport():
    _fields = {}
    while True:
        fkey, fval = yield field
        _fields[fkey] = fval

        m = yield string_from(" ", "\n").optional()
        if m == "\n":
            n = yield peek(string("\n").optional())
Beispiel #18
0
    notes = yield get_notes

    # make a copy of this list so that we don't perturb the note.
    hds = list(notes.get('hds', []))
    if len(hds) > 0:
        return fail("Want additional heredocs")

    return CommandSequence(seq)


# A backslash-newline line continuation produces an empty token.
eaten_newline = string("\\\n").result(Token(""))
variable_id = regex("[a-zA-Z_][a-zA-Z0-9_]*")
# Positional parameters (1..n) and special parameters, or a named id.
variable_name = regex("[1-9][0-9]*|[0\\?!#@\\*]") | variable_id
# A bare word: any run of characters with no shell metacharacters.
word_id = regex('[^\\s\'()$=";|<>&\\\\{}`*]+').map(ConstantString)
# Redirection operators; longer forms listed before their prefixes.
word_redir = string_from("<&", "<<", "<", ">&", ">>", ">").map(Token)
# Single-quoted text is taken literally (no expansion).
word_single = (
    string("'") >> regex("[^']*") << string("'")).map(ConstantString)
# "$( ... )" command substitution.
word_expr = string("$(") >> command_sequence << string(")")
# A backslash escapes the single following character.
word_backslash = string("\\") >> any_char.map(ConstantString)
# "$name" variable reference.
word_variable_reference = (
    string("$") >> variable_name).map(ConstantString).map(VarRef)
word_variable_name = variable_id.map(Id)
word_equals = string("=").map(Token)
word_dbrace = string("{}").map(Token)
# Glob tokens; "**" must be tried before "*".
word_glob = string("**").result(STARSTAR) | string("*").result(STAR)

e_id = variable_id


@generate("word-variable-complex")
Beispiel #19
0
            definition = f'{definition} = {self.default_value}'
        return definition


# Matches // line comments and /* ... */ block comments.
C_COMMENT_PATTERN = re.compile(r'\/\/.*$|\/\*.*?\*\/', re.MULTILINE)

# Single-character punctuation tokens, each with a readable description.
EQ = parsy.char_from('=').desc('=')
COMMA = parsy.char_from(',').desc(',')
COLON = parsy.char_from(':').desc(':')
SEMICOLON = parsy.char_from(';').desc(';')
L_BRACE = parsy.char_from('{').desc('{')
R_BRACE = parsy.char_from('}').desc('}')
L_PARENTHESES = parsy.char_from('(').desc('(')
R_PARENTHESES = parsy.char_from(')').desc(')')

# Type and semantic keywords come from module-level TYPES / SEMANTICS.
TYPE = parsy.string_from(*TYPES)
WHITESPACE = parsy.whitespace.desc('whitespace')
OPTIONAL_WHITESPACE = WHITESPACE.optional()
SEMANTIC = parsy.string_from(*SEMANTICS)
# Identifiers: a letter followed by letters, digits, or underscores.
IDENTIFIER_CHARS = parsy.letter | parsy.decimal_digit | parsy.string("_")
IDENTIFIER = (parsy.letter +
              IDENTIFIER_CHARS.many().concat()).desc('identifier')
# Optionally signed float, with or without integer/fraction part.
FLOAT = parsy.regex(r'[+-]?(?:\d+\.?\d*|\.\d+)').desc('float').map(Float)
# One or more comma-separated floats.
ARGS = FLOAT.sep_by(
    OPTIONAL_WHITESPACE >> parsy.string(',') << OPTIONAL_WHITESPACE, min=1)
# "Type(arg, ...)" constructor expressions.
TYPE_CONSTRUCTOR = parsy.seq(
    TYPE << OPTIONAL_WHITESPACE << L_PARENTHESES, OPTIONAL_WHITESPACE >> ARGS
    << OPTIONAL_WHITESPACE << R_PARENTHESES).combine(TypeConstructor)
DEFAULT_VALUE = FLOAT | TYPE_CONSTRUCTOR
VARYING = (
    parsy.seq(TYPE << WHITESPACE, IDENTIFIER << OPTIONAL_WHITESPACE,
Beispiel #20
0
# Separators that swallow surrounding whitespace.
comma = parsy.regex(r'\s*,\s*')
semicolon = parsy.regex(r'\s*;\s*')

# Non-negative integer with no leading zeros.
intLit = parsy.regex(r'(0|[1-9][0-9]*)').map(int).desc("integer")

# Optionally signed float with optional fraction and exponent.
floatLit = parsy.regex(r'-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?').map(
    float).desc("floating point number")

# Quoted strings (no escapes); quotes are stripped from the result.
singleQuoteString = parsy.regex(r"'[^']*'").map(lambda s: s[1:-1])
doubleQuoteString = parsy.regex(r'"[^"]*"').map(lambda s: s[1:-1])
strLit = (singleQuoteString | doubleQuoteString).desc("string")

identifier = parsy.regex(r'[a-zA-Z][a-zA-Z0-9_]*').map(Identifier).desc(
    "identifier (variable)")

# Comparison operators.
operator = parsy.string_from('=', '<', '>', '<=', '>=')

# Mapping operator.
mapoper = parsy.string_from('<->')


@parsy.generate
def function():
    """Parse ``name(arg, ...)`` into a Function node.

    Arguments are either a mapping binop or a comma-separated list of
    basic expressions.
    """
    fname = yield identifier
    yield oparen
    args = yield (mapping_binop | basic_expr.sep_by(comma))
    yield cparen
    return Function(fname, args)


# BUG FIX: parsy's .desc() returns a NEW parser -- the original call
# discarded the result, so the description never took effect. Rebind.
function = function.desc("function call")