def test_or_with_then():
    """`|` tries the right branch only when the left fails before consuming."""
    escape_or_z = (string('\\') >> string('y')) | string('z')
    assert escape_or_z.parse('\\y') == 'y'
    assert escape_or_z.parse('z') == 'z'
    try:
        escape_or_z.parse('\\z')
    except ParseError:
        pass
    else:
        raise AssertionError
def test_then():
    """`>>` requires both parsers to match, keeping only the right result."""
    xy = string('x') >> string('y')
    assert xy.parse('xy') == 'y'
    for bad in ('y', 'z'):
        try:
            xy.parse(bad)
        except ParseError:
            pass
        else:
            raise AssertionError
def lexer(code):
    """Tokenize *code* into floats, ints and single-char operators,
    skipping whitespace between tokens."""
    ws = regex(r'\s*')
    int_token = digit.at_least(1).concat().map(int)
    # A float must contain a '.'; the digit runs on either side may be empty.
    float_token = (digit.many() + string('.').result(['.']) + digit.many()
                   ).concat().map(float)
    operator = regex(r'[()*/+-]')
    token = (float_token | int_token | operator) << ws
    return (ws >> token.many()).parse(code)
def lexeme(p):
    """
    From a parser (or string), make a parser that consumes whitespace
    on either side.
    """
    parser = string(p) if isinstance(p, str) else p
    ws = regex(r'\s*')
    return ws >> parser << ws
def def_top_level(keyword: str, ty: type):
    # Parse one top-level definition introduced by *keyword*:
    #   <attributes> <keyword> <name> [: base, base...] { <methods> }
    # The seq() keyword names become the kwargs passed to *ty* via
    # combine_dict; `_1` is a throwaway key for the consumed keyword itself.
    # NOTE: the order of the seq() keywords IS the parse order.
    return seq(
        attrs=attributes << padding,
        _1=string(keyword) << whitespace,
        name=identifier << padding,
        bases=(colon >> identifier.sep_by(comma) << padding).optional(),
        methods=lbrace >> method.many() << rbrace,
    ).combine_dict(ty)
def make_jinja_parser(config, content):
    # Build the combined parser for any Jinja construct: variable,
    # comment, or element (structured or single tag).
    # Allow to override elements with the configuration.  Building a dict
    # keyed on the opening tag name de-duplicates: a config entry with the
    # same opening name replaces the default one.
    jinja_structured_elements_names = dict(
        (names[0], names)
        for names in (
            DEFAULT_JINJA_STRUCTURED_ELEMENTS_NAMES
            + config.get("jinja_custom_elements_names", [])
        )
    ).values()
    jinja_structured_element = P.alt(
        *[
            make_jinja_element_parser(
                [P.string(name) for name in names], content=content
            )
            for names in jinja_structured_elements_names
        ]
    )
    # These tag names can't begin a Jinja element: everything after the
    # first entry of each names tuple is an intermediate/closing tag.
    jinja_intermediate_tag_names = set(
        n for _, *sublist in jinja_structured_elements_names for n in sublist
    )
    jinja_intermediate_tag_name = P.alt(
        *(P.string(n) for n in jinja_intermediate_tag_names)
    )
    jinja_element_single = make_jinja_element_parser(
        [
            P.alt(
                # HACK: If we allow `{% if %}`s without `{% endif %}`s here,
                # `make_jinja_optional_container_parser` doesn’t work. It
                # is probably better to reject any structured tag name here.
                P.string("if"),
                jinja_intermediate_tag_name,
            )
            .should_fail("not an intermediate Jinja tag name")
            .then(jinja_name)
        ],
        content=content,
    )
    jinja_element = jinja_structured_element | jinja_element_single
    return jinja_variable | jinja_comment | jinja_element
def parse_prefix_term():
    # Parse a prefix term: a symbol (bare, or single-quoted to allow
    # otherwise-illegal characters) optionally followed by a
    # parenthesised, comma-separated argument list.
    symbol = yield lit_symbol | (
        token("'") >> regex("[^\n\t ']+") << token("'"))
    args = []
    br_open = yield string('(').optional()
    if br_open:
        # Once '(' is consumed, at least one argument is required.
        t = yield lit_white >> parse_term
        args.append(t)
        while True:
            # Keep collecting arguments only while commas keep appearing.
            c = yield lit_white >> string(',').optional()
            if not c:
                break
            t = yield lit_white >> parse_term
            args.append(t)
        yield lit_white >> string(')')
    return Term(symbol, args)
def redirect_heredoc():
    # Parse a heredoc redirect: <<TAG, <<"TAG" or <<'TAG', and register
    # the pending heredoc in the parser's notes so its body can be
    # collected later.
    yield string("<<")
    quote = yield (string('"') | string("'")).optional()
    tag = yield word_id
    if quote is not None:
        # The closing quote must match the opening one.
        yield string(quote)
    hd = RedirectHere(0, quote=quote, end=str(tag))
    # We keep track of the list of heredocs we are looking for, in order.
    notes = yield get_notes
    # We have to take care to copy the previous notes' list; we don't want to
    # mutate the list itself during parsing and backtracking.
    notes['hds'] = list(notes.get('hds', [])) + [hd]
    yield put_note(notes)
    return hd
def _args_list_block() -> IndentSome:
    """Parse a `section:` header line and describe the indented item block
    that should follow it."""
    section = yield p_section_name << parsy.string(":") << sc

    def build(tail):
        return {'name': section, 'items': tail}

    return IndentSome(p=p_items, indent=None, f=build)
def backtick():
    """
    Parse backticks. This is fugly.

    Backticks: I gave up on single-pass parsing here. It would be doable
    with the 'notes' extension, but would require enough context carrying
    forward that it'd need reimplementations of all bare string- and
    regex-matching things to understand how many levels deep they are.

    Here is the skinny: the shell has the $( ) which offer an objectively
    cleaner syntax. We parse backticks recursively because it's about the
    neatest approach to implement the shell spec that describes the
    feature in terms of a recursive implementation.

    The Posix shell spec, section 2.6.3, says this:

    Within the backquoted style of command substitution, <backslash> shall
    retain its literal meaning, except when followed by: '$', '`', or
    <backslash>. The search for the matching backquote shall be satisfied
    by the first unquoted non-escaped backquote; during this search, if a
    non-escaped backquote is encountered within a shell comment, a
    here-document, an embedded command substitution of the $(command)
    form, or a quoted string, undefined results occur. A single-quoted or
    double-quoted string that begins, but does not end, within the
    "`...`" sequence produces undefined results.

    What a mess.
    """
    # Collect the raw text between the backticks, resolving only the three
    # escapes the spec mandates (\` \$ \\); everything else is literal.
    content = yield string("`") >> (
        string("`").should_fail("backtick") >>  # stop at the closing backtick
        (string(r"\`").result("`")
         | string(r"\$").result("$")
         | string(r"\\").result("\\")
         | regex(r'[^\\`]*')      # any run without backslash or backtick
         | string("\\"))).many().concat() << string("`")
    # Recursively parse the collected text as a complete command sequence.
    return command_sequence.parse(content)
def internal_to_parser(idx):
    """Turn grammar rule *idx* into a parser: a plain string becomes a
    literal match; otherwise each alternative is a sequence of sub-rules
    whose results are concatenated."""
    rule_def = rule_defs[idx]
    if isinstance(rule_def, str):
        return string(rule_def)
    alternatives = []
    for to_seq in rule_def:
        sub_parsers = [internal_to_parser(i) for i in to_seq]
        alternatives.append(seq(*sub_parsers).map(''.join))
    return alt(*alternatives)
def make_quoted_string_attribute_parser(quote, jinja):
    """
    quote: A single or a double quote
    """
    def combine(locations, value):
        return String(value=value, quote=quote, **locations)

    # Any character except '<' may appear inside the quoted value.
    value_char = P.regex(r'[^<]', flags=re.DOTALL)
    not_closing = P.string(quote).should_fail('no ' + quote)
    value = interpolated(not_closing.then(jinja | value_char).many())
    quoted = P.string(quote).then(value).skip(P.string(quote))
    return locate(quoted).combine(combine)
def make_raw_text_element_parser(tag_name, jinja):
    """
    Used for <style> and <script>.
    """
    opening_tag = make_opening_tag_parser(
        tag_name_parser=P.string(tag_name),
        jinja=jinja,
    )
    # Everything (lazily) up to, but not including, the closing tag.
    body = P.regex(r'.*?(?=</' + tag_name + '>)', flags=re.DOTALL)
    closing_tag = make_closing_tag_parser(P.string(tag_name))
    element = P.seq(opening_tag, body, closing_tag)
    return locate(element).combine(_combine_element)
def test_generate_backtracking(self):
    @generate
    def xy():
        yield string('x')
        yield string('y')
        assert False

    combined = xy | string('z')
    # Alternation must abandon xy() before its assert ever runs.
    self.assertEqual(combined.parse('z'), 'z')
def parse_shader(varying_type: str, source: str) -> Tuple[List[Varying], str]:
    # Split the `@<varying_type>` declaration block off the front of
    # *source* and return (sorted declarations, header line + remainder).
    sentinel = parsy.string(f'@{varying_type}')
    definition = OPTIONAL_WHITESPACE >> sentinel >> OPTIONAL_WHITESPACE >> BODY
    # parse_partial -> (matched declarations, unconsumed rest of source).
    varying_definition, source = definition.parse_partial(
        _strip_comments(source))
    # Sort by string form for a deterministic header.
    varying_definition = sorted(varying_definition, key=lambda v: str(v))
    varying_names = ', '.join(varying.identifier
                              for varying in varying_definition)
    # NOTE(review): the emitted header uses a '$' prefix while the sentinel
    # above matches '@' -- presumably the downstream format differs from the
    # input format; confirm this asymmetry is intended.
    header = f'${varying_type} {varying_names}\n'
    return varying_definition, header + source
def test_at_most(self):
    capped = string("ab").at_most(2)
    for text, expected in [("", []), ("ab", ["ab"]), ("abab", ["ab", "ab"])]:
        self.assertEqual(capped.parse(text), expected)
    # A third repetition leaves unconsumed input, which parse() rejects.
    self.assertRaises(ParseError, capped.parse, "ababab")
def skip_line_comment(prefix):
    """
    Given comment prefix this function returns a parser that skips line
    comments.  Note that it stops just before the newline character but
    doesn't consume the newline.  Newline is either supposed to be
    consumed by 'space' parser or picked up manually.
    """
    marker = parsy.string(prefix).result('')
    rest_of_line = parsy.regex(r'[^\n]*')
    return (marker << rest_of_line).desc('line-comment')
def make_attribute_parser(jinja):
    """Build a parser for one `name[=value]` attribute (Jinja-aware)."""
    attribute_value = make_attribute_value_parser(jinja)
    name_part = interpolated(tag_name).skip(whitespace)
    # The `=value` tail is optional; tags feed _combine_attribute a dict.
    value_part = P.seq(
        P.string('=').skip(whitespace).tag('equal'),
        interpolated(attribute_value).tag('value'),
    ).map(dict).optional()
    parser = P.seq(name_part, value_part)
    return locate(parser).combine(_combine_attribute).desc('attribute')
def parseRow():
    """Parse one row: index, '.'-prefixed atom label, then x/y/z floats."""
    skip_ws = parsy.whitespace.many()
    num = yield skip_ws >> intParser
    atom = yield parsy.string('.') >> parsy.regex(r'[a-zA-Z.]*')
    x = yield skip_ws >> floatParser
    y = yield skip_ws >> floatParser
    z = yield skip_ws >> floatParser
    return {'atom': atom, 'num': num, 'x': x, 'y': y, 'z': z}
def test_generate_backtracking():
    @generate
    def xy():
        yield string('x')
        yield string('y')
        assert False

    combined = xy | string('z')
    # Alternation must abandon xy() before its assert ever runs.
    assert combined.parse('z') == 'z'
def test_string_from(self):
    titles = string_from("Mr", "Mr.", "Mrs", "Mrs.")
    for title in ("Mr", "Mr."):
        self.assertEqual(titles.parse(title), title)
    self.assertEqual((titles + string(" Hyde")).parse("Mr. Hyde"),
                     "Mr. Hyde")
    with self.assertRaises(ParseError) as err:
        titles.parse('foo')
    # The error message lists every alternative in declaration order.
    self.assertEqual(
        str(err.exception),
        """expected one of 'Mr', 'Mr.', 'Mrs', 'Mrs.' at 0:0""",
    )
def make_opening_tag_parser(jinja, tag_name_parser=None, allow_slash=False):
    """Build a parser for an opening tag `<name attrs... [/]>`."""
    attributes = make_attributes_parser(jinja)
    if not tag_name_parser:
        # Default: a literal tag name or a Jinja expression in its place.
        tag_name_parser = tag_name | jinja
    if allow_slash:
        optional_slash = locate(P.string('/').skip(whitespace))
        slash = optional_slash.combine(_combine_slash).optional()
    else:
        slash = P.success(None)
    tag = P.seq(
        P.string('<'),
        tag_name_parser.skip(whitespace),
        attributes.skip(whitespace),
        slash,
        P.string('>'),
    )
    return locate(tag).combine(_combine_opening_tag)
def container_element_impl():
    """Parse `<tag ...>content</tag>`, requiring the closing tag to match
    the opening one."""
    opening_node = yield opening_tag
    children = yield content
    name = opening_node.name
    if isinstance(name, str):
        closer = make_closing_tag_parser(P.string(name))
    else:
        # A Jinja-produced tag name is closed by a Jinja expression too.
        assert isinstance(name, Jinja)
        closer = make_closing_tag_parser(jinja)
    closing_node = yield closer
    return [opening_node, children, closing_node]
def fun():
    """Parse one rule: optional name, then one of the three rule forms,
    terminated by '.'."""
    name = yield lit_white >> parse_rule_name.optional()
    if not name:
        # Unnamed rules get an auto-generated name.
        name = rule_name_generator()
    rule_body = parse_simplification | parse_propagation | parse_simpagation
    kept, removed, guard, body = yield rule_body
    yield lit_white >> string('.')
    return Rule(name, kept, removed, guard or [], body)
def make_attribute_parser(jinja):
    """Build a parser for one `name[=value]` attribute (Jinja-aware)."""
    attribute_value = make_attribute_value_parser(jinja)
    # The `=value` tail (preceded by whitespace) is optional.
    equal_value = P.seq(
        P.string("=").skip(whitespace).tag("equal"),
        interpolated(attribute_value).tag("value"),
    ).map(dict)
    parser = P.seq(
        interpolated(attr_name),
        whitespace.then(equal_value).optional(),
    )
    return locate(parser).combine(_combine_attribute).desc("attribute")
def core_log() -> Parser:
    # Parse a complete Core22 log into a CoreLog record: banner line,
    # version, platform count/list, per-platform devices, optional CUDA
    # status, then checkpoint and average performance figures.
    version_decl = line_with(string("Version ") >> semver)
    platforms_decl = line_with(
        bracketed(integer) << string(" compatible platform(s):"))
    perf = floating << string(" ns/day")
    perf_checkpoint = line_with(
        string("Performance since last checkpoint: ") >> perf)
    perf_average = line_with(string("Average performance: ") >> perf)
    platform_name = string("CUDA") | string("OpenCL")
    cuda_status = line_with(
        seq(
            enabled=string("Using ") >>
            platform_name.map(lambda name: name == "CUDA"),
            gpu=string(" and gpu ") >> integer,
        ).combine_dict(CudaStatus))
    yield line_with(string("Folding@home GPU Core22 Folding@home Core"))
    version = yield version_decl
    num_platforms = yield platforms_decl
    platforms = yield numbered_list(platform, num_platforms) << newline
    devices = yield numbered_list(platform_devices, num_platforms)
    # The CUDA status line may appear anywhere later in the log, or never.
    cuda_status = yield search(cuda_status, "CUDA status").optional()
    # Skip ahead to the first checkpoint-performance line.
    yield many_until(any_char, perf_checkpoint, "checkpoint")
    # Checkpoint figures are separated by arbitrary log noise up to the
    # next checkpoint or the final average line.
    checkpoint_perfs = yield perf_checkpoint.sep_by(
        many_until(
            any_char,
            perf_checkpoint | perf_average,
            "checkpoint or average performace",
        ))
    average_perf = yield perf_average.optional()
    return CoreLog(
        version=version,
        platforms=[
            Platform(info=platform, devices=platform_devices)
            for platform, platform_devices in zip(platforms, devices)
        ],
        cuda=cuda_status,
        checkpoint_perfs_ns_day=checkpoint_perfs,
        average_perf_ns_day=average_perf,
    )
def word():
    """Parse a word.  Unquoted words are delegated to `unquoted_word`;
    inside quotes, backslash escapes the quote character and itself."""
    quote_char = yield regex('[\'"]').optional()
    if quote_char is None:
        return unquoted_word
    escape = regex(fr'\\[\\{quote_char}]').map(lambda x: x[-1])
    plain = regex(fr'[^\\{quote_char}]+')
    body = yield (escape | plain).many().concat()
    yield string(quote_char).desc("quote")
    return body
def parse_constraints():
    """Parse a non-empty, comma-separated list of constraint terms."""
    terms = [(yield lit_white >> parse_term)]
    # Keep consuming `, term` pairs while a comma appears.
    while (yield lit_white >> string(',').optional()):
        terms.append((yield lit_white >> parse_term))
    return terms
def _input_declaration():
    """Parse `input <type> <name> [message]` into an Input node."""
    yield string('input') >> whitespace
    type_ = yield _input_type << whitespace
    name = yield _value_name
    message = yield _message.optional()
    return Input(name=name, type=type_, message=message)
def test_skip_line_comment():
    # does not need newline to be present
    no_newline = "// this line comment doesn't have a newline at the end "
    assert skip_line_comment('//').parse(no_newline) == ""
    # trailing newline is not consumed
    with_newline = skip_line_comment('//') + parsy.string("\n")
    assert with_newline.parse("// here we go\n") == "\n"
def atom():
    """Parse an (optionally quoted) atom, attaching its source range."""
    start_pos = yield parsy.line_info
    quote = yield parsy.string("'").optional()
    value = yield raw_atom
    end_pos = yield parsy.line_info
    span = to_range(start_pos, end_pos)
    if quote is None:
        value.range = span
        return value
    # 'x is sugar for (quote x).
    return RList([RSymbol('quote'), value], span=span)
def line_of_code():
    # Parse one disassembly listing line of the form:
    #   <address>: <1-4 space-separated byte codes> <mnemonic> [op1][, op2][; comment]
    yield whitespace
    address = yield lexeme(number)
    yield colon
    byte_codes = yield lexeme((byte_code << parsy.string(' ')).times(1, max=4))
    mnemonic = yield lexeme(parsy.regex(r'\w{2,5}'))
    op1 = yield lexeme(operand.optional())
    yield lexeme(comma.optional())
    op2 = yield lexeme(operand.optional())
    yield lexeme(semicolon.optional())
    # NOTE(review): the trailing comment is consumed but dropped from the
    # returned tuple -- presumably intentional; confirm against callers.
    comment = yield lexeme(parsy.regex(r'.+').optional())
    return 'loc', address, byte_codes, mnemonic, op1, op2
def test_bind(self):
    seen = None

    def binder(value):
        nonlocal seen
        seen = value
        return string('y')

    bound = string('x').bind(binder)
    self.assertEqual(bound.parse('xy'), 'y')
    # The bound function received the first parser's result.
    self.assertEqual(seen, 'x')
    # Without the trailing 'y' the continuation parser fails.
    self.assertRaises(ParseError, bound.parse, 'x')
def test_bind():
    seen = None

    def binder(value):
        nonlocal seen
        seen = value
        return string('y')

    bound = string('x').bind(binder)
    assert bound.parse('xy') == 'y'
    # The bound function received the first parser's result.
    assert seen == 'x'
    # Without the trailing 'y' the continuation parser fails.
    try:
        bound.parse('x')
    except ParseError:
        pass
    else:
        raise AssertionError
def test_mark(self):
    marked_line = letter.many().mark() << string("\n")
    lines = marked_line.many().parse("asdf\nqwer\n")
    self.assertEqual(len(lines), 2)
    # mark() yields ((row, col) start, result, (row, col) end) triples.
    self.assertEqual(lines[0], ((0, 0), ['a', 's', 'd', 'f'], (0, 4)))
    self.assertEqual(lines[1], ((1, 0), ['q', 'w', 'e', 'r'], (1, 4)))
# JSON tokenizer pieces built on parsy.  `lexeme` strips trailing
# whitespace after each token; the literal/number/string sub-parsers below
# feed value parsers defined elsewhere in the file.
from parsy import string, regex, generate
import re
from sys import stdin

whitespace = regex(r'\s*', re.MULTILINE)
lexeme = lambda p: p << whitespace
lbrace = lexeme(string('{'))
rbrace = lexeme(string('}'))
lbrack = lexeme(string('['))
rbrack = lexeme(string(']'))
colon = lexeme(string(':'))
comma = lexeme(string(','))
true = lexeme(string('true')).result(True)
false = lexeme(string('false')).result(False)
null = lexeme(string('null')).result(None)
# JSON numbers are always parsed as floats.
number = lexeme(
    regex(r'-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?')
).map(float)
# A run of ordinary string characters (no quote, no backslash).
string_part = regex(r'[^"\\]+')
# Backslash escape sequences.
# NOTE(review): this alternation appears truncated in this chunk (no
# closing parenthesis, and no '"' or \uXXXX escape alternatives visible) --
# presumably the expression continues beyond this view; confirm.
string_esc = string('\\') >> (
    string('\\')
    | string('/')
    | string('b').result('\b')
    | string('f').result('\f')
    | string('n').result('\n')
    | string('r').result('\r')
    | string('t').result('\t')
def test_or(self):
    either = string('x') | string('y')
    for ch in ('x', 'y'):
        self.assertEqual(either.parse(ch), ch)
def thing():
    # Minimal generator parser: consumes a single 't'; with no explicit
    # return, the parser's result is None.
    yield string('t')
def test_string(self):
    x = string('x')
    self.assertEqual(x.parse('x'), 'x')
    # Any other input is rejected.
    self.assertRaises(ParseError, x.parse, 'y')
def test_then(self):
    xy = string('x') >> string('y')
    self.assertEqual(xy.parse('xy'), 'y')
    # Both halves are mandatory.
    for bad in ('y', 'z'):
        self.assertRaises(ParseError, xy.parse, bad)
def test_many_with_then(self):
    many_x_then_y = string('x').many() >> string('y')
    # many() accepts zero or more repetitions before the 'y'.
    for text in ('y', 'xy', 'xxxxxy'):
        self.assertEqual(many_x_then_y.parse(text), 'y')
def binder(x):
    # Continuation for .bind(): records the value produced by the previous
    # parser into the enclosing scope, then parses a following 'y'.
    nonlocal piped
    piped = x
    return string('y')
def test_many_with_then():
    many_x_then_y = string('x').many() >> string('y')
    # many() accepts zero or more repetitions before the 'y'.
    for text in ('y', 'xy', 'xxxxxy'):
        assert many_x_then_y.parse(text) == 'y'
def parser():
    # Generator parser body: stores the parsed 'x' and 'y' into the
    # enclosing test's variables (via nonlocal) and yields 3 as the
    # overall parse result.
    nonlocal x
    nonlocal y
    x = yield string('x')
    y = yield string('y')
    return 3
def quote():
    """Parse the 'expr reader sugar into the list ['quote', expr]."""
    quoted_expr = yield string("'") >> expr
    return ['quote', quoted_expr]
def quoted():
    """Parse a double-quoted string, resolving escapes, into its text."""
    chunks = yield string('"') >> (string_part | string_esc).many() << string('"')
    return ''.join(chunks)
def xy():
    # Deliberately unreachable tail: the enclosing test combines this
    # parser with an alternative and feeds input that fails at 'x', so
    # backtracking must stop execution before the assert is reached.
    yield string('x')
    yield string('y')
    assert False
def test_string():
    x = string('x')
    assert x.parse('x') == 'x'
    # Any other input is rejected.
    try:
        x.parse('y')
    except ParseError:
        pass
    else:
        raise AssertionError
from parsy import string, regex, generate
import re
import pdb

# Lexing helpers: a lexeme swallows any trailing whitespace and
# ';'-to-end-of-line comments.
whitespace = regex(r'\s+', re.MULTILINE)
comment = regex(r';.*')
ignore = (whitespace | comment).many()
lexeme = lambda p: p << ignore

lparen = lexeme(string('('))
rparen = lexeme(string(')'))
number = lexeme(regex(r'\d+')).map(int)
symbol = lexeme(regex(r'[\d\w_-]+'))
true = lexeme(string('#t')).result(True)
false = lexeme(string('#f')).result(False)
# Order matters: #t/#f must be tried before the generic symbol rule.
atom = true | false | number | symbol


@generate
def form():
    # A parenthesised list of expressions.
    yield lparen
    els = yield expr.many()
    yield rparen
    return els


@generate
def quote():
    # 'expr desugars to (quote expr).
    yield string("'")
    e = yield expr
    return ['quote', e]
def test_or():
    either = string('x') | string('y')
    for ch in ('x', 'y'):
        assert either.parse(ch) == ch
def test_or_with_then(self):
    escape_or_z = (string('\\') >> string('y')) | string('z')
    self.assertEqual(escape_or_z.parse('\\y'), 'y')
    self.assertEqual(escape_or_z.parse('z'), 'z')
    # '\z' enters the first branch and must fail overall.
    self.assertRaises(ParseError, escape_or_z.parse, '\\z')
def xy():
    # Generator parser body: stores the parsed 'x' and 'y' into the
    # enclosing scope (via nonlocal) and yields 3 as the overall parse
    # result.
    nonlocal x
    nonlocal y
    x = yield string('x')
    y = yield string('y')
    return 3