def __init__(self):
    """Assemble the line parser and expose its ``parse`` as ``self.parse``.

    A line is tried first as a mapping (two ``U+XXXX`` sequences joined by
    ``=>`` with an optional trailing comment), then as a bare ``%%%``
    comment, and finally as whatever ``spaces()`` matches.
    """
    #
    # Parser combinators
    #
    whitespace = spaces()
    maybe_whitespace = optional(whitespace)

    # Input matched by ``spaces()`` alone is replaced by the EMPTY marker.
    blank = whitespace.parsecmap(lambda _: EMPTY)

    # '%%%' introduces a comment; the text matched by '.*' is wrapped in
    # Comment.
    remark = (string('%%%') >> regex('.*')).parsecmap(Comment)

    # 'U+' followed by upper-case hexadecimal digits, converted to an int.
    hex_value = regex('[0-9A-F]+').parsecmap(lambda digits: int(digits, 16))
    code_point = string('U+') >> hex_value

    # A sequence of code points, collected into a tuple.
    code_points = sepBy(code_point, whitespace).parsecmap(tuple)

    arrow_token = maybe_whitespace >> string('=>') << maybe_whitespace

    fields = joint(code_points << arrow_token, code_points, optional(remark))
    mapping_line = fields.parsecmap(
        lambda parts: Mapping(parts[0], parts[1], parts[2]))

    any_line = try_choice(mapping_line, try_choice(remark, blank))
    self.parse = any_line.parse
def test_memberMappings(self):
    """Check that newline-separated member mappings parse to the expected list.

    Three members are parsed with ``sepBy(memberMapping, string('\\n'))``: a
    field without line numbers, a constructor with an empty argument list,
    and a method taking one argument.
    """
    from parsec import sepBy
    from parsec import string
    from proguard_mapping_parser.parser import memberMapping

    members = sepBy(memberMapping, string('\n'))

    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
    # no longer exists in Python 3.12 and later.
    self.assertEqual(
        [
            ((None, ('java.util.HashMap', None), 'mHashMap', None), 'a'),
            (((35, 37), ('void', None), '<init>', []), '<init>'),
            (((66, 66), ('boolean', None), 'contains', [
                ('java.lang.Object', None),
            ]), 'a'),
        ],
        members.parse(
            ' java.util.HashMap mHashMap -> a\n'
            ' 35:37:void <init>() -> <init>\n'
            ' 66:66:boolean contains(java.lang.Object) -> a\n'))
def json_object():
    """Parse a brace-delimited object and hand it back as a ``dict``.

    Comments are accepted right after the opening brace and right before
    the closing brace. The comma-separated ``object_pair`` results are
    passed to ``dict`` and delivered by raising ``StopGenerator``.
    """
    opening = lbrace << many(comment)
    yield opening
    members = yield sepBy(object_pair, comma)
    closing = many(comment) << rbrace
    yield closing
    raise StopGenerator(dict(members))
def array():
    """Parse a bracket-delimited, comma-separated sequence of values.

    Comments are accepted right after the opening bracket and right after
    the closing bracket. The list of parsed values is delivered by raising
    ``StopGenerator``.
    """
    opening = lbrack << many(comment)
    yield opening
    items = yield sepBy(value, comma)
    closing = rbrack << many(comment)
    yield closing
    raise StopGenerator(items)
import re

from parsec import string, sepBy, regex, sepEndBy1, spaces, Parser, separated, Value, generate, many1, digit

# A cell is either a double-quoted string (quotes included in the match) or a
# run of characters containing no comma, double quote, CR or LF.
quoted_string = regex(r'"[^"]*"', re.MULTILINE)
cell = quoted_string ^ regex(r'[^,"\r\n]*')

# NOTE(review): this matches "\r" or "\r\n" but never a bare "\n" -- confirm
# that Unix line endings are meant to be rejected.
end_line = regex(r'\r\n?', re.MULTILINE)

# Cells are separated by a comma followed by whatever ``spaces()`` matches.
row = sepBy(cell, string(",") << spaces())
header = row
csv = (header << end_line) + sepEndBy1(row, end_line)


def parser_by_count(value):
    """Build a parser for exactly ``int(value)`` comma-separated cells.

    Args:
        value: Text of the leading cell, expected to hold an integer.

    Returns:
        ``separated(cell, ...)`` with ``mint`` and ``maxt`` both set to the
        parsed count, or, when ``value`` is not a valid integer, a parser
        that always fails with the message "expected a number".
    """
    try:
        num_cells = int(value)
    except ValueError:
        # parsec invokes the wrapped function as fn(text, index); the
        # parameters must be in that order so the failure records the
        # position rather than the whole input text.
        return Parser(
            lambda text, index: Value.failure(index, "expected a number"))
    return separated(cell, string(",") << spaces(), mint=num_cells, maxt=num_cells)


first_cell = (cell << string(",") << spaces())
counting_parser = first_cell.bind(parser_by_count)

# NOTE(review): ``matrix_parser`` is cut off in this view; only its first
# statement is visible, so it is reproduced untouched.
# @generate
def matrix_parser():
    cell = many1(digit()).parsecmap(''.join).parsecmap(int)
# combinators](https://en.wikipedia.org/wiki/Parsec_(parser)) to keep this
# manageable.

_whitespace = ps.regex(r'\s*')


def _lexeme(p):
    """Return ``p`` followed by ``_whitespace``."""
    return p << _whitespace


_lbrace = _lexeme(ps.string('{'))
_rbrace = _lexeme(ps.string('}'))
_separator = _lexeme(ps.regex(r'[ ,]'))
_name = _lexeme(ps.regex(r'[\w]+'))
_num_hex = _lexeme(ps.regex(r'0x[0-9a-fA-F]+')).parsecmap(lambda h: int(h, base=16))
# Only integer literals are matched here. The previous pattern also accepted
# a fraction and an exponent, which ``int`` cannot convert, so input such as
# "1.5" or "1e5" raised ValueError from inside the parser.
_num_int = _lexeme(ps.regex(r'-?(0|[1-9][0-9]*)')).parsecmap(int)
_num_float = _lexeme(ps.regex(r'-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?')).parsecmap(float)


def _list_of(elems):
    """Return a parser for zero or more ``elems`` between braces."""
    return _lbrace >> ps.many(elems) << _rbrace


def _sep_list_of(elems):
    """Return a parser for ``elems`` joined by ``_separator`` between braces."""
    return _lbrace >> ps.sepBy(elems, _separator) << _rbrace


# A parameter value is a single integer or a braced list of integers.
_param_value = _num_int | _list_of(_num_int)


@ps.generate
def _ldpc_key_value():
    """Parse a name followed by a parameter value into a ``(key, val)`` pair."""
    key = yield _name
    val = yield _param_value
    return (key, val)


@ps.generate
def _ldpc_param():
    """Parse a name followed by a braced list of key/value pairs.

    Returns ``(param_name, mapping)`` where ``mapping`` is the ``dict`` built
    from the parsed pairs.
    """
    param_name = yield _name
    elems = yield _list_of(_ldpc_key_value)
    return (param_name, dict(elems))
def row():
    """Build the parser for one row.

    The tokenized time parser is combined with ``+`` with the row elements,
    which are separated by one or more space characters.
    """
    timestamp = Parser.tokenize(Parser.time_parser())
    element = Parser.row_element()
    gap = psc.many1(psc.string(' '))
    return timestamp + psc.sepBy(element, gap)
from parsec import string, none_of, sepBy, many

# Inside quotes: any character other than '"', or a doubled '""' that stands
# for a single literal '"'.
quoted_char = none_of('"') | string('""').result('"')

# A quoted cell: the characters between the surrounding quotes, joined into
# one string.
quoted = (
    string('"') >> many(quoted_char) << string('"')
).parsecmap("".join)

# An unquoted cell runs up to the next comma or newline.
cell = quoted | many(none_of(",\n")).parsecmap("".join)

# A row is comma-separated cells; a table is newline-separated rows.
cells = sepBy(cell, string(","))
table = sepBy(cells, string("\n"))
def field_options():
    """Parse a ``[...]`` list of comma-separated options.

    Returns the list produced by ``sepBy(option, comma)``; the brackets
    themselves are discarded.
    """
    open_bracket = lexeme(string('['))
    yield open_bracket
    parsed = yield sepBy(option, comma)
    close_bracket = lexeme(string(']'))
    yield close_bracket
    return parsed
logger = logging.getLogger(__name__)

# '->' with optional surrounding whitespace.
optionalspaces = optional(spaces())
arrow = optionalspaces >> string('->') << optionalspaces

# An identifier, or one of the special names '<init>' / '<clinit>'.
identifier = (
    regex('[a-zA-Z_$][a-zA-Z_$0-9]*')
    ^ string('<init>')
    ^ string('<clinit>')
)

className = sepBy1(identifier, string('$'))

# Dot-separated identifiers, re-joined into a single dotted string.
packagedFullName = sepBy1(identifier, string('.'))
packagedClassName = packagedFullName.parsecmap('.'.join)

typeName = packagedClassName | regex('[a-z]+')

# A type name paired with an optional '[]' suffix.
javatype = joint(typeName, optional(string('[]')))

methodName = identifier

# Parenthesised, comma-separated argument types; whitespace around each
# type is optional.
methodArguments = (
    string('(')
    >> sepBy(optionalspaces >> javatype << optionalspaces, string(','))
    << string(')')
)

# Two integers, each followed by ':'.
linenumber = regex('[0-9]+').parsecmap(int)
linenumbers = joint(
    linenumber << string(':'),
    linenumber << string(':'),
)

# Optional line numbers, a type, a name and an optional argument list.
member = joint(
    optional(linenumbers),
    javatype << spaces(),
    identifier,
    optional(methodArguments),
)
def parse_list():
    """Parse expressions enclosed in parentheses into a ``wtypes.List``.

    The expressions are separated by one or more ``parsec.space()`` matches.
    """
    yield parsec.string('(')
    separator = parsec.many1(parsec.space())
    items = yield parsec.sepBy(parse_expr, separator)
    yield parsec.string(')')
    return wtypes.List(items)
def json():
    """Parse comma-separated ``mapping`` entries between ``lcbrace`` and ``rcbrace``.

    Returns the list of parsed entries as produced by ``parsec.sepBy``.
    """
    yield lcbrace
    entries = yield parsec.sepBy(mapping, comma)
    yield rcbrace
    return entries
def array():
    """Parse comma-separated ``value`` items between ``lsbrace`` and ``rsbrace``.

    Returns the list of parsed items as produced by ``parsec.sepBy``.
    """
    yield lsbrace
    items = yield parsec.sepBy(value, comma)
    yield rsbrace
    return items