def matrix_parser():
    """Parse a size header followed by a matrix of non-negative integers.

    Written as a generator: each ``yield`` hands a parser to the driver and
    receives that parser's result back (presumably via parsec's ``generate``
    decorator, which is not part of this block).

    Expected input: ``<height>, <width>`` on the first line, then exactly
    ``height`` newline-separated rows of exactly ``width`` comma-separated
    integers.

    Returns:
        The parser for the rows; its result is a list of lists of ``int``.
    """
    integer = many1(digit()).parsecmap(''.join).parsecmap(int)
    comma = string(",") << spaces()
    newline = string('\n')

    # Header line: "<height>, <width>\n"
    n_rows = yield integer
    yield comma
    n_cols = yield integer
    yield newline

    # Body: both dimensions are enforced exactly (mint == maxt).
    one_row = separated(integer, comma, mint=n_cols, maxt=n_cols)
    return separated(one_row, newline, mint=n_rows, maxt=n_rows)
def __init__(self):
    """Assemble the single-line parser and expose it as ``self.parse``.

    A line is tried, in order, as:
      * a mapping ``U+XXXX ... => U+XXXX ...`` with an optional trailing
        ``%%%`` comment (result: ``Mapping``),
      * a ``%%%`` comment on its own (result: ``Comment``),
      * whitespace only (result: ``EMPTY``).
    """
    #
    # Parser combinators
    #
    whitespace = spaces()
    maybe_whitespace = optional(whitespace)

    # Whitespace-only input is reported as the EMPTY marker.
    blank = whitespace.parsecmap(lambda _: EMPTY)

    # '%%%' introduces a comment; everything matched by '.*' is its text.
    remark = (string('%%%') >> regex('.*')).parsecmap(Comment)

    # 'U+' followed by upper-case hex digits, converted to an int.
    hex_number = regex('[0-9A-F]+').parsecmap(lambda digits: int(digits, 16))
    code_point = string('U+') >> hex_number
    code_points = sepBy(code_point, whitespace).parsecmap(tuple)

    arrow = maybe_whitespace >> string('=>') << maybe_whitespace

    def to_mapping(parts):
        return Mapping(parts[0], parts[1], parts[2])

    mapping = joint(
        code_points << arrow,
        code_points,
        optional(remark),
    ).parsecmap(to_mapping)

    comment_or_blank = try_choice(remark, blank)
    line = try_choice(mapping, comment_or_blank)

    self.parse = line.parse
def parser_by_count(value):
    """Build a parser for exactly ``value`` comma-separated cells.

    Suitable as a callback for ``Parser.bind``, where ``value`` is the text
    of a previously parsed cell announcing how many cells follow.

    Args:
        value: Text of the count; converted with ``int()``.

    Returns:
        A parser matching exactly ``int(value)`` cells separated by a comma
        and optional whitespace. If ``value`` is not a valid integer, a
        parser that always fails with the message "expected a number".
    """
    try:
        num_cells = int(value)
    except ValueError:
        # parsec calls the wrapped function as fn(text, index). The argument
        # order matters: with the names swapped, the whole input text would
        # be reported as the failure index.
        return Parser(
            lambda text, index: Value.failure(index, "expected a number"))
    return separated(cell, string(",") << spaces(),
                     mint=num_cells, maxt=num_cells)
def _parse_fasta(self, filehandle, sep="|"):
    """
    Parse a fasta file.

    The header is split into fields on 'sep'. The sequence is added as a
    final field.

    Args:
        filehandle: Object with a ``.read()`` method returning the file
            contents. An object without ``.read()`` is handed to the parser
            as-is (useful in tests).
        sep: Separator used to split each header line into fields.

    Returns:
        One list per entry: the header fields followed by the sequence.
    """
    # Header: '>' then the rest of the line, then any trailing whitespace.
    p_header = parsec.string(">") >> parsec.regex("[^\n\r]*") << parsec.spaces()
    # Sequence: one or more chunks up to the next '>' or line break, separated
    # by whitespace/newlines and merged with `concat`.
    p_seq = (
        parsec.sepBy1(
            parsec.regex("[^>\n\r]*"), sep=parsec.regex("[\r\n\t ]+")
        ).parsecmap(concat)
        << parsec.spaces()
    )
    p_entry = p_header + p_seq
    p_fasta = parsec.many1(p_entry)
    log(f"Reading {file_str(filehandle)} as a fasta file:")
    # Only the .read() call is guarded, so an AttributeError raised while
    # parsing is not mistaken for "this is not a file object".
    try:
        text = filehandle.read()
    except AttributeError:
        # No .read(): treat the argument as already-loaded input, e.g. in tests
        text = filehandle
    entries = p_fasta.parse(text)
    return [h.split(sep) + [q] for (h, q) in entries]
def component(duty_exp: DutyExpression) -> Parser:
    """Build the parser for one duty expression.

    The parser matches the expression's prefix and then, depending on the
    expression's applicability codes, an amount, a monetary unit and a
    measurement. Its result is
    ``component_output(duty_expression, duty_amount, monetary_unit,
    component_measurement)``.
    """
    prefix = duty_exp.prefix
    amount_code = duty_exp.duty_amount_applicability_code
    measurement_code = duty_exp.measurement_unit_applicability_code
    monetary_code = duty_exp.monetary_unit_applicability_code

    expression = token(prefix).result(duty_exp)
    amount = if_applicable(amount_code, decimal)

    monetary_parser = spaces() >> self._monetary_unit
    # We must match the percentage if the amount should be there
    # and no monetary unit matches.
    if amount_code == ApplicabilityCode.MANDATORY:
        fallback_unit = percentage_unit
    else:
        fallback_unit = optional(percentage_unit)
    monetary_unit = if_applicable(
        monetary_code,
        monetary_parser,
        default=fallback_unit,
    )

    measurement = if_applicable(
        measurement_code,
        optional(token("/")) >> self._measurement,
    )

    full_form = joint(expression, amount, monetary_unit, measurement)
    # Pad the two-element result out to the same 4-tuple shape as full_form.
    measurement_only = joint(expression, measurement).parsecmap(
        lambda pair: (pair[0], None, None, pair[1]),
    )

    def to_output(exp):
        return component_output(
            duty_expression=exp[0],
            duty_amount=exp[1],
            monetary_unit=exp[2],
            component_measurement=exp[3],
        )

    # It's possible for units that contain numbers (e.g. DTN => '100 kg')
    # to be confused with a simple specific duty (e.g 100.0 + kg)
    # So in the case that amounts are only optional and measurements are present,
    # we have to check for just measurements first.
    if (amount_code == ApplicabilityCode.PERMITTED
            and measurement_code != ApplicabilityCode.NOT_PERMITTED):
        chosen = measurement_only ^ full_form
    else:
        chosen = full_form
    return chosen.parsecmap(to_output)
return 'meta' if self.is_plain: return '%s%s' % (self.element_type, star_str) if self.is_list: return '[%s%s]' % (self.element_type, star_str) if self.is_dlist: return '[[%s%s]]' % (self.element_type, star_str) if self.is_set: return '{%s%s}' % (self.element_type, star_str) if self.is_dict: return '{%s: %s%s}' % (self.element_type[0], self.element_type[1], star_str) raise RuntimeError('Invalid codegen kind: %s' % self.kind) name_pattern = parsec.spaces() >> parsec.regex( r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*' ) << parsec.spaces() star_pattern = parsec.spaces() >> parsec.optional(parsec.string('*'), '') << parsec.spaces() parse_meta = parsec.spaces().parsecmap(lambda _: CodeGenKind('meta')) parse_plain = (parsec.spaces() >> (name_pattern + star_pattern) << parsec.spaces() ).parsecmap(lambda value: CodeGenKind('plain', value)) parse_list = (parsec.string('[') >> (name_pattern + star_pattern) << parsec.string(']') ).parsecmap(lambda value: CodeGenKind('list', value)) parse_dlist = (parsec.string('[[') >>
import re from parsec import string, sepBy, regex, sepEndBy1, spaces, Parser, separated, Value, generate, many1, digit quoted_string = regex(r'"[^"]*"', re.MULTILINE) cell = quoted_string ^ regex(r'[^,"\r\n]*') end_line = regex(r'\r\n?', re.MULTILINE) row = sepBy(cell, string(",") << spaces()) header = row csv = (header << end_line) + sepEndBy1(row, end_line) def parser_by_count(value): try: num_cells = int(value) return separated(cell, string(",") << spaces(), mint=num_cells, maxt=num_cells) except ValueError: return Parser( lambda index, text: Value.failure(index, "expected a number")) first_cell = (cell << string(",") << spaces()) counting_parser = first_cell.bind(parser_by_count) # @generate def matrix_parser(): cell = many1(digit()).parsecmap(''.join).parsecmap(int)
field_type: str name: str field_id: int options: List[Option] @dataclass class Message(object): name: str # options: List[Option] fields: List[Field] FIELD = "required int32 x = 1;" lexeme = lambda p: p << spaces() # TODO: check with Protobuf spec def is_any(parsers): if not parsers: return # ? result = parsers[0] for p in parsers[1:]: result |= p return result def is_a(enum_cls): return is_any([string(m) for m in enum_cls.__members__]).parsecmap(enum_cls)
def token(s: str) -> Parser:
    """Return a parser for the literal ``s`` with optional whitespace on
    either side; the parser's result is the matched literal."""
    leading = spaces()
    literal = string(s)
    trailing = spaces()
    return (leading >> literal) << trailing
# from __future__ import absolute_import from __future__ import print_function import logging from parsec import joint from parsec import optional from parsec import regex from parsec import string from parsec import spaces from parsec import sepBy from parsec import sepBy1 logger = logging.getLogger(__name__) optionalspaces = optional(spaces()) arrow = optionalspaces >> string('->') << optionalspaces identifier = (regex('[a-zA-Z_$][a-zA-Z_$0-9]*') ^ string('<init>') ^ string('<clinit>')) className = sepBy1(identifier, string('$')) packagedFullName = sepBy1(identifier, string('.')) packagedClassName = packagedFullName.parsecmap(lambda l: '.'.join(l)) typeName = packagedClassName | regex('[a-z]+') javatype = joint(typeName, optional(string('[]'))) methodName = identifier methodArguments = sepBy(optionalspaces >> javatype << optionalspaces, string(',')) methodArguments = string('(') >> methodArguments << string(')')
def lexme(parser):
    """Wrap ``parser`` so that whitespace following its match is consumed
    and discarded; the wrapped parser's result is kept."""
    trailing_whitespace = psc.spaces()
    return parser << trailing_whitespace
def words(self, n=1):
    """Return a parser for ``n`` whitespace-separated words (as matched by
    ``self.word()``), yielding them joined with single spaces."""
    single_word = self.word()
    gap = psc.spaces()
    # mint == maxt == n: the count is passed as both the minimum and maximum.
    word_run = psc.separated(single_word, gap, n, n, end=False)
    return word_run.parsecmap(lambda parts: ' '.join(parts))
if self.is_meta: return 'meta' if self.is_plain: return '%s%s' % (self.element_type, star_str) if self.is_list: return '[%s%s]' % (self.element_type, star_str) if self.is_dlist: return '[[%s%s]]' % (self.element_type, star_str) if self.is_set: return '{%s%s}' % (self.element_type, star_str) if self.is_dict: return '{%s: %s%s}' % (self.element_type[0], self.element_type[1], star_str) raise RuntimeError('Invalid codegen kind: %s' % self.kind) name_pattern = parsec.spaces() >> parsec.regex( r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*') << parsec.spaces() star_pattern = parsec.spaces() >> parsec.optional(parsec.string('*'), '') << parsec.spaces() parse_meta = parsec.spaces().parsecmap(lambda _: CodeGenKind('meta')) parse_plain = (parsec.spaces() >> (name_pattern + star_pattern) << parsec.spaces()).parsecmap(lambda value: CodeGenKind('plain', value)) parse_list = (parsec.string('[') >> (name_pattern + star_pattern) << parsec.string(']')).parsecmap(lambda value: CodeGenKind('list', value)) parse_dlist = ( parsec.string('[[') >> (name_pattern + star_pattern) << parsec.string(']]')).parsecmap(lambda value: CodeGenKind('dlist', value)) parse_set = (parsec.string('{') >> (name_pattern + star_pattern) << parsec.string('}')).parsecmap(lambda value: CodeGenKind('set', value))