Example #1
0
def matrix_parser():
    cell = many1(digit()).parsecmap(''.join).parsecmap(int)
    height = yield cell
    yield (string(",") << spaces())
    width = yield cell
    yield string('\n')
    row = separated(cell, string(",") << spaces(), mint=width, maxt=width)
    rows = separated(row, string('\n'), mint=height, maxt=height)
    return rows
Example #2
0
    def __init__(self):
        #
        # Parser combinators
        #

        SPACES = spaces()
        optional_spaces = optional(SPACES)
        empty = SPACES.parsecmap(lambda x: EMPTY)
        comment = string('%%%') >> regex('.*')
        comment = comment.parsecmap(Comment)
        codepoint_hex = regex('[0-9A-F]+')
        codepoint_hex = codepoint_hex.parsecmap(lambda x: int(x, 16))
        codepoint = string('U+') >> codepoint_hex
        codepoint_seq = sepBy(codepoint, SPACES)
        codepoint_seq = codepoint_seq.parsecmap(tuple)
        arrow = string('=>')
        arrow = optional_spaces >> arrow << optional_spaces
        mapping = joint(
            codepoint_seq << arrow,
            codepoint_seq,
            optional(comment),
        )
        mapping = mapping.parsecmap(lambda x: Mapping(x[0], x[1], x[2]))
        line = try_choice(mapping, try_choice(
            comment,
            empty,
        ))
        self.parse = line.parse
Example #3
0
def parser_by_count(value):
    try:
        num_cells = int(value)
        return separated(cell,
                         string(",") << spaces(),
                         mint=num_cells,
                         maxt=num_cells)
    except ValueError:
        return Parser(
            lambda index, text: Value.failure(index, "expected a number"))
Example #4
0
 def _parse_fasta(self, filehandle, sep="|"):
     """
     Parse a fasta file. The header is split into fields on 'sep'. The
     sequence is added as a final field.
     """
     p_header = parsec.string(">") >> parsec.regex("[^\n\r]*") << parsec.spaces()
     p_seq = (
         parsec.sepBy1(
             parsec.regex("[^>\n\r]*"), sep=parsec.regex("[\r\n\t ]+")
         ).parsecmap(concat)
         << parsec.spaces()
     )
     p_entry = p_header + p_seq
     p_fasta = parsec.many1(p_entry)
     log(f"Reading {file_str(filehandle)} as a fasta file:")
     try:
         entries = p_fasta.parse(filehandle.read())
     except AttributeError:
         # in case I want to pass in a list of strings, e.g., in tests
         entries = p_fasta.parse(filehandle)
     row = [h.split(sep) + [q] for (h, q) in entries]
     return row
Example #5
0
        def component(duty_exp: DutyExpression) -> Parser:
            """Matches a string prefix and returns the associated type id, along
            with any parsed amounts and units according to their applicability,
            as a 4-tuple of (id, amount, monetary unit, measurement)."""
            prefix = duty_exp.prefix
            has_amount = duty_exp.duty_amount_applicability_code
            has_measurement = duty_exp.measurement_unit_applicability_code
            has_monetary = duty_exp.monetary_unit_applicability_code

            id = token(prefix).result(duty_exp)
            this_value = if_applicable(has_amount, decimal)
            this_monetary_unit = if_applicable(
                has_monetary,
                spaces() >> self._monetary_unit,
                # We must match the percentage if the amount should be there
                # and no monetary unit matches.
                default=(percentage_unit
                         if has_amount == ApplicabilityCode.MANDATORY else
                         optional(percentage_unit)),
            )
            this_measurement = if_applicable(
                has_measurement,
                optional(token("/")) >> self._measurement,
            )

            component = joint(id, this_value, this_monetary_unit,
                              this_measurement)
            measurement_only = joint(id, this_measurement).parsecmap(
                lambda t: (t[0], None, None, t[1]), )

            # It's possible for units that contain numbers (e.g. DTN => '100 kg')
            # to be confused with a simple specific duty (e.g 100.0 + kg)
            # So in the case that amounts are only optional and measurements are present,
            # we have to check for just measurements first.
            return (measurement_only
                    ^ component if has_amount == ApplicabilityCode.PERMITTED
                    and has_measurement != ApplicabilityCode.NOT_PERMITTED else
                    component).parsecmap(
                        lambda exp: component_output(
                            duty_expression=exp[0],
                            duty_amount=exp[1],
                            monetary_unit=exp[2],
                            component_measurement=exp[3],
                        ), )
Example #6
0
            return 'meta'
        if self.is_plain:
            return '%s%s' % (self.element_type, star_str)
        if self.is_list:
            return '[%s%s]' % (self.element_type, star_str)
        if self.is_dlist:
            return '[[%s%s]]' % (self.element_type, star_str)
        if self.is_set:
            return '{%s%s}' % (self.element_type, star_str)
        if self.is_dict:
            return '{%s: %s%s}' % (self.element_type[0], self.element_type[1],
                                   star_str)
        raise RuntimeError('Invalid codegen kind: %s' % self.kind)


name_pattern = parsec.spaces() >> parsec.regex(
    r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*'
) << parsec.spaces()

star_pattern = parsec.spaces() >> parsec.optional(parsec.string('*'),
                                                  '') << parsec.spaces()

parse_meta = parsec.spaces().parsecmap(lambda _: CodeGenKind('meta'))

parse_plain = (parsec.spaces() >>
               (name_pattern + star_pattern) << parsec.spaces()
               ).parsecmap(lambda value: CodeGenKind('plain', value))
parse_list = (parsec.string('[') >>
              (name_pattern + star_pattern) << parsec.string(']')
              ).parsecmap(lambda value: CodeGenKind('list', value))
parse_dlist = (parsec.string('[[') >>
Example #7
0
import re

from parsec import string, sepBy, regex, sepEndBy1, spaces, Parser, separated, Value, generate, many1, digit

quoted_string = regex(r'"[^"]*"', re.MULTILINE)
cell = quoted_string ^ regex(r'[^,"\r\n]*')
end_line = regex(r'\r\n?', re.MULTILINE)
row = sepBy(cell, string(",") << spaces())
header = row
csv = (header << end_line) + sepEndBy1(row, end_line)


def parser_by_count(value):
    try:
        num_cells = int(value)
        return separated(cell,
                         string(",") << spaces(),
                         mint=num_cells,
                         maxt=num_cells)
    except ValueError:
        return Parser(
            lambda index, text: Value.failure(index, "expected a number"))


first_cell = (cell << string(",") << spaces())
counting_parser = first_cell.bind(parser_by_count)


# @generate
def matrix_parser():
    cell = many1(digit()).parsecmap(''.join).parsecmap(int)
Example #8
0
    field_type: str
    name: str
    field_id: int
    options: List[Option]


@dataclass
class Message(object):
    name: str
    # options: List[Option]
    fields: List[Field]


FIELD = "required int32 x = 1;"

lexeme = lambda p: p << spaces()
# TODO: check with Protobuf spec


def is_any(parsers):
    if not parsers:
        return  # ?
    result = parsers[0]
    for p in parsers[1:]:
        result |= p
    return result


def is_a(enum_cls):
    return is_any([string(m)
                   for m in enum_cls.__members__]).parsecmap(enum_cls)
Example #9
0
def token(s: str) -> Parser:
    """Matches a string surrounded optionally by whitespace."""
    return spaces() >> string(s) << spaces()
Example #10
0
#
from __future__ import absolute_import
from __future__ import print_function
import logging

from parsec import joint
from parsec import optional
from parsec import regex
from parsec import string
from parsec import spaces
from parsec import sepBy
from parsec import sepBy1

logger = logging.getLogger(__name__)

optionalspaces = optional(spaces())
arrow = optionalspaces >> string('->') << optionalspaces

identifier = (regex('[a-zA-Z_$][a-zA-Z_$0-9]*') ^ string('<init>')
              ^ string('<clinit>'))
className = sepBy1(identifier, string('$'))
packagedFullName = sepBy1(identifier, string('.'))
packagedClassName = packagedFullName.parsecmap(lambda l: '.'.join(l))
typeName = packagedClassName | regex('[a-z]+')
javatype = joint(typeName, optional(string('[]')))

methodName = identifier
methodArguments = sepBy(optionalspaces >> javatype << optionalspaces,
                        string(','))
methodArguments = string('(') >> methodArguments << string(')')
Example #11
0
def lexme(parser):
    return parser << psc.spaces()
Example #12
0
def words(self, n=1):
    return psc.separated(self.word(), psc.spaces(), n, n,
                         end=False).parsecmap(lambda x: ' '.join(x))
Example #13
0
        if self.is_meta:
            return 'meta'
        if self.is_plain:
            return '%s%s' % (self.element_type, star_str)
        if self.is_list:
            return '[%s%s]' % (self.element_type, star_str)
        if self.is_dlist:
            return '[[%s%s]]' % (self.element_type, star_str)
        if self.is_set:
            return '{%s%s}' % (self.element_type, star_str)
        if self.is_dict:
            return '{%s: %s%s}' % (self.element_type[0], self.element_type[1], star_str)
        raise RuntimeError('Invalid codegen kind: %s' % self.kind)


name_pattern = parsec.spaces() >> parsec.regex(
    r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*') << parsec.spaces()

star_pattern = parsec.spaces() >> parsec.optional(parsec.string('*'), '') << parsec.spaces()

parse_meta = parsec.spaces().parsecmap(lambda _: CodeGenKind('meta'))

parse_plain = (parsec.spaces() >>
               (name_pattern + star_pattern) << parsec.spaces()).parsecmap(lambda value: CodeGenKind('plain', value))
parse_list = (parsec.string('[') >>
              (name_pattern + star_pattern) << parsec.string(']')).parsecmap(lambda value: CodeGenKind('list', value))
parse_dlist = (
    parsec.string('[[') >>
    (name_pattern + star_pattern) << parsec.string(']]')).parsecmap(lambda value: CodeGenKind('dlist', value))
parse_set = (parsec.string('{') >>
             (name_pattern + star_pattern) << parsec.string('}')).parsecmap(lambda value: CodeGenKind('set', value))