コード例 #1
0
def matrix_parser():
    """Generator-style parser for a size line followed by an integer matrix.

    Each ``yield`` hands a parser to the driver and receives that parser's
    result back (presumably via parsec's ``generate`` decorator -- confirm
    at the call site).  The first line holds ``height, width``; the
    generator then returns a parser for ``height`` newline-separated rows
    of ``width`` comma-separated integers each.
    """
    number = many1(digit()).parsecmap(''.join).parsecmap(int)
    comma = string(",") << spaces()
    newline = string('\n')

    # Header line: "<height>, <width>\n".
    n_rows = yield number
    yield comma
    n_cols = yield number
    yield newline

    # mint == maxt pins the repetition count to the announced size.
    one_row = separated(number, comma, mint=n_cols, maxt=n_cols)
    all_rows = separated(one_row, newline, mint=n_rows, maxt=n_rows)
    return all_rows
コード例 #2
0
    def __init__(self):
        """Build the single-line parser and expose it as ``self.parse``.

        A line is tried, in order, as a mapping (``U+XXXX ... => U+YYYY ...``
        with an optional trailing ``%%%`` comment), as a comment on its own,
        or as whatever ``spaces()`` matches, which is mapped to ``EMPTY``.
        """
        whitespace = spaces()
        padding = optional(whitespace)

        # Whitespace-only input collapses to the EMPTY marker.
        blank = whitespace.parsecmap(lambda _: EMPTY)

        # '%%%' introduces a comment; the text matched by '.*' is wrapped.
        remark = (string('%%%') >> regex('.*')).parsecmap(Comment)

        # 'U+' followed by upper-case hex digits, converted to an int.
        hex_value = regex('[0-9A-F]+').parsecmap(
            lambda digits: int(digits, 16))
        code_point = string('U+') >> hex_value
        code_points = sepBy(code_point, whitespace).parsecmap(tuple)

        # '=>' with optional whitespace on either side.
        separator = padding >> string('=>') << padding

        entry = joint(code_points << separator, code_points, optional(remark))
        entry = entry.parsecmap(
            lambda parts: Mapping(parts[0], parts[1], parts[2]))

        any_line = try_choice(entry, try_choice(remark, blank))
        self.parse = any_line.parse
コード例 #3
0
def parser_by_count(value):
    """Return a parser for exactly ``int(value)`` comma-separated cells.

    When ``int(value)`` raises ``ValueError`` the returned parser instead
    fails at the current index with the message "expected a number".
    """
    try:
        count = int(value)
        comma = string(",") << spaces()
        # mint == maxt forces exactly `count` cells.
        return separated(cell, comma, mint=count, maxt=count)
    except ValueError:
        def reject(index, text):
            return Value.failure(index, "expected a number")

        return Parser(reject)
コード例 #4
0
 def _parse_fasta(self, filehandle, sep="|"):
     """
     Parse FASTA text into one row per entry.

     Each entry is a ``>`` header line followed by sequence text. The header
     is split into fields on 'sep' and the sequence is appended as a final
     field.

     ``filehandle`` is normally an object with a ``read()`` method; an object
     without one (e.g. text passed directly in tests) is handed to the parser
     as is.

     Returns a list of lists: the header fields followed by the sequence.
     """
     p_header = parsec.string(">") >> parsec.regex("[^\n\r]*") << parsec.spaces()
     # Sequence chunks separated by whitespace/newlines are merged by
     # `concat` (defined elsewhere; presumably joins the chunks -- confirm).
     p_seq = (
         parsec.sepBy1(
             parsec.regex("[^>\n\r]*"), sep=parsec.regex("[\r\n\t ]+")
         ).parsecmap(concat)
         << parsec.spaces()
     )
     p_entry = p_header + p_seq
     p_fasta = parsec.many1(p_entry)
     log(f"Reading {file_str(filehandle)} as a fasta file:")
     # Guard only the read() call: an AttributeError raised while parsing
     # must propagate instead of triggering a second parse of the raw handle.
     try:
         text = filehandle.read()
     except AttributeError:
         # no read() available -- the caller passed the content directly
         text = filehandle
     entries = p_fasta.parse(text)
     return [header.split(sep) + [sequence] for (header, sequence) in entries]
コード例 #5
0
ファイル: parsers.py プロジェクト: uktrade/tamato
        def component(duty_exp: DutyExpression) -> Parser:
            """Build the parser for a single duty expression.

            The parser matches the expression's prefix and then whichever of
            the amount, monetary unit and measurement its applicability codes
            call for. The parsed pieces are handed to ``component_output`` as
            duty expression, amount, monetary unit and measurement.
            """
            amount_code = duty_exp.duty_amount_applicability_code
            measurement_code = duty_exp.measurement_unit_applicability_code
            monetary_code = duty_exp.monetary_unit_applicability_code

            # Matching the prefix yields the duty expression object itself.
            expression = token(duty_exp.prefix).result(duty_exp)
            amount = if_applicable(amount_code, decimal)

            # We must match the percentage if the amount should be there
            # and no monetary unit matches.
            if amount_code == ApplicabilityCode.MANDATORY:
                unit_fallback = percentage_unit
            else:
                unit_fallback = optional(percentage_unit)
            monetary_unit = if_applicable(
                monetary_code,
                spaces() >> self._monetary_unit,
                default=unit_fallback,
            )
            measurement = if_applicable(
                measurement_code,
                optional(token("/")) >> self._measurement,
            )

            full_component = joint(expression, amount, monetary_unit,
                                   measurement)
            # Same output shape as the full form, with amount and monetary
            # unit left as None.
            measurement_only = joint(expression, measurement).parsecmap(
                lambda pair: (pair[0], None, None, pair[1]))

            def to_output(exp):
                return component_output(
                    duty_expression=exp[0],
                    duty_amount=exp[1],
                    monetary_unit=exp[2],
                    component_measurement=exp[3],
                )

            # It's possible for units that contain numbers (e.g. DTN => '100 kg')
            # to be confused with a simple specific duty (e.g 100.0 + kg).
            # So when amounts are only optional and measurements are allowed,
            # the measurement-only form has to be tried first.
            if (amount_code == ApplicabilityCode.PERMITTED
                    and measurement_code != ApplicabilityCode.NOT_PERMITTED):
                chosen = measurement_only ^ full_component
            else:
                chosen = full_component
            return chosen.parsecmap(to_output)
コード例 #6
0
ファイル: codegen.py プロジェクト: xiaming9880/libvineyard
            return 'meta'
        if self.is_plain:
            return '%s%s' % (self.element_type, star_str)
        if self.is_list:
            return '[%s%s]' % (self.element_type, star_str)
        if self.is_dlist:
            return '[[%s%s]]' % (self.element_type, star_str)
        if self.is_set:
            return '{%s%s}' % (self.element_type, star_str)
        if self.is_dict:
            return '{%s: %s%s}' % (self.element_type[0], self.element_type[1],
                                   star_str)
        raise RuntimeError('Invalid codegen kind: %s' % self.kind)


# Name segments, optionally joined by '::'; '<', '>', ',' and blanks are
# accepted inside a segment. Surrounding whitespace is skipped.
name_pattern = (
    parsec.spaces()
    >> parsec.regex(
        r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*')
    << parsec.spaces()
)

# An optional '*' (default '' when absent) with surrounding whitespace skipped.
star_pattern = (
    parsec.spaces()
    >> parsec.optional(parsec.string('*'), '')
    << parsec.spaces()
)

parse_meta = parsec.spaces().parsecmap(lambda _unused: CodeGenKind('meta'))

# Bare "name [*]".
parse_plain = (
    parsec.spaces() >> (name_pattern + star_pattern) << parsec.spaces()
).parsecmap(lambda spec: CodeGenKind('plain', spec))
# "[name [*]]".
parse_list = (
    parsec.string('[') >> (name_pattern + star_pattern) << parsec.string(']')
).parsecmap(lambda spec: CodeGenKind('list', spec))
parse_dlist = (parsec.string('[[') >>
コード例 #7
0
import re

from parsec import string, sepBy, regex, sepEndBy1, spaces, Parser, separated, Value, generate, many1, digit

# A double-quoted field; the quotes remain part of the matched text.
quoted_string = regex(r'"[^"]*"', re.MULTILINE)
# Either a quoted field or a (possibly empty) run of characters that contains
# no comma, quote or line break.
cell = quoted_string ^ regex(r'[^,"\r\n]*')
# Line terminator: CR, CRLF or a bare LF. `cell` already stops at "\n", so
# LF must be accepted here as well or Unix-style input can never be split
# into rows.
end_line = regex(r'\r\n?|\n', re.MULTILINE)
# Cells separated by a comma and any whitespace following it.
row = sepBy(cell, string(",") << spaces())
header = row
# One header line, then one or more rows separated (and optionally ended)
# by line terminators.
csv = (header << end_line) + sepEndBy1(row, end_line)


def parser_by_count(value):
    """Return a parser for exactly ``int(value)`` comma-separated cells.

    When ``int(value)`` raises ``ValueError`` the returned parser instead
    fails at the current index with the message "expected a number".
    """
    try:
        count = int(value)
        comma = string(",") << spaces()
        # mint == maxt forces exactly `count` cells.
        return separated(cell, comma, mint=count, maxt=count)
    except ValueError:
        def reject(index, text):
            return Value.failure(index, "expected a number")

        return Parser(reject)


# The leading cell together with the comma (and whitespace) that follows it;
# only the cell's value is kept.
first_cell = cell << string(",") << spaces()
# The first cell's value is passed to parser_by_count, which supplies the
# parser for the rest of the input.
counting_parser = first_cell.bind(parser_by_count)


# @generate
def matrix_parser():
    cell = many1(digit()).parsecmap(''.join).parsecmap(int)
コード例 #8
0
    field_type: str
    name: str
    field_id: int
    options: List[Option]


@dataclass
class Message:
    """Record holding a name and a list of ``Field`` entries."""

    name: str
    fields: List[Field]
    # options: List[Option]  -- not part of the record (yet)


FIELD = "required int32 x = 1;"


def lexeme(p):
    """Return ``p`` followed by ``spaces()``, keeping only ``p``'s result."""
    return p << spaces()
# TODO: check with Protobuf spec


def is_any(parsers):
    """Fold ``parsers`` into a single value using the ``|=`` operator.

    Returns ``None`` for an empty sequence, the sole element for a
    one-element sequence, and otherwise the first element successively
    combined with each following one.
    """
    if not parsers:
        # NOTE(review): callers that chain a method onto the result will fail
        # on None -- decide whether empty input should be an error instead.
        return None
    combined, *alternatives = parsers
    for alternative in alternatives:
        combined |= alternative
    return combined


def is_a(enum_cls):
    """Return a parser matching any member name of ``enum_cls``.

    One ``string`` parser is built per entry in ``enum_cls.__members__``;
    they are combined with ``is_any`` and the matched text is then passed to
    ``enum_cls``.
    """
    name_parsers = [string(member_name)
                    for member_name in enum_cls.__members__]
    return is_any(name_parsers).parsecmap(enum_cls)
コード例 #9
0
ファイル: parsers.py プロジェクト: uktrade/tamato
def token(s: str) -> Parser:
    """Match the literal ``s``, skipping any whitespace on either side."""
    leading = spaces()
    trailing = spaces()
    return (leading >> string(s)) << trailing
コード例 #10
0
#
from __future__ import absolute_import
from __future__ import print_function
import logging

from parsec import joint
from parsec import optional
from parsec import regex
from parsec import string
from parsec import spaces
from parsec import sepBy
from parsec import sepBy1

logger = logging.getLogger(__name__)

# Whitespace that may or may not be present.
optionalspaces = optional(spaces())
# '->' with optional whitespace on both sides.
arrow = (optionalspaces >> string('->')) << optionalspaces

# A name built from letters, digits, '_' and '$', or one of the literal
# '<init>' / '<clinit>' markers.
identifier = (
    regex('[a-zA-Z_$][a-zA-Z_$0-9]*')
    ^ string('<init>')
    ^ string('<clinit>')
)
className = sepBy1(identifier, string('$'))
packagedFullName = sepBy1(identifier, string('.'))
# Dotted parts are re-joined into a single string.
packagedClassName = packagedFullName.parsecmap('.'.join)
typeName = packagedClassName | regex('[a-z]+')
# A type name with an optional '[]' suffix.
javatype = joint(typeName, optional(string('[]')))

methodName = identifier
# Parenthesised, comma-separated list of types; blanks around each are allowed.
methodArguments = (
    string('(')
    >> sepBy(optionalspaces >> javatype << optionalspaces, string(','))
    << string(')')
)
コード例 #11
0
def lexme(parser):
    """Return ``parser`` followed by ``psc.spaces()``, keeping its result."""
    trailing_blanks = psc.spaces()
    return parser << trailing_blanks
コード例 #12
0
def words(self, n=1):
    """Return a parser for ``n`` words, joined with single spaces.

    ``self.word()`` is repeated with ``psc.spaces()`` as separator, using
    ``n`` as both the minimum and maximum count and ``end=False``.
    """
    word_run = psc.separated(self.word(), psc.spaces(), n, n, end=False)
    return word_run.parsecmap(' '.join)
コード例 #13
0
ファイル: codegen.py プロジェクト: zeta1999/libvineyard
        if self.is_meta:
            return 'meta'
        if self.is_plain:
            return '%s%s' % (self.element_type, star_str)
        if self.is_list:
            return '[%s%s]' % (self.element_type, star_str)
        if self.is_dlist:
            return '[[%s%s]]' % (self.element_type, star_str)
        if self.is_set:
            return '{%s%s}' % (self.element_type, star_str)
        if self.is_dict:
            return '{%s: %s%s}' % (self.element_type[0], self.element_type[1], star_str)
        raise RuntimeError('Invalid codegen kind: %s' % self.kind)


# Name segments, optionally joined by '::'; '<', '>', ',' and blanks are
# accepted inside a segment. Surrounding whitespace is skipped.
name_pattern = (
    parsec.spaces()
    >> parsec.regex(
        r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*')
    << parsec.spaces()
)

# An optional '*' (default '' when absent) with surrounding whitespace skipped.
star_pattern = (
    parsec.spaces()
    >> parsec.optional(parsec.string('*'), '')
    << parsec.spaces()
)

parse_meta = parsec.spaces().parsecmap(lambda _unused: CodeGenKind('meta'))

# Bare "name [*]".
parse_plain = (
    parsec.spaces() >> (name_pattern + star_pattern) << parsec.spaces()
).parsecmap(lambda spec: CodeGenKind('plain', spec))
# "[name [*]]".
parse_list = (
    parsec.string('[') >> (name_pattern + star_pattern) << parsec.string(']')
).parsecmap(lambda spec: CodeGenKind('list', spec))
# "[[name [*]]]".
parse_dlist = (
    parsec.string('[[') >> (name_pattern + star_pattern) << parsec.string(']]')
).parsecmap(lambda spec: CodeGenKind('dlist', spec))
# "{name [*]}".
parse_set = (
    parsec.string('{') >> (name_pattern + star_pattern) << parsec.string('}')
).parsecmap(lambda spec: CodeGenKind('set', spec))