Example #1
    def __init__(self):
        #
        # Parser combinators
        #

        SPACES = spaces()
        optional_spaces = optional(SPACES)
        empty = SPACES.parsecmap(lambda x: EMPTY)
        comment = string('%%%') >> regex('.*')
        comment = comment.parsecmap(Comment)
        codepoint_hex = regex('[0-9A-F]+')
        codepoint_hex = codepoint_hex.parsecmap(lambda x: int(x, 16))
        codepoint = string('U+') >> codepoint_hex
        codepoint_seq = sepBy(codepoint, SPACES)
        codepoint_seq = codepoint_seq.parsecmap(tuple)
        arrow = string('=>')
        arrow = optional_spaces >> arrow << optional_spaces
        mapping = joint(
            codepoint_seq << arrow,
            codepoint_seq,
            optional(comment),
        )
        mapping = mapping.parsecmap(lambda x: Mapping(x[0], x[1], x[2]))
        line = try_choice(mapping, try_choice(
            comment,
            empty,
        ))
        self.parse = line.parse
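
A hedged usage sketch for the combinators above. The enclosing class is not shown, so `MappingParser` below is a hypothetical name for it; `Mapping` and `Comment` are the value types the snippet itself references:

# Hypothetical usage; MappingParser stands in for the class whose __init__ is shown above.
p = MappingParser()
p.parse('U+0041 U+0042 => U+0061')   # expected: Mapping((0x41, 0x42), (0x61,), None)
p.parse('%%%converted by hand')      # expected: Comment('converted by hand')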
Example #2
def enum():
    yield lexeme(string('enum'))
    name = yield identifier
    yield lexeme(string('{'))
    members = yield many(enum_value)
    yield lexeme(string('}'))
    return ProtobufEnum(name, members)
Example #3
def message():
    yield lexeme(string('message'))
    name = yield identifier
    yield lexeme(string('{'))
    fields = yield many(field)
    yield lexeme(string('}'))
    return Message(name, fields)
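
Both snippets above use parsec's generator style: each `yield` runs a sub-parser and hands back its result, and the final `return` becomes the parser's value. In parsec.py such functions are normally wrapped with the `@generate` decorator, which the excerpts omit. A minimal self-contained sketch of the same pattern, with hypothetical `lexeme` and `identifier` helpers standing in for the ones these examples rely on:

from parsec import generate, regex, spaces, string

lexeme = lambda p: p << spaces()                 # skip trailing whitespace
identifier = lexeme(regex(r'[A-Za-z_]\w*'))

@generate
def key_value():
    """Parse a line such as "name = value;" into a (key, value) pair."""
    key = yield identifier
    yield lexeme(string('='))
    value = yield identifier
    yield lexeme(string(';'))
    return (key, value)

# key_value.parse('answer = yes;')  ->  ('answer', 'yes')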
Example #4
def sdf_molecule():
    header = yield sdf_header
    atoms = yield P.times(sdf_atom, header.atom_num)
    bonds = yield P.times(sdf_bond, header.bond_num)
    yield seperator >> P.string('M') >> seperator >> P.string(
        'END') >> seperator >> P.string('> <value>')
    value = yield seperator >> num_f
    yield seperator >> P.string('$$$$')
    return SdfMolecule(header, atoms, bonds, value)
Example #5
def matrix_parser():
    cell = many1(digit()).parsecmap(''.join).parsecmap(int)
    height = yield cell
    yield (string(",") << spaces())
    width = yield cell
    yield string('\n')
    row = separated(cell, string(",") << spaces(), mint=width, maxt=width)
    rows = separated(row, string('\n'), mint=height, maxt=height)
    return rows
Example #6
def sdf_header():
    mol_num = yield transparent >> num_i
    atom_num = yield P.string('\n\n\n') >> P.times(P.one_of(' 0123456789'), 3)
    atom_num = int(''.join(atom_num))
    bond_num = yield P.times(P.one_of(' 0123456789'), 3)
    bond_num = int(''.join(bond_num))
    yield P.times(seperator >> num_i, 5)
    yield seperator >> P.string("V2000")
    return SdfHeader(mol_num, atom_num, bond_num)
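
Here (and again in Example #10 below) the atom and bond counts are stored in fixed-width, space-padded three-character columns, which is what `P.times(P.one_of(' 0123456789'), 3)` reads. A minimal self-contained sketch of that idea, under a hypothetical name `fixed3`:

import parsec as P

# Three characters drawn from ' 0123456789', joined and converted to an int;
# int() tolerates the leading padding space.
fixed3 = P.times(P.one_of(' 0123456789'), 3).parsecmap(lambda cs: int(''.join(cs)))
# fixed3.parse(' 26')  ->  26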
Example #7
def pins():
    "Parse [Component].[Pin]."

    def filt(x):
        (_, (mod, _)) = x
        m = mod.upper()
        return (not ((m == "POWER") or (m == "GND") or (m == "NC")))

    yield (lexeme(string("signal_name")) << lexeme(string("model_name")))
    rlcs = yield optional(count(rlc, 3), [])
    prs = yield many1(pin(rlcs))
    prs_filt = list(filter(filt, prs))
    return dict(prs_filt)
Example #8
def string_esc():
    return string('\\') >> (string('\\')
                            | string('/')
                            | string('b').result('\b')
                            | string('f').result('\f')
                            | string('n').result('\n')
                            | string('r').result('\r')
                            | string('t').result('\t')
                            | regex(r'u[0-9a-fA-F]{4}').parsecmap(to_unichr)
                            | string(end_quote))
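
`string_esc` is the escape-sequence half of the `charseq` parser that the `quoted` parser in Example #29 consumes; `end_quote` is the quote character captured from the enclosing scope, and `to_unichr` is assumed to turn a `uXXXX` match into the corresponding character. A rough, unverified illustration of its behaviour:

# With end_quote = "'" in scope, string_esc() maps a backslash escape to the
# character it denotes (expected behaviour, not verified):
#   string_esc().parse(r'\n')      ->  '\n'
#   string_esc().parse(r'\u0041')  ->  'A'
#   string_esc().parse(r"\'")      ->  "'"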
Example #9
def ratio():
    [num, den] = yield (separated(number, string("/"), 2, maxt=2, end=False)
                        | na.result([0, 0]))
    if den:
        return num / den
    else:
        return None
Example #10
def sdf_bond():
    fst = yield P.string('\n') >> P.times(P.one_of(' 0123456789'), 3)
    fst = int(''.join(fst))
    snd = yield P.times(P.one_of(' 0123456789'), 3)
    snd = int(''.join(snd))
    bond_type = yield seperator >> num_i
    yield P.times(seperator >> num_i, 3)
    return SdfBond(fst, snd, bond_type)
Example #11
def abbrev(obj: Union[MeasurementUnit, MeasurementUnitQualifier]) -> Parser:
    """
    Matches an abbreviation and returns the associated object.

    Humans cannot be relied upon to use spaces or thousand separators
    correctly, so these can be ignored.
    """
    return reduce(
        try_choice,
        [
            string(obj.abbreviation),
            string(obj.abbreviation.replace(" ", "")),
            string(obj.abbreviation.replace("1,000", "1000")),
            string(obj.abbreviation.replace(" ", "").replace("1,000", "1000")),
        ],
    ).result(obj)
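
A hedged illustration of `abbrev` in use. `MeasurementUnit` is not shown here, so a namedtuple stands in for it; only the `.abbreviation` attribute is read:

from collections import namedtuple

Unit = namedtuple("Unit", "abbreviation")       # stand-in for MeasurementUnit
kilo_litre = Unit("1,000 l")

parser = abbrev(kilo_litre)
# Every spelling variant should parse to the same object (expected):
#   parser.parse("1,000 l"), parser.parse("1,000l"),
#   parser.parse("1000 l"),  parser.parse("1000l")   ->  all return kilo_litre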
Example #12
def parse_untenbi():
    raw_pattern = yield (pattern_def ^ parsec.string(""))
    raw_rules = yield all_rules

    result = {}

    if raw_pattern:
        result["pattern"] = pattern_translate[raw_pattern]

    # Interpret each rule
    for raw_rule in raw_rules:

        if raw_rule["rule"] in {"start_date", "start_date_plus1"}:
            if raw_rule["rule"] == "start_date_plus1":
                result["start"] = day_plus_one(raw_rule["day"])
            else:
                result["start"] = raw_rule["day"]

            if "end" not in result:
                result["end"] = (12, 31)

            if "pattern" not in result:
                result["pattern"] = "毎日"

        elif raw_rule["rule"] in {"end_date", "end_date_minus1"}:
            if raw_rule["rule"] == "end_date_minus1":
                result["end"] = day_minus_one(raw_rule["day"])
            else:
                result["end"] = raw_rule["day"]

            if "start" not in result:
                result["start"] = (1, 1)

            if "pattern" not in result:
                result["pattern"] = "毎日"

        else:

            if "pattern" not in result:
                result["pattern"] = "全休" if raw_rule["rule"] == "added" else "毎日"

            if raw_rule["rule"] not in result:
                result[raw_rule["rule"]] = set()

            result[raw_rule["rule"]].update(raw_rule["days"])

    # Nullifying exceptions
    if "added" in result and "removed" in result:
        if result["pattern"] == "毎日":
            result["removed"].difference_update(result["added"])
            del result["added"]

        elif result["pattern"] == "全休":
            result["added"].difference_update(result["removed"])
            del result["removed"]

    return result
Example #13
def field():
    q = yield qualifier
    ft = yield field_type
    ident = yield identifier
    yield equals
    fi = yield field_id
    options = yield optional(field_options, default_value=[])
    yield lexeme(string(';'))
    return Field(q, ft, ident, fi, options)
Example #14
def parser_by_count(value):
    try:
        num_cells = int(value)
        return separated(cell,
                         string(",") << spaces(),
                         mint=num_cells,
                         maxt=num_cells)
    except ValueError:
        return Parser(
            lambda index, text: Value.failure(index, "expected a number"))
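
Example #27 below shows how this is meant to be used: the first cell is parsed as a count and fed to `parser_by_count` through `bind`, so the returned parser consumes exactly that many further cells. A short usage sketch reusing the `cell` parser defined there (expected output, not verified):

counting_parser = (cell << string(",") << spaces()).bind(parser_by_count)
counting_parser.parse('3,10,20,30')   # expected: ['10', '20', '30']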
Example #15
def param():
    "Parse IBIS parameter."
    pname = yield regex(
        r"^[a-zA-Z]\w*",
        re.MULTILINE)  # Parameters must begin with a letter in column 1.
    if DBG:
        print(pname)
    res = yield (regex(r"\s*") >> (
        (word(string("=")) >> number) | typminmax | name | rest_line))
    yield ignore  # So that ``param`` functions as a lexeme.
    return (pname.lower(), res)
Example #16
def single_day_rule():
    date = yield single_day_def
    rule_type = yield (parsec.string("から運転") ^ parsec.string("からは運転")
                       ^ parsec.string("からは運休") ^ parsec.string("まで運転")
                       ^ parsec.string("までは運転") ^ parsec.string("まで運休")
                       ^ parsec.string("までは運休"))

    if date["day"][0] is None:
        raise ValueError("month definition is required in から運転・まで運転 rules")

    return {"day": date["day"], "rule": rule_type_translations[rule_type]}
Example #17
def fix_image_url(url, repo_name):
    '''Fixes GitHub image URLs.

    Any links with `github.com` are invalid, because they return *html*
    content. Image links would have `githubusercontent.com`. For example:

    - This returns an html: https://github.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png
    - This returns a png: https://githubusercontent.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png

    Any links that are relative are also invalid. For example:

    - preview.png
    - sprites/preview.png
    - /sprites/preview.png'''
    # FIXME: this assumes `master` is always the branch we want, while in reality we need the
    # `default_branch` of the repository, which could also for example be `main`
    from urllib.parse import urlparse
    from parsec import optional, string, regex, none_of, many, ParseError

    glob = (
        optional(string('/')) >> string(repo_name) >> string("/blob/master/")
        >> many(none_of("?")).parsecmap(lambda x: "".join(x)))

    o = urlparse(url)
    if o.netloc == "raw.githubusercontent.com":
        return url

    try:
        path = glob.parse(o.path)
    except ParseError as e:
        path = None
    if o.netloc == "github.com" and path:
        return f"https://raw.githubusercontent.com/{repo_name}/master/{path}"

    if o.netloc == "":
        return f"https://raw.githubusercontent.com/{repo_name}/master/{o.path}"

    return url
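
A hedged illustration of the rewrite this performs on the docstring's own example (assuming the repo name is passed as the "owner/name" slug):

fix_image_url(
    "https://github.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png",
    "Retrothopter/Niobium-Nanotech",
)
# expected: "https://raw.githubusercontent.com/Retrothopter/Niobium-Nanotech/master/Preview.png"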
Example #18
def fix_image_url(url, repo_name):
    '''Fixes a GitHub url, where the url should point to an image.

    Any links with `github.com` are invalid, because they're html links, while
    image links would have `githubusercontent.com`, for example:
    - https://github.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png;

    Any links that don't have a domain are relative and as such invalid, for example:
    - preview.png;
    - sprites/preview.png;
    - /sprites/preview.png

    This is also why a repo name is required.
    '''
    from urllib.parse import urlparse
    from parsec import optional, string, regex, none_of, many, ParseError

    glob = (
        optional(string('/')) >> string(repo_name) >> string("/blob/master/")
        >> many(none_of("?")).parsecmap(lambda x: "".join(x)))

    o = urlparse(url)
    if o.netloc == "raw.githubusercontent.com":
        return url

    try:
        path = glob.parse(o.path)
    except ParseError as e:
        path = None
    if o.netloc == "github.com" and path:
        return f"https://raw.githubusercontent.com/{repo_name}/master/{path}"

    if o.netloc == "":
        return f"https://raw.githubusercontent.com/{repo_name}/master/{o.path}"

    # print('[warning] non github url:', url)
    return url
Example #19
def fn():
    "Parse IBIS keyword."
    yield regex(r"^\[", re.MULTILINE)
    # ``name`` gobbles up trailing space, which we don't want.
    wordlets = yield sepBy1(name_only, one_of(" _"))
    yield string("]")
    yield ignore  # So that ``keyword`` functions as a lexeme.
    res = "_".join(wordlets)  # Canonicalize to: "<wordlet1>_<wordlet2>_...".
    if kywrd:
        # assert res.lower() == kywrd.lower(), f"Expecting: {kywrd}; got: {res}."  # Does not work!
        if res.lower() == kywrd.lower():
            return res
        else:
            return fail.desc(f"Expecting: {kywrd}; got: {res}.")
    return res
Example #20
    def test_memberMappings(self):
        from parsec import sepBy
        from parsec import string
        from proguard_mapping_parser.parser import memberMapping

        members = sepBy(memberMapping, string('\n'))
        self.assertEquals(
            [
                ((None, ('java.util.HashMap', None), 'mHashMap', None), 'a'),
                (((35, 37), ('void', None), '<init>', []), '<init>'),
                (((66, 66), ('boolean', None), 'contains', [
                    ('java.lang.Object', None),
                ]), 'a'),
            ],
            members.parse(
                '    java.util.HashMap mHashMap -> a\n'
                '    35:37:void <init>() -> <init>\n'
                '    66:66:boolean contains(java.lang.Object) -> a\n'))
Example #21
def range_def():
    start_month = yield month_def
    start_day = yield day_def

    yield parsec.string("~")

    end_month = yield month_def
    end_day = yield day_def

    start_month = int(start_month) if start_month else None
    start_day = int(start_day)

    end_month = int(end_month) if end_month else None
    end_day = int(end_day)

    return {
        "type": "range",
        "start": (start_month, start_day),
        "end": (end_month, end_day)
    }
Example #22
 def _parse_fasta(self, filehandle, sep="|"):
     """
     Parse a fasta file. The header is split into fields on 'sep'. The
     sequence is added as a final field.
     """
     p_header = parsec.string(">") >> parsec.regex("[^\n\r]*") << parsec.spaces()
     p_seq = (
         parsec.sepBy1(
             parsec.regex("[^>\n\r]*"), sep=parsec.regex("[\r\n\t ]+")
         ).parsecmap(concat)
         << parsec.spaces()
     )
     p_entry = p_header + p_seq
     p_fasta = parsec.many1(p_entry)
     log(f"Reading {file_str(filehandle)} as a fasta file:")
     try:
         entries = p_fasta.parse(filehandle.read())
     except AttributeError:
         # in case I want to pass in a list of strings, e.g., in tests
         entries = p_fasta.parse(filehandle)
     row = [h.split(sep) + [q] for (h, q) in entries]
     return row
Example #23
    pip install relaxedjson

To install as an egg-link in development mode::

    python setup.py develop -N

"""

import re
from parsec import (sepBy, regex, string, generate, many, endBy)

whitespace = regex(r'\s*', re.MULTILINE)

lexeme = lambda p: p << whitespace

comment = string('/*') >> regex(r'(?:[^*]|\*(?!\/))+',
                                re.MULTILINE) << string('*/')
comment = lexeme(comment)

lbrace = lexeme(string('{'))
rbrace = lexeme(string('}'))
lbrack = lexeme(string('['))
rbrack = lexeme(string(']'))
colon = lexeme(string(':'))
comma = lexeme(string(','))
true = lexeme(string('true')).result(True)
false = lexeme(string('false')).result(False)
null = lexeme(string('null')).result(None)
quote = string('"') | string("'")
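
A hypothetical composition of the token parsers above (not part of relaxedjson itself), just to show how the lexeme-wrapped tokens combine:

# A parser for a small JSON-like list of booleans/nulls, e.g. "[true, false, null]".
value = true | false | null
bool_array = lbrack >> sepBy(value, comma) << rbrack
# bool_array.parse('[true, false, null]')  ->  [True, False, None]  (expected)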

Example #24

# The targets of our parsing.
# We look for instructions to insert text, chapters,
# paragraphs and line breaks.
Text = collections.namedtuple('Text', ['text'])
NewChapter = collections.namedtuple('NewChapter', ['text'])
NewParagraph = collections.namedtuple('NewParagraph', [])
Break = collections.namedtuple('Break', [])

# Parse and ignore trailing white space after our reserved words.
whitespace = parsec.regex(r'\s*', re.MULTILINE)
skip_whitespace = lambda p: p << whitespace  # noqa

# Break and new paragraph commands are simply reserved words.
break_command = skip_whitespace(parsec.string('#break')).result(Break())
par_command = skip_whitespace(parsec.string('#par')).result(NewParagraph())

# The newch_parser is used to implement the newch_command parser below.
newch_parser = skip_whitespace(parsec.string('#newch'))
# The text parser consumes all input between reserved words.
commands = '#newch|#par|#break'
text_parser = parsec.regex('(?!(%s))(.+?)(?=%s|$)' % (commands, commands))


@parsec.Parser
def text_command(text, index):
    """Parse a text command returning the text to be inserted."""
    res = text_parser(text, index)
    if not res.status:
        return res
Example #25
class Schematic(namedtuple("Schematic", "name pos config rotation")):
    pass


class Schematics(namedtuple("Schematics", "width height tags tiles")):
    pass


HEADER = b"msch"
VERSION = b"\x00"

########################################
## Reader
########################################

header = string(HEADER)
version = string(VERSION)

everything = regex(b"(?s).*")  # don't forget newlines

byte = regex(b"(?s).")
char = byte.parsecmap(lambda x: unpack("b", x)[0])
short = regex(b"(?s).{2}").parsecmap(lambda x: unpack(">h", x)[0])
intp = regex(b"(?s).{4}").parsecmap(lambda x: unpack(">i", x)[0])
nbytes = lambda x: times(byte, x).parsecmap(lambda x: b"".join(x))
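
A quick, unverified sanity check of the binary primitives above (all fields are big-endian):

#   short.parse(b"\x00\x05")         ->  5
#   intp.parse(b"\x00\x00\x01\x00")  ->  256
#   nbytes(3).parse(b"abcdef")       ->  b"abc"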


@generate
def utf8_bytes():
    """ Parses utf8 string, prefixed with length. """
    length = yield short
Example #26
            return '[%s%s]' % (self.element_type, star_str)
        if self.is_dlist:
            return '[[%s%s]]' % (self.element_type, star_str)
        if self.is_set:
            return '{%s%s}' % (self.element_type, star_str)
        if self.is_dict:
            return '{%s: %s%s}' % (self.element_type[0], self.element_type[1],
                                   star_str)
        raise RuntimeError('Invalid codegen kind: %s' % self.kind)


name_pattern = parsec.spaces() >> parsec.regex(
    r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*'
) << parsec.spaces()

star_pattern = parsec.spaces() >> parsec.optional(parsec.string('*'),
                                                  '') << parsec.spaces()

parse_meta = parsec.spaces().parsecmap(lambda _: CodeGenKind('meta'))

parse_plain = (parsec.spaces() >>
               (name_pattern + star_pattern) << parsec.spaces()
               ).parsecmap(lambda value: CodeGenKind('plain', value))
parse_list = (parsec.string('[') >>
              (name_pattern + star_pattern) << parsec.string(']')
              ).parsecmap(lambda value: CodeGenKind('list', value))
parse_dlist = (parsec.string('[[') >>
               (name_pattern + star_pattern) << parsec.string(']]')
               ).parsecmap(lambda value: CodeGenKind('dlist', value))
parse_set = (parsec.string('{') >>
             (name_pattern + star_pattern) << parsec.string('}')
Example #27
import re

from parsec import string, sepBy, regex, sepEndBy1, spaces, Parser, separated, Value, generate, many1, digit

quoted_string = regex(r'"[^"]*"', re.MULTILINE)
cell = quoted_string ^ regex(r'[^,"\r\n]*')
end_line = regex(r'\r\n?', re.MULTILINE)
row = sepBy(cell, string(",") << spaces())
header = row
csv = (header << end_line) + sepEndBy1(row, end_line)


def parser_by_count(value):
    try:
        num_cells = int(value)
        return separated(cell,
                         string(",") << spaces(),
                         mint=num_cells,
                         maxt=num_cells)
    except ValueError:
        return Parser(
            lambda index, text: Value.failure(index, "expected a number"))


first_cell = (cell << string(",") << spaces())
counting_parser = first_cell.bind(parser_by_count)


# @generate
def matrix_parser():
    cell = many1(digit()).parsecmap(''.join).parsecmap(int)
Example #28
 def parser():
     head = yield header
     yield parsec.many(parsec.string('\n'))
     samps = yield parsec.many(sample)
     return head, samps
Example #29
def quoted():
    end_quote = yield quote
    body = yield many(charseq(end_quote))
    yield string(end_quote)
    raise StopGenerator(''.join(body))
Example #30
 def sample():
     fwhm = yield spaces >> floating << spaces
     level = yield floating << spaces
     yield parsec.optional(parsec.string('\n'))
     return (float(fwhm), float(level))