Ejemplo n.º 1
    def __init__(self):
        # Assemble the line parser from small combinators and expose its
        # ``parse`` method as ``self.parse``.
        # Parser combinators

        SPACES = spaces()
        optional_spaces = optional(SPACES)
        # Whatever SPACES matches is replaced by the EMPTY marker.
        empty = SPACES.parsecmap(lambda x: EMPTY)
        # '%%%' starts a comment; the text matched by '.*' is wrapped in Comment.
        comment = string('%%%') >> regex('.*')
        comment = comment.parsecmap(Comment)
        # A code point is 'U+' followed by upper-case hex digits, converted to int.
        codepoint_hex = regex('[0-9A-F]+')
        codepoint_hex = codepoint_hex.parsecmap(lambda x: int(x, 16))
        codepoint = string('U+') >> codepoint_hex
        # Code points separated by SPACES, collected into a tuple.
        codepoint_seq = sepBy(codepoint, SPACES)
        codepoint_seq = codepoint_seq.parsecmap(tuple)
        # The '=>' token with optional whitespace on either side.
        arrow = string('=>')
        arrow = optional_spaces >> arrow << optional_spaces
        # NOTE(review): the remaining arguments and the closing parenthesis of
        # joint(...) are missing from this excerpt.
        mapping = joint(
            codepoint_seq << arrow,
        # The three joined results become the arguments of Mapping.
        mapping = mapping.parsecmap(lambda x: Mapping(x[0], x[1], x[2]))
        # NOTE(review): this try_choice(...) call is cut off as well.
        line = try_choice(mapping, try_choice(
        self.parse = line.parse
Ejemplo n.º 2
def enum():
    """Parse ``enum <identifier> { <enum_value>* }`` into a ProtobufEnum."""
    yield lexeme(string('enum'))
    enum_name = yield identifier
    yield lexeme(string('{'))
    values = yield many(enum_value)
    yield lexeme(string('}'))
    return ProtobufEnum(enum_name, values)
Ejemplo n.º 3
def message():
    """Parse ``message <identifier> { <field>* }`` into a Message."""
    yield lexeme(string('message'))
    message_name = yield identifier
    yield lexeme(string('{'))
    members = yield many(field)
    yield lexeme(string('}'))
    return Message(message_name, members)
Ejemplo n.º 4
def sdf_molecule():
    """Parse one molecule record: header, atoms, bonds, value and terminator.

    The header's ``atom_num`` and ``bond_num`` decide how many atom and bond
    entries are read.
    """
    head = yield sdf_header
    atom_list = yield P.times(sdf_atom, head.atom_num)
    bond_list = yield P.times(sdf_bond, head.bond_num)

    # "M" and "END" markers, then the "> <value>" tag, each after a separator.
    end_marker = seperator >> P.string('M') >> seperator >> P.string('END')
    value_tag = seperator >> P.string('> <value>')
    yield end_marker >> value_tag

    measured = yield seperator >> num_f
    yield seperator >> P.string('$$$$')
    return SdfMolecule(head, atom_list, bond_list, measured)
Ejemplo n.º 5
def matrix_parser():
    """Parse ``<height>, <width>`` on the first line, then the matrix itself.

    The two leading integers fix the number of rows and the number of cells
    per row; the returned parser reads exactly that many.
    """
    integer = many1(digit()).parsecmap(''.join).parsecmap(int)
    comma = string(",") << spaces()

    n_rows = yield integer
    yield comma
    n_cols = yield integer
    yield string('\n')

    one_row = separated(integer, comma, mint=n_cols, maxt=n_cols)
    return separated(one_row, string('\n'), mint=n_rows, maxt=n_rows)
Ejemplo n.º 6
def sdf_header():
    """Parse the record header and return ``SdfHeader(mol_num, atom_num, bond_num)``.

    The atom and bond counts are two consecutive three-character fields made
    of digits and spaces.
    """
    count_field = P.times(P.one_of(' 0123456789'), 3)

    mol_num = yield transparent >> num_i

    atom_chars = yield P.string('\n\n\n') >> count_field
    atom_num = int(''.join(atom_chars))

    bond_chars = yield count_field
    bond_num = int(''.join(bond_chars))

    # Five further integers are read and discarded before the version tag.
    yield P.times(seperator >> num_i, 5)
    yield seperator >> P.string("V2000")
    return SdfHeader(mol_num, atom_num, bond_num)
Ejemplo n.º 7
def pins():
    "Parse [Component].[Pin]."
    skipped_models = ("POWER", "GND", "NC")

    def keep(entry):
        # Each entry unpacks as (key, (model, extra)); drop the skipped models.
        _, (model, _) = entry
        return model.upper() not in skipped_models

    yield lexeme(string("signal_name")) << lexeme(string("model_name"))
    rlcs = yield optional(count(rlc, 3), [])
    entries = yield many1(pin(rlcs))
    return dict(entry for entry in entries if keep(entry))
Ejemplo n.º 8
 def string_esc():
     # A backslash followed by one escape: '\\', '/', one of the letters
     # b/f/n/r/t (mapped to its control character), or the closing quote.
     alternatives = string('\\') | string('/')
     for letter, control in (('b', '\b'), ('f', '\f'), ('n', '\n'),
                             ('r', '\r'), ('t', '\t')):
         alternatives = alternatives | string(letter).result(control)
     return string('\\') >> (alternatives | string(end_quote))
Ejemplo n.º 9
def ratio():
    """Parse ``<number>/<number>`` (or the ``na`` marker, read as ``[0, 0]``).

    Returns ``num / den``, or None when the denominator is zero or otherwise
    falsy.
    """
    [num, den] = yield (separated(number, string("/"), 2, maxt=2, end=False)
                        | na.result([0, 0]))
    if den:
        return num / den
    # Previously an unreachable ``return None`` sat after the return above;
    # the explicit fall-through keeps the same result without the dead code.
    return None
Ejemplo n.º 10
def sdf_bond():
    """Parse one bond line and return ``SdfBond(fst, snd, bond_type)``.

    The two leading values are three-character fields made of digits and spaces.
    """
    index_field = P.times(P.one_of(' 0123456789'), 3)

    first_chars = yield P.string('\n') >> index_field
    first = int(''.join(first_chars))

    second_chars = yield index_field
    second = int(''.join(second_chars))

    kind = yield seperator >> num_i
    # Three further integers are read and discarded.
    yield P.times(seperator >> num_i, 3)
    return SdfBond(first, second, kind)
Ejemplo n.º 11
def abbrev(
    obj: Union[MeasurementUnit, MeasurementUnitQualifier, ], ) -> Parser:
    # NOTE(review): the docstring quotes around the next four lines, the
    # leading argument(s) of reduce(...) and its closing parenthesis are
    # missing from this excerpt.
    Matches an abbreviation and returns the associated object.

    Humans cannot be relied upon to use spaces or thousand separators correctly
    so these can ignored.
    return reduce(
            # Variant with every space removed.
            string(obj.abbreviation.replace(" ", "")),
            # Variant with "1,000" written as "1000".
            string(obj.abbreviation.replace("1,000", "1000")),
            # Variant with both substitutions applied.
            string(obj.abbreviation.replace(" ", "").replace("1,000", "1000")),
Ejemplo n.º 12
def parse_untenbi():
    # Parse an optional pattern followed by all rules, then fold the rules
    # into a single dict ("pattern", "start", "end" and rule-named sets).
    raw_pattern = yield (pattern_def ^ parsec.string(""))
    raw_rules = yield all_rules

    result = {}

    # An empty match from parsec.string("") means no pattern was given.
    if raw_pattern:
        result["pattern"] = pattern_translate[raw_pattern]

    # Interpret each rule
    for raw_rule in raw_rules:

        if raw_rule["rule"] in {"start_date", "start_date_plus1"}:
            # NOTE(review): an ``else:`` appears to be missing before the
            # second assignment; as written it always overwrites the first.
            if raw_rule["rule"] == "start_date_plus1":
                result["start"] = day_plus_one(raw_rule["day"])
                result["start"] = raw_rule["day"]

            # A start without an end runs until December 31st.
            if "end" not in result:
                result["end"] = (12, 31)

            if "pattern" not in result:
                result["pattern"] = "毎日"

        elif raw_rule["rule"] in {"end_date", "end_date_minus1"}:
            # NOTE(review): same missing ``else:`` as in the start branch.
            if raw_rule["rule"] == "end_date_minus1":
                result["end"] = day_minus_one(raw_rule["day"])
                result["end"] = raw_rule["day"]

            # An end without a start begins on January 1st.
            if "start" not in result:
                result["start"] = (1, 1)

            if "pattern" not in result:
                result["pattern"] = "毎日"


            # NOTE(review): the branch header for the remaining rule kinds and
            # the start of the assignment below (presumably ``result[``) are
            # missing from this excerpt.
            if "pattern" not in result:
                    "pattern"] = "全休" if raw_rule["rule"] == "added" else "毎日"

            # NOTE(review): only the empty set is created here; the statement
            # that fills it is probably missing too - confirm against upstream.
            if raw_rule["rule"] not in result:
                result[raw_rule["rule"]] = set()


    # Nullyfing exceptions
    # With both kinds present, the one matching the pattern is dropped:
    # "added" under "毎日", "removed" under "全休".
    if "added" in result and "removed" in result:
        if result["pattern"] == "毎日":
            del result["added"]

        elif result["pattern"] == "全休":
            del result["removed"]

    return result
Ejemplo n.º 13
def field():
    """Parse one field declaration and return it as a Field.

    Order: qualifier, field type, identifier, the ``equals`` token, field id,
    optional field options (``[]`` when absent) and a closing ';'.
    """
    qual = yield qualifier
    type_name = yield field_type
    name = yield identifier
    yield equals
    number = yield field_id
    opts = yield optional(field_options, default_value=[])
    yield lexeme(string(';'))
    return Field(qual, type_name, name, number, opts)
Ejemplo n.º 14
def parser_by_count(value):
        # Turn the text of a leading cell into a parser for that many cells.
        # NOTE(review): the ``try:`` that pairs with the ``except`` below and
        # the tail of the separated(...) call are missing from this excerpt.
        num_cells = int(value)
        return separated(cell,
                         string(",") << spaces(),
    except ValueError:
        # int() rejected the value: return a parser that always fails.
        # NOTE(review): parsec calls parser functions as fn(text, index); the
        # lambda names its parameters the other way round - confirm the order.
        return Parser(
            lambda index, text: Value.failure(index, "expected a number"))
Ejemplo n.º 15
def param():
    "Parse IBIS parameter."
    # NOTE(review): the pattern argument of regex(...) is missing from this
    # excerpt; only the flags argument remains.
    pname = yield regex(
        re.MULTILINE)  # Parameters must begin with a letter in column 1.
    # NOTE(review): the body of this ``if`` is missing from this excerpt.
    if DBG:
    # After optional whitespace the value is tried as "= <number>", then
    # typminmax, then name, then the rest of the line.
    res = yield (regex(r"\s*") >> (
        (word(string("=")) >> number) | typminmax | name | rest_line))
    yield ignore  # So that ``param`` functions as a lexeme.
    # The parameter name is lower-cased before being returned.
    return (pname.lower(), res)
Ejemplo n.º 16
def single_day_rule():
    """Parse a single-day definition followed by one rule suffix.

    Returns a dict with the parsed "day" and the translated "rule".

    Raises:
        ValueError: when the first element of the parsed day is None.
    """
    day_info = yield single_day_def

    suffixes = ("から運転", "からは運転", "からは運休", "まで運転", "までは運転",
                "まで運休", "までは運休")
    # Chain the alternatives with ``^`` in the listed order.
    suffix_parser = parsec.string(suffixes[0])
    for suffix in suffixes[1:]:
        suffix_parser = suffix_parser ^ parsec.string(suffix)
    rule_type = yield suffix_parser

    month = day_info["day"][0]
    if month is None:
        raise ValueError("month definition is required in から運転・まで運転 rules")

    return {"day": day_info["day"], "rule": rule_type_translations[rule_type]}
Ejemplo n.º 17
def fix_image_url(url, repo_name):
    '''Fix a GitHub image URL so that it points at the raw file.

    Links on `github.com` are invalid as image sources because they return
    *html* content; image links need `raw.githubusercontent.com`. For example:

    - This returns an html: https://github.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png
    - This returns a png: https://raw.githubusercontent.com/Retrothopter/Niobium-Nanotech/master/Preview.png

    Relative links are also invalid and are resolved against the repository.
    For example:

    - preview.png
    - sprites/preview.png
    - /sprites/preview.png

    Args:
        url: the image URL or relative path to fix.
        repo_name: the repository as ``owner/name``.

    Returns:
        The rewritten raw URL, or ``url`` unchanged when it is already a raw
        URL, is not a ``<repo>/blob/master/`` link, or is on another host.
    '''
    # FIXME: this assumes `master` is always the branch we want, while in reality we need the
    # `default_branch` of the repository, which could also for example be `main`
    from urllib.parse import urlparse

    o = urlparse(url)
    if o.netloc == "raw.githubusercontent.com":
        return url

    if o.netloc == "github.com":
        blob_prefix = f"{repo_name}/blob/master/"
        # Tolerate at most one leading slash before the repository name.
        candidate = o.path[1:] if o.path.startswith("/") else o.path
        if candidate.startswith(blob_prefix):
            path = candidate[len(blob_prefix):]
            if path:
                return f"https://raw.githubusercontent.com/{repo_name}/master/{path}"

    if o.netloc == "":
        # Strip leading slashes so "/sprites/x.png" does not become "master//sprites/x.png".
        relative = o.path.lstrip("/")
        return f"https://raw.githubusercontent.com/{repo_name}/master/{relative}"

    return url
Ejemplo n.º 18
def fix_image_url(url, repo_name):
    '''Fixes a GitHub url, where the url should point to an image.

    Any links with `github.com` are invalid, because they're html links, while
    image links would have `githubusercontent.com`, for example:
    - https://github.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png;

    Any links that don't have a domain are relative and as such invalid, for example:
    - preview.png;
    - sprites/preview.png;
    - /sprites/preview.png

    This is also why a repo name is required.

    Args:
        url: the image URL or relative path to fix.
        repo_name: the repository as ``owner/name``.

    Returns:
        The rewritten raw URL, or ``url`` unchanged when it is already a raw
        URL, is not a ``<repo>/blob/master/`` link, or is on another host.
    '''
    from urllib.parse import urlparse

    o = urlparse(url)
    if o.netloc == "raw.githubusercontent.com":
        return url

    if o.netloc == "github.com":
        blob_prefix = f"{repo_name}/blob/master/"
        # Tolerate at most one leading slash before the repository name.
        candidate = o.path[1:] if o.path.startswith("/") else o.path
        if candidate.startswith(blob_prefix):
            path = candidate[len(blob_prefix):]
            if path:
                return f"https://raw.githubusercontent.com/{repo_name}/master/{path}"

    if o.netloc == "":
        # Strip leading slashes so "/sprites/x.png" does not become "master//sprites/x.png".
        relative = o.path.lstrip("/")
        return f"https://raw.githubusercontent.com/{repo_name}/master/{relative}"

    # Anything else is not a GitHub link and is returned untouched.
    return url
Ejemplo n.º 19
 def fn():
     """Parse an IBIS keyword written as ``[wordlet wordlet ...]``.

     Returns the wordlets joined with "_". When the enclosing ``kywrd`` is
     non-empty and differs from the result (ignoring case), the ``fail``
     parser with a description is returned instead, so a mismatch is no
     longer silently accepted.
     """
     yield regex(r"^\[", re.MULTILINE)
     # ``name`` gobbles up trailing space, which we don't want.
     wordlets = yield sepBy1(name_only, one_of(" _"))
     yield string("]")
     yield ignore  # So that ``keyword`` functions as a lexeme.
     res = "_".join(wordlets)  # Canonicalize to: "<wordlet1>_<wordlet2>_...".
     # An assert does not work here, so a mismatch is reported through ``fail``.
     # Previously this return was unreachable (it followed ``return res``
     # inside the matching branch).
     if kywrd and res.lower() != kywrd.lower():
         return fail.desc(f"Expecting: {kywrd}; got: {res}.")
     return res
Ejemplo n.º 20
    def test_memberMappings(self):
        # Exercises memberMapping on several lines separated by '\n'.
        from parsec import sepBy
        from parsec import string
        from proguard_mapping_parser.parser import memberMapping

        members = sepBy(memberMapping, string('\n'))
        # NOTE(review): the assertion call wrapping the expected values and the
        # input text below is missing from this excerpt.
                ((None, ('java.util.HashMap', None), 'mHashMap', None), 'a'),
                (((35, 37), ('void', None), '<init>', []), '<init>'),
                (((66, 66), ('boolean', None), 'contains', [
                    ('java.lang.Object', None),
                ]), 'a'),
                '    java.util.HashMap mHashMap -> a\n'
                '    35:37:void <init>() -> <init>\n'
                '    66:66:boolean contains(java.lang.Object) -> a\n'))
Ejemplo n.º 21
def range_def():
    """Parse ``<month><day>~<month><day>`` into a range description.

    Returns a dict with "type" set to "range" and "start" / "end" given as
    ``(month, day)`` tuples. A month is None when ``month_def`` produced a
    falsy value; days are always converted with ``int``.
    """
    start_month = yield month_def
    start_day = yield day_def

    yield parsec.string("~")

    end_month = yield month_def
    end_day = yield day_def

    start_month = int(start_month) if start_month else None
    start_day = int(start_day)

    end_month = int(end_month) if end_month else None
    end_day = int(end_day)

    # The dict literal was previously left unclosed.
    return {
        "type": "range",
        "start": (start_month, start_day),
        "end": (end_month, end_day),
    }
Ejemplo n.º 22
 def _parse_fasta(self, filehandle, sep="|"):
     # NOTE(review): the docstring quotes around the next two lines are
     # missing from this excerpt.
     Parse a fasta file. The header is split into fields on 'sep'. The
     sequence is added as a final field.
     # A header is '>' followed by everything up to the end of the line.
     p_header = parsec.string(">") >> parsec.regex("[^\n\r]*") << parsec.spaces()
     # NOTE(review): the call that takes these arguments and the closing
     # parenthesis of p_seq are missing from this excerpt.
     p_seq = (
             parsec.regex("[^>\n\r]*"), sep=parsec.regex("[\r\n\t ]+")
         << parsec.spaces()
     p_entry = p_header + p_seq
     p_fasta = parsec.many1(p_entry)
     log(f"Reading {file_str(filehandle)} as a fasta file:")
     # NOTE(review): the ``try:`` that pairs with the ``except`` below is
     # missing from this excerpt.
         entries = p_fasta.parse(filehandle.read())
     except AttributeError:
         # in case I want to pass in a list of strings, e.g., in tests
         entries = p_fasta.parse(filehandle)
     # One row per entry: the header fields followed by the sequence.
     row = [h.split(sep) + [q] for (h, q) in entries]
     return row
Ejemplo n.º 23
    pip install relaxedjson

To install as an egg-link in development mode::

    python setup.py develop -N


import re
from parsec import (sepBy, regex, string, generate, many, endBy)

whitespace = regex(r'\s*', re.MULTILINE)


def lexeme(p):
    """Wrap parser ``p`` so that it also consumes trailing whitespace."""
    return p << whitespace


# A /* ... */ comment; the body may contain '*' unless it is followed by '/'.
comment = lexeme(
    string('/*')
    >> regex(r'(?:[^*]|\*(?!\/))+', re.MULTILINE)
    << string('*/'))

# Punctuation tokens.
lbrace, rbrace = lexeme(string('{')), lexeme(string('}'))
lbrack, rbrack = lexeme(string('[')), lexeme(string(']'))
colon, comma = lexeme(string(':')), lexeme(string(','))

# Literal keywords mapped to their Python values.
true = lexeme(string('true')).result(True)
false = lexeme(string('false')).result(False)
null = lexeme(string('null')).result(None)

# Either quote character may open a string; no trailing whitespace is skipped.
quote = string('"') | string("'")

Ejemplo n.º 24

# Result types produced by the parsers below: inserted text, a new chapter,
# a new paragraph and a line break.
Text = collections.namedtuple('Text', 'text')
NewChapter = collections.namedtuple('NewChapter', 'text')
NewParagraph = collections.namedtuple('NewParagraph', ())
Break = collections.namedtuple('Break', ())

# White space that may trail a reserved word.
whitespace = parsec.regex(r'\s*', re.MULTILINE)


def skip_whitespace(p):
    """Wrap parser ``p`` so that trailing white space is parsed and ignored."""
    return p << whitespace


# Reserved words that stand alone as commands.
par_command = skip_whitespace(parsec.string('#par')).result(NewParagraph())
break_command = skip_whitespace(parsec.string('#break')).result(Break())

# Building block for the newch_command parser defined further down.
newch_parser = skip_whitespace(parsec.string('#newch'))

# Text is whatever lies between reserved words (or runs to the end of input).
commands = '#newch|#par|#break'
text_parser = parsec.regex('(?!(%s))(.+?)(?=%s|$)' % (commands, commands))

def text_command(text, index):
    """Parse a text command returning the text to be inserted."""
    res = text_parser(text, index)
    if not res.status:
        return res
Ejemplo n.º 25
class Schematic(namedtuple("Schematic", "name pos config rotation")):
    """Named tuple with the fields ``name``, ``pos``, ``config`` and ``rotation``."""

class Schematics(namedtuple("Schematics", "width height tags tiles")):
    """Named tuple with the fields ``width``, ``height``, ``tags`` and ``tiles``."""

HEADER, VERSION = b"msch", b"\x00"

## Reader

header, version = string(HEADER), string(VERSION)

# (?s) makes "." match newline bytes too, so nothing is left behind.
everything = regex(b"(?s).*")

# Fixed-width fields: one byte, a signed char, and big-endian signed
# 2-byte and 4-byte integers.
byte = regex(b"(?s).")
char = byte.parsecmap(lambda raw: unpack("b", raw)[0])
short = regex(b"(?s).{2}").parsecmap(lambda raw: unpack(">h", raw)[0])
intp = regex(b"(?s).{4}").parsecmap(lambda raw: unpack(">i", raw)[0])


def nbytes(x):
    """Return a parser built from ``times(byte, x)`` whose pieces are joined."""
    return times(byte, x).parsecmap(b"".join)

def utf8_bytes():
    """ Parses utf8 string, prefixed with length. """
    length = yield short
Ejemplo n.º 26
            return '[%s%s]' % (self.element_type, star_str)
        if self.is_dlist:
            return '[[%s%s]]' % (self.element_type, star_str)
        if self.is_set:
            return '{%s%s}' % (self.element_type, star_str)
        if self.is_dict:
            return '{%s: %s%s}' % (self.element_type[0], self.element_type[1],
        raise RuntimeError('Invalid codegen kind: %s' % self.kind)

# A type name, optionally qualified with '::'; the character class also
# admits '<', '>', ',' and spaces. Surrounding whitespace is skipped.
name_pattern = (
    parsec.spaces()
    .compose(parsec.regex(
        r'[_a-zA-Z][_a-zA-Z0-9<>, ]*(::[_a-zA-Z][_a-zA-Z0-9<>, ]*)*'))
    .skip(parsec.spaces())
)

# An optional '*', yielding '' when absent; surrounding whitespace is skipped.
star_pattern = (
    parsec.spaces()
    .compose(parsec.optional(parsec.string('*'), ''))
    .skip(parsec.spaces())
)

# Whitespace only: the 'meta' kind.
parse_meta = parsec.spaces().parsecmap(lambda _ignored: CodeGenKind('meta'))

# name + star with no brackets: the 'plain' kind.
parse_plain = (
    parsec.spaces()
    .compose(name_pattern + star_pattern)
    .skip(parsec.spaces())
    .parsecmap(lambda parsed: CodeGenKind('plain', parsed))
)

# [name*]: the 'list' kind.
parse_list = (
    parsec.string('[')
    .compose(name_pattern + star_pattern)
    .skip(parsec.string(']'))
    .parsecmap(lambda parsed: CodeGenKind('list', parsed))
)

# [[name*]]: the 'dlist' kind.
parse_dlist = (
    parsec.string('[[')
    .compose(name_pattern + star_pattern)
    .skip(parsec.string(']]'))
    .parsecmap(lambda parsed: CodeGenKind('dlist', parsed))
)
parse_set = (parsec.string('{') >>
             (name_pattern + star_pattern) << parsec.string('}')
Ejemplo n.º 27
import re

from parsec import string, sepBy, regex, sepEndBy1, spaces, Parser, separated, Value, generate, many1, digit

# A cell is either a double-quoted string or a run of characters that
# contains no comma, quote or line break.
quoted_string = regex(r'"[^"]*"', re.MULTILINE)
cell = quoted_string.try_choice(regex(r'[^,"\r\n]*'))

# Lines end with '\r' optionally followed by '\n'.
end_line = regex(r'\r\n?', re.MULTILINE)

# Cells are separated by a comma and any following whitespace.
row = sepBy(cell, string(",").skip(spaces()))
header = row

# The header line, then one or more rows separated (and optionally ended)
# by line ends.
csv = header.skip(end_line).joint(sepEndBy1(row, end_line))

def parser_by_count(value):
        # Turn the text of a leading cell into a parser for that many cells.
        # NOTE(review): the ``try:`` that pairs with the ``except`` below and
        # the tail of the separated(...) call are missing from this excerpt.
        num_cells = int(value)
        return separated(cell,
                         string(",") << spaces(),
    except ValueError:
        # int() rejected the value: return a parser that always fails.
        # NOTE(review): parsec calls parser functions as fn(text, index); the
        # lambda names its parameters the other way round - confirm the order.
        return Parser(
            lambda index, text: Value.failure(index, "expected a number"))

# The first cell, followed by a comma and any whitespace.
first_cell = cell.skip(string(",")).skip(spaces())
# The first cell's value selects the parser for the rest (``>=`` is bind).
counting_parser = first_cell >= parser_by_count

# @generate
def matrix_parser():
    cell = many1(digit()).parsecmap(''.join).parsecmap(int)
Ejemplo n.º 28
 def parser():
     """Parse the header, any number of newlines, then all samples.

     Returns a ``(header, samples)`` tuple.
     """
     parsed_header = yield header
     yield parsec.many(parsec.string('\n'))
     sample_list = yield parsec.many(sample)
     return parsed_header, sample_list
Ejemplo n.º 29
def quoted():
    """Parse a quoted string and deliver its body as one joined string.

    The closing quote must be the same text that ``quote`` matched at the start.
    """
    opening = yield quote
    pieces = yield many(charseq(opening))
    yield string(opening)
    text = ''.join(pieces)
    # The result is delivered by raising StopGenerator rather than returning.
    raise StopGenerator(text)
Ejemplo n.º 30
 def sample():
     """Parse two numbers and an optional newline; return them as floats."""
     fwhm_text = yield spaces >> floating << spaces
     level_text = yield floating << spaces
     yield parsec.optional(parsec.string('\n'))
     fwhm_value = float(fwhm_text)
     level_value = float(level_text)
     return (fwhm_value, level_value)