def parse_obj(self, tokens):
    """Check whether *tokens* contains a (possibly nested) while-statement.

    Returns True when the grammar matches the whole input; on failure the
    ParseError is returned (not raised) so callers can inspect it.
    """
    whitespace = regex(r'\s*')
    while_stmt = string('while')

    # A lexeme strips surrounding whitespace from any parser.
    lexeme = lambda p: whitespace >> p << whitespace
    lbrace = lexeme(string('{'))
    rbrace = lexeme(string('}'))
    colon = lexeme(string(';'))  # NOTE: named 'colon' but actually matches ';'

    # Any non-space token that is not the keyword 'while'.
    cont = whitespace >> regex(r'\b(?:(?!while)\S)+\b') << whitespace

    # A statement optionally terminated by ';'.
    content = (cont << colon | cont)

    while_obj = lbrace >> content.many() << rbrace
    while_ = while_stmt >> content.many() >> while_obj
    while_1 = whitespace >> while_ << whitespace
    # The while-block may be surrounded by other statements.
    full_make = (content.many() >> while_1 << content.many()
                 | content.many() >> while_1)

    try:
        # The parse result itself is discarded; only success/failure matters.
        full_make.parse(tokens)
        return True
    except Exception as err:
        return err
Beispiel #2
0
 def test_combine_dict(self):
     # Tag day/month/year fields, collect them into a dict, then build a date.
     day = regex(r'[0-9]{2}').map(int).tag('day')
     month = regex(r'[0-9]{2}').map(int).tag('month')
     year = regex(r'[0-9]{4}').map(int).tag('year')
     ddmmyyyy = seq(day, month, year).map(dict).combine_dict(date)
     self.assertEqual(ddmmyyyy.parse('05042003'), date(2003, 4, 5))
Beispiel #3
0
    def parse_kind(name: str, model: Dict[str, Kind]) -> Kind:
        """Parse a kind expression — a simple name, an array ``T[]...`` or a
        ``dictionary[K, V]`` — resolving names against the registered *model*."""
        def kind_by_name(kind_name: str) -> Kind:
            # Unknown kind names fail loudly rather than producing a bogus kind.
            if kind_name not in model:
                raise AttributeError(f"Property kind is not known: {kind_name}. Have you registered it?")
            return model[kind_name]

        simple_kind_parser = regex("[A-Za-z][A-Za-z0-9_.]*").map(kind_by_name)
        bracket_parser = string("[]")
        dict_string_parser = string("dictionary[")
        comma_parser = regex("\\s*,\\s*")
        bracket_r = string("]")

        @make_parser
        def array_parser() -> Parser:
            # An element kind followed by one or more '[]' pairs; the number
            # of pairs determines the array nesting depth.
            inner = yield dictionary_parser | simple_kind_parser
            brackets = yield bracket_parser.times(1, float("inf"))
            return ArrayKind.mk_array(inner, len(brackets))

        @make_parser
        def dictionary_parser() -> Parser:
            # `dictionary[<simple key>, <value kind>]`; value kinds may nest.
            yield dict_string_parser
            key_kind = cast(Kind, (yield simple_kind_parser))
            yield comma_parser
            value_kind = yield array_parser | dictionary_parser | simple_kind_parser
            yield bracket_r
            return DictionaryKind(key_kind, value_kind)

        return (array_parser | dictionary_parser | simple_kind_parser).parse(name)  # type: ignore
Beispiel #4
0
    def __init__(self):
        """Build a parser for a simple ``Name { key = value ... }`` block format.

        Produces self.parser, which maps the input into a dict keyed by block
        name; 'Region' blocks contain nested key/value blocks, everything else
        is kept as raw text.
        """
        spaces = regex(r'[ \t]*')  # Excludes newline
        whitespace = regex(r'\s*')  # Includes newline

        newline = string('\n')
        equal = string('=')
        lbrace = whitespace << string('{') << whitespace
        rbrace = whitespace << string('}') << whitespace

        # These parsers don't terminate blocks.
        # Raw string added: '\s' in a plain literal is an invalid escape
        # (SyntaxWarning since Python 3.12); the matched language is unchanged.
        word = regex(r'[^\s=}]+')
        words = word + (spaces + word).many().concat()
        characters = regex(r'[^}]*')

        key_value_line = seq(spaces >> word << spaces << equal,
                             spaces >> words << spaces)
        key_value_lines = key_value_line.sep_by(newline).map(dict)

        def block(name, content):
            # A named block: `name { content }`, tolerating surrounding whitespace.
            return seq(whitespace >> name, lbrace >> content << rbrace)

        key_value_block = block(word, key_value_lines)
        key_value_blocks = key_value_block.many().map(dict_of_list)
        region_block = block(string('Region'), key_value_blocks)
        other_block = block(word, characters)

        self.parser = (region_block | other_block).many().map(dict)
Beispiel #5
0
 def test_seq_kwargs(self):
     # Keyword arguments to seq() become the keys of the resulting dict.
     token = regex(r"\S+")
     parser = seq(first_name=token << whitespace, last_name=token)
     expected = {'first_name': 'Jane', 'last_name': 'Smith'}
     self.assertEqual(parser.parse("Jane Smith"), expected)
Beispiel #6
0
 def test_combine_dict_list(self):
     # Tagged parsers feed combine_dict, which calls Pair(**fields).
     Pair = namedtuple('Pair', ['word', 'number'])
     word_field = regex(r'[A-Z]+').tag('word')
     number_field = regex(r'[0-9]+').map(int).tag('number')
     parser = seq(word_field, number_field).combine_dict(Pair)
     self.assertEqual(parser.parse('ABC123'), Pair(word='ABC', number=123))
Beispiel #7
0
def lexeme(p):
    """
    From a parser (or string), make a parser that consumes
    whitespace on either side.
    """
    parser = string(p) if isinstance(p, str) else p
    padding = regex(r'\s*')
    return padding >> parser << padding
Beispiel #8
0
def whole_program():
    """
    Parse an entire program: optional surrounding whitespace around a
    whitespace-separated sequence of atoms.
    """
    padding = regex(r'\s*')
    yield padding
    result = yield atom.sep_by(padding)
    yield padding
    return result
Beispiel #9
0
def lexer(code):
    """Tokenise arithmetic source into ints, floats and operator characters."""
    skip_ws = regex(r'\s*')
    integer = digit.at_least(1).concat().map(int)
    # A float requires a literal '.'; digits on either side are optional.
    float_ = (digit.many() + string('.').result(['.'])
              + digit.many()).concat().map(float)
    token = (float_ | integer | regex(r'[()*/+-]')) << skip_ws
    return (skip_ws >> token.many()).parse(code)
Beispiel #10
0
def slist():
    """Parse a bracketed or parenthesised list of atoms into an RList."""
    begin = yield parsy.line_info
    opener = yield parsy.regex(r'[([]')
    items = yield atom.sep_by(parsy.whitespace)

    # The closing delimiter must match the opening one.
    closer = ')' if opener == '(' else ']'
    yield regex(r'\s*') >> parsy.string(closer)
    end = yield parsy.line_info
    return RList(items, sq=opener == '[', span=to_range(begin, end))
Beispiel #11
0
def word():
    """Parse either an unquoted word or a quoted string with escapes."""
    quote = yield regex('[\'"]').optional()

    if quote is None:
        # No opening quote: hand off to the bare-word parser.
        return unquoted_word

    # Inside quotes a backslash may escape the quote char or a backslash;
    # everything else up to the closing quote is taken verbatim.
    escape = regex(fr'\\[\\{quote}]').map(lambda m: m[-1])
    plain = regex(fr'[^\\{quote}]+')
    text = yield (escape | plain).many().concat()
    yield string(quote).desc("quote")
    return text
def line_of_code():
    """Parse one disassembly line: ``address: bytes mnemonic op1, op2 ; comment``."""
    yield whitespace
    address = yield lexeme(number)
    yield colon
    # One to four space-terminated byte codes follow the address.
    byte_codes = yield lexeme((byte_code << parsy.string(' ')).times(1, max=4))
    mnemonic = yield lexeme(parsy.regex(r'\w{2,5}'))
    op1 = yield lexeme(operand.optional())
    yield lexeme(comma.optional())
    op2 = yield lexeme(operand.optional())
    yield lexeme(semicolon.optional())
    # The trailing comment is consumed but deliberately not returned.
    comment = yield lexeme(parsy.regex(r'.+').optional())
    return 'loc', address, byte_codes, mnemonic, op1, op2
Beispiel #13
0
def parseXYZContent(content):
    """Parse XYZ-format text into (coords, protonNumbers, names, latticeVectors).

    Rows named VEC1..VEC3 are interpreted as crystal lattice vectors and are
    split out of the atom table; latticeVectors is None otherwise.
    """
    intParser = regex(r"[-+]?\d+").map(int)
    floatParser = regex(r"[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?").map(float)
    newLine = regex(r'\n')
    untilNewLine = regex(r'[^\n]*')

    @generate
    def parseHeader():
        # XYZ header: atom count on line 1, free-form comment on line 2.
        num = yield intParser  # number of atoms
        yield newLine >> untilNewLine >> newLine  # comment
        return num

    @generate
    def parseRow():
        # Element label (symbol or proton number) then x, y, z coordinates.
        atom = yield regex(r'\s*[a-zA-Z0-9.]*')
        try:
            # Numeric labels are translated to element symbols.
            atomNumber = int(atom)
            atom = atom_names[atomNumber]
        except ValueError:
            pass
        yield whitespace.many()
        x = yield floatParser
        yield whitespace.many()
        y = yield floatParser
        yield whitespace.many()
        z = yield floatParser
        return np.array([x, y, z]), atom.strip()

    @generate
    def parseContent():
        # Header is optional so bare coordinate tables also parse.
        yield parseHeader.optional()
        table = yield parseRow.many()
        yield whitespace.many()
        atomCoords = [a[0] for a in table]
        atomNames = [a[1] for a in table]
        return np.array(atomCoords), np.array(atomNames)

    table, atomNames = parseContent.parse(content)
    latticeVectors = None
    if 'VEC1' in atomNames:
        # this is a crystal: pull VEC1..VEC3 rows out as lattice vectors
        latticeVectors = []
        for d in range(1, 4):
            vName = f'VEC{d}'
            assert vName in atomNames
            # np.where is recomputed each pass, so earlier deletions are safe.
            i = np.where(atomNames == vName)[0][0]
            latticeVectors.append(table[i])
            table = np.delete(table, i, axis=0)
            atomNames = np.delete(atomNames, i, axis=0)
        latticeVectors = np.array(latticeVectors)
    atomNumbers = np.array([atom_proton_numbers[an] for an in atomNames])
    return table, atomNumbers, atomNames, latticeVectors
Beispiel #14
0
def _nested() -> parsy.Parser:
    """
    Self-referential recursion helper for `type_atom`

    (looks for further type defs nested between `[` `]` pairs)
    """
    return (
        # `between` strips the brackets; the closing pattern tolerates
        # whitespace, line breaks and a trailing comma before ']'.
        yield between(
            parsy.regex(r"\[\s*"),
            parsy.regex(r",?\s*\]"),  # allow line-breaks and trailing-comma
            type_atom.sep_by(parsy.regex(r",\s*")),  # includes new-lines
        )
    )
def description_parser():
    """Parse a JVM method descriptor into (parameter-list string, return type)."""
    dotted = lambda *parts: '.'.join(parts)
    ident = regex(r'[0-9a-zA-Z\$_]+')
    # Class references look like `Lcom/example/Name;` -> `com.example.Name`.
    class_name = (string('L') >> ident.sep_by(string('/')).combine(dotted)
                  << string(';'))
    base_type = regex('[BCDFIJSZ]').map(lambda c: BASE_TYPE_NAMES[c])
    # Arrays: leading '[' per dimension, suffixed onto the element type.
    array = seq(
        string('[').at_least(1).map(lambda brackets: '[]' * len(brackets)),
        (base_type | class_name),
    ).combine(lambda suffix, typ: typ + suffix)
    parameter = base_type | class_name | array
    parameters = parameter.many().combine(
        lambda *ps: '(' + ','.join(ps) + ')')
    void = string('V').map(lambda _: 'void')
    return_type = void | parameter
    return seq(string('(') >> parameters << string(')'), return_type)
def function():
    """Parse a function header line such as ``4005d0 <main>:``."""
    addr = yield lexeme(number)
    yield parsy.string('<')
    fn_name = yield parsy.regex(r'[_\w\d\.]+')
    yield lexeme(parsy.string('>'))
    yield lexeme(parsy.string(':'))
    return 'fn', addr, fn_name
Beispiel #17
0
    def test_regex(self):
        # A digit-class regex returns the matched text and rejects non-digits.
        parser = regex(r'[0-9]')

        for ch in ('1', '4'):
            self.assertEqual(parser.parse(ch), ch)

        self.assertRaises(ParseError, parser.parse, 'x')
Beispiel #18
0
    def test_regex_bytes(self):
        # regex() also works over bytes patterns and bytes input.
        parser = regex(rb'[0-9]')

        for token in (b'1', b'4'):
            self.assertEqual(parser.parse(token), token)

        self.assertRaises(ParseError, parser.parse, b'x')
Beispiel #19
0
def backtick():
    """ Parse backticks. This is fugly.

    Backticks: I gave up on single-pass parsing here. It would be doable with the 'notes' extension, but would require
    enough context carrying forward that it'd need reimplementations of all bare string- and regex-matching things to
    understand how many levels deep they are.

    Here is the skinny: the shell has the $( ) which offer an objectively cleaner syntax.
    We parse backticks recursively because it's about the neatest approach to implement the shell spec that describes
    the feature in terms of a recursive implementation.

    The Posix shell spec, section 2.6.3, says this:

        Within the backquoted style of command substitution, <backslash> shall retain its literal meaning, except when
        followed by: '$', '`', or <backslash>. The search for the matching backquote shall be satisfied by the first
        unquoted non-escaped backquote; during this search, if a non-escaped backquote is encountered within a shell
        comment, a here-document, an embedded command substitution of the $(command) form, or a quoted string,
        undefined results occur. A single-quoted or double-quoted string that begins, but does not end, within the
        "`...`" sequence produces undefined results.

    What a mess.
    """
    # Order matters in the alternation: the \`, \$ and \\ escapes must be
    # tried before the greedy [^\\`]* chunk, and a lone backslash is the
    # final fallback. An unescaped backtick terminates via should_fail.
    content = yield string("`") >> (
        string("`").should_fail("backtick") >>
        (string(r"\`").result("`") | string(r"\$").result("$")
         | string(r"\\").result("\\") | regex(r'[^\\`]*')
         | string("\\"))).many().concat() << string("`")
    # The collected inner text is re-parsed as a full command sequence.
    return command_sequence.parse(content)
Beispiel #20
0
def fields():
    """Parse one or more whitespace-separated fields into Fields(by_index, by_name)."""
    by_index = []
    by_name = {}
    space = regex(r'\s+')

    @generate
    def field():
        # Always try matching named fields before indexed fields.  This is
        # necessary because the parser for an indexed field will match the
        # first half of a named field but stop at the '='.  The next parser
        # will then fail when it tries to continue from there.
        if by_name:
            # Once a named field has been seen, positional fields are no
            # longer accepted.
            yield named_field
        else:
            yield named_field | indexed_field

    @generate
    def named_field():
        # `key=value` pairs accumulate into the by_name mapping.
        k, v = yield key_value
        by_name[k] = v

    @generate
    def indexed_field():
        # Bare words accumulate positionally.
        v = yield word
        by_index.append(v)

    yield field.sep_by(space, min=1)
    return Fields(by_index, by_name)
Beispiel #21
0
    def test_regex(self):
        # Single-character digit regex: matched text comes back verbatim.
        parser = regex(r'[0-9]')

        for digit_char in '14':
            self.assertEqual(parser.parse(digit_char), digit_char)

        self.assertRaises(ParseError, parser.parse, 'x')
Beispiel #22
0
def test_space2(s0, s1):
    """
    following non-whitespace is not consumed
    """
    parser = space() + parsy.regex(r'.*')
    assert parser.parse(s0 + s1) == s1
Beispiel #23
0
def number():
    """Parse a signed int/float literal and attach its source range."""
    begin = yield parsy.line_info
    value = yield regex(r'[+-]?\d+(\.\d+)?').map(map_number)
    end = yield parsy.line_info
    value.range = to_range(begin, end)
    return value
Beispiel #24
0
def parse_header(name: str, clock: str) -> Tuple[str, Dict[str, str]]:
    """Scan a Verilator-generated header for port declarations.

    Reads ``obj_dir/V{name}.h`` line by line, matching the VL_IN*/VL_OUT*
    macros, and returns (name, {port_name: width_tag}) with *clock* removed.

    Raises KeyError if *clock* is not among the discovered ports.
    """
    in8 = string('    VL_IN8(').map(lambda x: 'IN8')
    in16 = string('    VL_IN16(').map(lambda x: 'IN16')
    in32 = string('    VL_IN(').map(lambda x: 'IN32')
    in64 = string('    VL_IN64(').map(lambda x: 'IN64')
    inw = string('    VL_INW(').map(lambda x: 'INW')

    out8 = string('    VL_OUT8(').map(lambda x: 'OUT8')
    out16 = string('    VL_OUT16(').map(lambda x: 'OUT16')
    out32 = string('    VL_OUT(').map(lambda x: 'OUT32')
    out64 = string('    VL_OUT64(').map(lambda x: 'OUT64')
    outw = string('    VL_OUTW(').map(lambda x: 'OUTW')

    ports = (in8 | in16 | in32 | in64 | inw | out8 | out16
             | out32 | out64 | outw).desc('variable width definition')

    # Fixed: the original pattern '[a-za-z]+\w*' duplicated the a-z range
    # (so uppercase-starting names never matched) and used a non-raw string
    # with '\w' (invalid escape, SyntaxWarning since Python 3.12).
    varname = regex(r'[a-zA-Z]+\w*').desc('variable name')

    with open(f'obj_dir/V{name}.h', 'r') as f:
        lines = f.readlines()

    portlist = {}
    for line in lines:
        try:
            # parse_partial: match the prefix of the line, ignore the rest.
            port_def, _ = seq(ports, varname).parse_partial(line)
            portlist[port_def[1]] = port_def[0]
        except ParseError:
            pass

    # remove clock from port list (KeyError if absent — fail loudly)
    del portlist[clock]

    return name, portlist
def expr_cont_quantified():
    """Parse a quantified expression: binder, bound variable, '.', body, ')'."""
    # NOTE(review): `lexme` looks like a typo for `lexeme` — confirm against
    # the enclosing module before renaming.
    b = yield lexme(regex(r"@|!|\?!|\?|\\|lambda"))
    v = yield ident
    yield dot
    body = yield expr
    yield rparen
    return EQuantified(b, v, body)
Beispiel #26
0
def eol():
    """ Parse and consume a single '\n' character.

    If there are any heredocs pending, immediately consume more lines of input
    until all heredocs are filled in.
    """
    yield string("\n")

    # Do we need to consume some heredocs?
    notes = yield get_notes

    # make a copy of this list so that we don't perturb the note.
    hds = list(notes.get('hds', []))

    while len(hds) > 0:
        # The next heredoc to scan for
        hd = hds.pop(0)

        lines = []
        while True:
            # Grab one raw line: newline-terminated, or the final
            # unterminated line at end of input.
            line = yield eof.result(EOF) | regex(
                "[^\n]*\n") | regex("[^\n]*") << eof
            if line is EOF:
                return fail("looking for heredoc ending with " + hd.end)
            if line.rstrip("\n") == hd.end:
                break
            lines.append(line)

        content = '\n'.join(lines)

        # Unquoted heredocs get expansion parsing; quoted ones stay literal.
        if content == '':
            content = ConstantString("")
        elif hd.quote is None:
            content = double_content.parse(content)
        else:
            content = ConstantString(content)

        # Back-fill the HereDoc content. Note, this is *not* undone by backtracking.
        # However, a backtrack and re-parse may overwrite this value; so in the end,
        # it's likely that this will do what we want.
        hd.file = content

        # `notes` itself is a shallow copy, so we don't need to worry about copying it here.
        notes['hds'] = hds
        yield put_note(notes)
    return "\n"
Beispiel #27
0
def test_regex():
    """A single-digit regex parses digits verbatim and rejects other input."""
    parser = regex(r'[0-9]')

    for ch in '14':
        assert parser.parse(ch) == ch

    rejected = False
    try:
        parser.parse('x')
    except ParseError:
        rejected = True
    assert rejected
    def __init__(self):
        """Build parsers for `indi`, `GPO`/`GPI` and `ERROR` device responses."""
        # TODO: finish this parser so it does more than just parse indi and GPO responses
        indi_operator = string_from("indi")
        gp_operator = string_from("GPO", "GPI")
        space = string(" ")
        obj = regex(r"[a-zA-Z0-9.#]*")
        name = regex(r"[a-zA-Z]*")
        simple_string = regex(r"[a-zA-Z ]*")
        equals = string("=")
        value = regex(r"[^,]*") | regex(r'".*"')
        number = regex(r"[0-9]+")
        # Exactly five h/l pin flags; 'h' maps to False, 'l' to True.
        gp_value = regex(r"[hl]").map(lambda v: {"h": False, "l": True}[v]) * 5

        # `indi <path> name=value, name=value, ...`
        # -> {'path': ..., 'info': {name: value, ...}} (the per-pair dicts
        # are merged into one mapping).
        indi_parser = seq(indi_operator << space).then(
            seq(
                path=obj << space.optional(),
                info=seq(
                    name=name << equals,
                    value=value <<
                    string(", ").optional()).map(lambda x: {
                        x["name"]: x["value"]
                    }).many().map(
                        lambda kv: {k: v
                                    for d in kv for k, v in d.items()}),
            ))
        # `GPO <number> hhlll` -> {'number': ..., 'pins': [...]}
        gp_parser = seq(gp_operator << space).then(
            seq(number=number << space, pins=gp_value))
        # `ERROR <number> <message>`
        error_parser = seq(string("ERROR") << space).then(
            seq(number << space, simple_string))

        self.p = indi_parser | gp_parser | error_parser
Beispiel #29
0
 def parseRow():
     # One XYZ table row: element symbol followed by three float coordinates.
     symbol = yield parsy.regex(r'\s*[a-zA-Z.]*')
     coords = []
     for _ in range(3):
         yield parsy.whitespace.many()
         coords.append((yield floatParser))
     return {'atom': symbol, 'x': coords[0], 'y': coords[1], 'z': coords[2]}
Beispiel #30
0
 def parseRow():
     # Element symbol then x, y, z floats; returns (coordinate array, symbol).
     symbol = yield regex(r'\s*[a-zA-Z.]*')
     coords = []
     for _ in range(3):
         yield whitespace.many()
         coords.append((yield floatParser))
     return np.array(coords), symbol.strip()
Beispiel #31
0
def test_nested_basic():
    # Digits nested in parentheses parse into nested Python lists.
    digits = parsy.regex('[0-9]+').map(int)
    parser = nested(
        parsy.string('('),
        parsy.string(')'),
        digits,
        parsy.string(' '),
    )
    expected = [0, 1, [2, 3], [4, 5, 6], 7, 8]
    assert parser.parse("(0 1 (2 3) (4 5 6) 7 8)") == expected
Beispiel #32
0
from parsy import string, regex, generate
import re
import pdb

# Lexical layer: whitespace and `;`-to-end-of-line comments are skipped
# after every token.
whitespace = regex(r'\s+', re.MULTILINE)
comment = regex(r';.*')
ignore = (whitespace | comment).many()

# A lexeme is any parser followed by ignorable trivia.
lexeme = lambda p: p << ignore

lparen = lexeme(string('('))
rparen = lexeme(string(')'))
number = lexeme(regex(r'\d+')).map(int)
symbol = lexeme(regex(r'[\d\w_-]+'))
true   = lexeme(string('#t')).result(True)
false  = lexeme(string('#f')).result(False)

# An atom is the first of these that matches; order puts literals before
# the catch-all symbol.
atom = true | false | number | symbol

@generate
def form():
    # A parenthesised sequence of expressions.
    yield lparen
    items = yield expr.many()
    yield rparen
    return items

@generate
def quote():
    # 'expr desugars to the two-element list ['quote', expr].
    yield string("'")
    quoted = yield expr
    return ['quote', quoted]
Beispiel #33
0
from parsy import string, regex, generate, ParseError
import pdb

# Single-character building blocks used by the tests below.
letter = regex(r'[a-zA-Z]')
digit  = regex(r'[0-9]')

def test_string():
    # string() matches exactly its argument and fails on anything else.
    parser = string('x')
    assert parser.parse('x') == 'x'

    rejected = False
    try:
        parser.parse('y')
    except ParseError:
        rejected = True
    assert rejected

def test_regex():
    # A digit-class regex parses single digits and rejects non-digits.
    parser = regex(r'[0-9]')

    for ch in '14':
        assert parser.parse(ch) == ch

    rejected = False
    try:
        parser.parse('x')
    except ParseError:
        rejected = True
    assert rejected

def test_then():
    # `>>` sequences two parsers and keeps only the right-hand result.
    xy_parser = string('x') >> string('y')
    assert xy_parser.parse('xy') == 'y'

    for bad in ('y', 'z'):
        rejected = False
        try:
            xy_parser.parse(bad)
        except ParseError:
            rejected = True
        assert rejected
Beispiel #34
0
from parsy import string, regex, generate
import re
from sys import stdin

# JSON lexical layer: every token may be followed by insignificant whitespace.
whitespace = regex(r'\s*', re.MULTILINE)

lexeme = lambda p: p << whitespace

lbrace = lexeme(string('{'))
rbrace = lexeme(string('}'))
lbrack = lexeme(string('['))
rbrack = lexeme(string(']'))
colon  = lexeme(string(':'))
comma  = lexeme(string(','))
true   = lexeme(string('true')).result(True)
false  = lexeme(string('false')).result(False)
null   = lexeme(string('null')).result(None)

# JSON number grammar (integer part, optional fraction, optional exponent);
# all numbers are converted to float.
number = lexeme(
  regex(r'-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?')
).map(float)

# A run of characters inside a string literal that needs no unescaping.
string_part = regex(r'[^"\\]+')
string_esc = string('\\') >> (
  string('\\')
  | string('/')
  | string('b').result('\b')
  | string('f').result('\f')
  | string('n').result('\n')
  | string('r').result('\r')
  | string('t').result('\t')