Esempio n. 1
0
def _build_parse_rest_of_etc():
    def parse_rest_of_double_quoted_string_so_far(scn):
        # Materialize as a list so callers can pop the trailing sexp (#here1)
        return list(do_parse_rest_of_double_quoted_string_so_far(scn))

    def do_parse_rest_of_double_quoted_string_so_far(scn):
        # Consume the inside of a double-quoted string: alternating runs of
        # plain content and supported escape sequences, until neither matches
        while True:

            plain = scn.scan(one_or_more_not_this_or_that)
            saw_backslash = scn.skip(a_backslash)

            if not (plain or saw_backslash):
                break

            if saw_backslash:
                esc_char = scn.scan_required(valid_backslash_ting)
                if 'n' == esc_char:
                    esc_typ = 'newline_escape_sequence'
                elif 't' == esc_char:
                    esc_typ = 'tab_escape_sequence'
                else:
                    assert '"' == esc_char
                    esc_typ = 'double_quote_escape_sequence'
            else:
                esc_typ = None

            yield 'unencoded_string_content', (plain or '')
            yield 'any_escape_sequence', esc_typ

        # We stopped at either the end of the line or the closing quote

        if scn.empty:
            yield ('end_of_surface_line', )
            return

        scn.skip_required(double_quote)
        yield ('end_of_double_quoted_string', )

    from text_lib.magnetics.string_scanner_via_string import \
        pattern_via_description_and_regex_string as o

    one_or_more_not_this_or_that = o(
        'one or more not double quote or backslash', r'[^"\\]+')

    a_backslash = o('a backslash', r'\\')

    valid_backslash_ting = o("'t' or 'n' or '\"'", '[tn"]')

    double_quote = o("double quote", '"')

    return parse_rest_of_double_quoted_string_so_far
Esempio n. 2
0
def _spans_of_old(throwing_listener):

    from text_lib.magnetics.string_scanner_via_string import \
        StringScanner as string_scanner_via, \
        pattern_via_description_and_regex_string as o

    zero_or_more_not_special = o('zero or more not special', r'[^,"\\\n\r]*')
    one_or_more_spaces = o('one or more spaces', '[ ]+')

    def spans_via_line(line):
        # Yield one (begin, end) offset pair per comma-separated cell.
        # The line must end in a newline; quoted cells are not supported.

        scn = string_scanner_via(line, throwing_listener)
        cell_start = scn.pos
        while True:
            scn.skip_required(zero_or_more_not_special)
            if scn.empty:
                xx("line ended without a terminating newline")
            cell_end = scn.pos
            yield cell_start, cell_end
            char = line[cell_end]

            if '\n' == char:
                scn.advance_by_one()
                assert scn.empty
                break

            if ',' == char:
                scn.advance_by_one()
                scn.skip(one_or_more_spaces)
                if scn.empty:
                    xx()
                cell_start = scn.pos
                continue

            if '"' == char:
                xx("have fun parsing escaped quotes")

            xx(f"unexpected character {char!r}")

    return spans_via_line
Esempio n. 3
0
def _build_tokenized_sections_via():

    def tokenized_sections_via(docu, body_of_text, listener):
        # Walk each section element, parsing its string key into a sexp.
        # A `stop` raised thru the throwing listener ends traversal early.
        throwing_listener = build_throwing_listener(listener, stop)
        for section_el in _section_elements(docu, body_of_text, listener):
            scn = StringScanner(section_el.string_key(), throwing_listener)
            try:
                yield sexp_via_parse_section_key(scn, section_el)
            except stop:
                break

    def sexp_via_parse_section_key(scn, section_el):
        # Grammar: 'document-meta' EOS | 'entity' ': ' ID ': ' 'attributes' EOS
        head = scn.scan_required(first_word)
        if 'document-meta' == head:
            scn.skip_required(eos)
            return 'document_meta', None, section_el
        scn.skip_required(colon)
        eid = scn.scan_required(identifier)
        scn.skip_required(colon)
        scn.skip_required(attributes)
        scn.skip_required(eos)
        return 'entity_section', eid, section_el

    from text_lib.magnetics.string_scanner_via_string import \
        StringScanner, pattern_via_description_and_regex_string as o, \
        build_throwing_listener

    first_word = o('entity|document-meta', r'(entity|document-meta)\b')
    eos = o('end of string', '$')
    colon = o("colon (':') and space", ': ')  # or make space optional
    identifier = o('identifier', '[A-Z0-9]{3}')  # ..
    attributes = o("'attributes' keyword", r'attributes\b')

    class stop(RuntimeError):
        # our own subclass even though a general one exists elsewhere. safer
        pass

    return tokenized_sections_via
Esempio n. 4
0
def _build_name_gist_via_name():
    from text_lib.magnetics.string_scanner_via_string import \
        StringScanner, pattern_via_description_and_regex_string as o

    # define some reflexive regexes

    all_LC_or_UC = o('all lowercase or all uppercase attribute name piece',
                     r'[a-z0-9]+|[A-Z0-9]+')

    field_name_word_separator = o('underscore', '_')

    # at #history-A.3, changed from dash for [#873.21] (use underscores not..)

    # use them:

    def name_gist_via_name(attr_name_string, listener):
        # Lowercase and concatenate the underscore-separated pieces of an
        # attribute name. On a parse failure (reported thru the listener)
        # the result is None.
        # (before #history-A.1, this was how _AttributeName was built)

        scn = StringScanner(attr_name_string, listener)
        pieces = []
        while True:
            piece = scn.scan_required(all_LC_or_UC)
            if piece is None:
                return
            pieces.append(piece)
            if scn.empty:
                break
            if not scn.skip_required(field_name_word_separator):
                return
        return ''.join(piece.lower() for piece in pieces)

    return name_gist_via_name
Esempio n. 5
0
def sexps_via_lines(lines, listener=None):
    """NOTE this is a rough proof-of-concept. It will *not* parse all

    GraphViz documents, nor is it intended to [etc the usual disclaimer..]

    However, wherever it was a good fit, we tried to use names from the
    published grammar

    There's a CLI-exposed toolkit for developing this under `kst`
    """

    # == States [#008.2]

    def from_beginning_state():
        yield if_open_digraph_line, move_to_inside_digraph

    def from_root_of_document():
        yield if_blank_line, ignore_for_now
        yield if_open_multiline_comment_line, enter_multiline_comment

    def from_inside_digraph():
        yield if_blank_line, ignore_for_now
        yield if_open_node_statement, handle_line_that_begins_node_statement
        yield if_edge_statement, handle_line_that_begins_edge_statement
        yield if_set_an_attribute_statement, ignore_for_now
        # comment line one day
        yield if_close_clurly, close_digraph

    def from_inside_attribute_list():
        yield if_scan_attribute_assignment_left_hand_side, push_to_attr_value
        yield if_skip_a_comma, do_nothing
        yield if_scan_close_square_bracket, pop_out_of_attr_list_kinda_big_deal

    def from_before_attribute_value():
        yield if_scan_a_double_quote, BEGIN_TO_PARSE_DOUBLE_QUOTED_VALUE
        yield true, parse_a_not_double_quoted_value

    def from_inside_double_quoted_string():
        yield true, parse_rest_of_inside_quoted_string

    def from_inside_multiline_comment():
        yield if_line_that_closes_multiline_comment, pop_out_of_multiline_comme
        yield true, ignore_for_now

    # == Actions

    def move_to_inside_digraph():
        move_to(from_root_of_document)
        push_to(from_inside_digraph)

    # -- mess with line scanning

    def handle_line_that_begins_node_statement():
        store['element_start_lineno'] = lineno
        store['current_entity_type'] = 'node'

        md = store.pop('last_match')
        store['current_node_identifier'] = md['node_identifier']

        # Start a scanner just past the matched head of the statement
        scn = build_scanner(line)
        scn.advance_to_position(md.span()[1])
        store['current_string_scanner'] = scn

        push_to(from_inside_attribute_list)
        return parse_to_end_of_line()

    def handle_line_that_begins_edge_statement():
        store['element_start_lineno'] = lineno
        store['current_entity_type'] = 'edge'

        md = store.pop('last_match')
        store['left_node_identifier'], store['left_node_port'] = md.groups()

        scn = build_scanner(line)
        scn.advance_to_position(md.span()[1])
        store['current_string_scanner'] = scn

        store['right_node_identifier'] = scn.scan_required(identifier)
        rhs_port = None
        if ':' == scn.peek(1):
            scn.advance_by_one()
            rhs_port = scn.scan_required(identifier)
        store['right_node_port'] = rhs_port

        store['current_attribute_list'] = []

        # If the edge has attributes, parse them, else you're done with line
        if scn.skip(open_square_bracket):
            push_to(from_inside_attribute_list)
            return parse_to_end_of_line()

        skip_required_end_of_line(scn)
        store.pop('current_string_scanner')
        return finish_edge_or_node()  # (but we know it's edge)

    def push_to_attr_value():
        store['current_attribute_name']  # sanity check, catch it early
        push_to(from_before_attribute_value)

    def BEGIN_TO_PARSE_DOUBLE_QUOTED_VALUE():
        scn = self.scn
        sxs = _parse_rest_of_double_quoted_string_so_far(scn)
        typ, = sxs[-1]
        assert 'from_before_attribute_value' == stack[-1].__name__
        # NOTE(review): on the edge path 'current_attribute_list' was already
        # set above — confirm _NoClobberDict tolerates (or never reaches)
        # this re-assignment
        store['current_attribute_list'] = []
        if 'end_of_surface_line' == typ:
            sxs.pop()  # #here1
            store['current_double_quoted_string_sexp'] = sxs
            push_to(from_inside_double_quoted_string)
            return
        assert 'end_of_double_quoted_string' == typ
        xx("easy no problem. when the label ends on the same line. EASY")

    def parse_rest_of_inside_quoted_string():
        # A multi-line quoted value: accumulate until the closing quote
        scn = build_scanner(line)
        sxs = _parse_rest_of_double_quoted_string_so_far(scn)
        typ, = sxs[-1]
        if 'end_of_surface_line' == typ:
            sxs.pop()  # #here1
            store['current_double_quoted_string_sexp'].extend(sxs)
            return  # stay
        assert 'end_of_double_quoted_string' == typ
        sxs.pop()  # #here1

        accum_sxs = store.pop('current_double_quoted_string_sexp')
        accum_sxs.extend(sxs)

        k = store.pop('current_attribute_name')

        val_sexp = 'double_quoted_string', accum_sxs
        store['current_attribute_list'].append((k, val_sexp))
        assert 'from_inside_double_quoted_string' == stack.pop().__name__
        assert 'from_before_attribute_value' == stack.pop().__name__  # 😢
        store['current_string_scanner'] = scn
        return parse_to_end_of_line()

    def parse_to_end_of_line():
        # Drive the FSA over the current scanner until the line is consumed
        res = None
        while self.scn.more:
            action = find_transition()
            res = action()
            if res is None:
                continue
            # The only way you can produce something is at the end of the line
            assert self.scn.empty
            break
        store.pop('current_string_scanner')
        return res

    def parse_a_not_double_quoted_value():
        literal_value = self.scn.scan_required(identifier)
        k = store.pop('current_attribute_name')
        val_sexp = 'identifier_as_attribute_value', literal_value
        store['current_attribute_list'].append((k, val_sexp))
        assert 'from_before_attribute_value' == stack.pop().__name__

    # --

    def pop_out_of_attr_list_kinda_big_deal():
        assert 'from_inside_attribute_list' == stack[-1].__name__
        stack.pop()
        return finish_edge_or_node()

    def finish_edge_or_node():
        assert 'from_inside_digraph' == stack[-1].__name__  # or not
        typ = store.pop('current_entity_type')
        alist = store.pop('current_attribute_list')
        if 'edge' == typ:
            return finish_edge(alist)
        assert 'node' == typ
        return finish_node(alist)

    def finish_node(alist):
        iden = store.pop('current_node_identifier')
        this = _finish_alist(alist)  # ..
        use_lineno = store.pop('element_start_lineno')
        sx = _NodeSexp(iden, this, use_lineno)
        return 'yield_this', sx

    def finish_edge(alist):
        these = (store.pop('left_node_identifier'),
                 store.pop('left_node_port'),
                 store.pop('right_node_identifier'),
                 store.pop('right_node_port'))
        this = _finish_alist(alist)  # ..
        use_lineno = store.pop('element_start_lineno')
        sx = _EdgeSexp(*these, this, use_lineno)
        return 'yield_this', sx

    def close_digraph():
        assert 'from_inside_digraph' == stack[-1].__name__
        stack.pop()
        assert 'from_root_of_document' == stack[-1].__name__

    # --

    def enter_multiline_comment():
        push_to(from_inside_multiline_comment)

    def pop_out_of_multiline_comme():
        assert 'from_inside_multiline_comment' == stack[-1].__name__
        stack.pop()

    def ignore_for_now():
        pass

    do_nothing = ignore_for_now

    # == Tests

    def if_blank_line():
        return '\n' == line

    def if_open_multiline_comment_line():
        md = open_comment_simple_rx.match(line)
        if md is None:
            return
        pos = line.find('*/', 2)
        if -1 != pos:
            xx("ugh can we not have single-line comments please")
        return True

    def if_line_that_closes_multiline_comment():
        pos = line.find('*/')
        return -1 != pos

    def if_open_digraph_line():
        return re.match(f'digraph {iden_rsx}[ ]?\\{{$', line)

    def if_set_an_attribute_statement():
        return re.match(f'{iden_rsx}=', line)  # big meh

    def if_open_node_statement():
        md = looks_like_open_node_rx.match(line)
        if md is None:
            return
        store['last_match'] = md
        return True

    def if_edge_statement():
        md = huge_peek_for_edge_rx.match(line)
        if md is None:
            return
        store['last_match'] = md
        return True

    # -- tests that use scanner

    def if_scan_attribute_assignment_left_hand_side():
        scn = self.scn
        scn.skip(one_or_more_space_characters)  # #here2
        s = scn.scan(identifier)
        if s is None:
            return
        store['current_attribute_name'] = s
        scn.skip_required(equals)
        return True

    def if_scan_a_double_quote():
        return self.scn.skip(double_quote)

    def if_skip_a_comma():
        # assume #here2
        return self.scn.skip(comma)

    def if_scan_close_square_bracket():
        scn = self.scn
        yes = scn.skip(close_square_bracket)
        if not yes:
            return
        skip_required_end_of_line(scn)
        return True

    def skip_required_end_of_line(scn):
        if scn.skip(newline):
            assert scn.empty
            return
        xx(f"Maybe this is an end-of-line comment which is allowed: {scn.rest()}"
           )  # noqa: E501

    def if_close_clurly():
        return '}\n' == line  # meh

    def true():
        return True

    # == used below

    iden_rsx = '[a-zA-Z_][a-zA-Z0-9_]*'

    # ==

    def build_scanner(line):
        return StringScanner(line, tlistener, cstacker)

    def cstacker():
        return ({'line': line, 'lineno': lineno}, )

    from text_lib.magnetics.string_scanner_via_string import \
        StringScanner, \
        pattern_via_description_and_regex_string as o, \
        build_throwing_listener

    tlistener = build_throwing_listener(listener, _Stop)

    identifier = o('identifier', iden_rsx)
    open_square_bracket = o('open square bracket', r'\[')
    equals = o("equals sign", '=')
    double_quote = o("double quote", '"')
    comma = o('comma', ',')
    close_square_bracket = o('close square bracket', r'\]')
    one_or_more_space_characters = o('spaces', '[ ]+')
    newline = o('newline', r'\n')

    # ==

    import re

    looks_like_open_node_rx = re.compile(
        f"""
        (?P<node_identifier> {iden_rsx} )
        \\[
    """, re.VERBOSE)

    huge_peek_for_edge_rx = re.compile(
        f"""
        (?P<left_node_identifier> {iden_rsx} )
        (?: : (?P<left_node_port> {iden_rsx} ) )?
        ->
    """, re.VERBOSE)

    open_comment_simple_rx = re.compile(r'/\*')

    # == Interacting with FSA state:

    def move_to(state_function):
        stack[-1] = state_function

    def push_to(state_function):
        stack.append(state_function)

    # ==

    def find_transition():
        # First test that answers truthily for the current state wins
        for test, action in stack[-1]():
            yn = test()
            if yn:
                return action

        reason_head = f"No transition found {stack[-1].__name__}"
        scn = store.get('current_string_scanner')
        if scn:

            # (was: ''.join(reason_head, '\n') — a TypeError, str.join takes
            # one iterable — and the middle line was missing its f-prefix.
            # Also renamed so we don't shadow the `lines` parameter.)
            def reason_lines():
                yield f"{reason_head}\n"
                yield f"  {line}"
                yield ''.join(('  ', '-' * scn.pos, '^\n'))

            reason = ''.join(reason_lines())
        else:
            reason = f"{reason_head} for {line!r}"
        xx(reason)

    # ==

    class HeyGuysWhatsUp:
        @property
        def scn(self):
            return store['current_string_scanner']

    self = HeyGuysWhatsUp()
    store = _NoClobberDict()
    stack = [from_beginning_state]

    lineno = 0
    try:
        for line in lines:
            lineno += 1
            while True:  # (there may be a 'redo' directive in the future)
                action = find_transition()
                direc = action()
                if direc is None:
                    break  # parse next line, if any
                typ = direc[0]
                assert 'yield_this' == typ
                product, = direc[1:]
                yield product
                break  # parse next line, if any
    except _Stop:
        return

    if 1 != len(stack):
        # (was missing the f-prefix, so the message showed the literal braces)
        xx(f"something didn't close, can't end {stack[-1].__name__}")
Esempio n. 6
0
def _build_function_call_parser():
    def egads(string, listener):
        scn = StringScanner(string, build_throwing_listener(listener))
        return main(scn)

    def main(scn):
        # Parse `func_name(arg1, arg2, ...)` into (name, arg_sexps).
        # (there is a much simpler version of something like this at [#882.T])

        func_name = scn.scan_required(func_name_symbol)
        scn.skip_required(open_paren)

        def each_arg_sexp():
            if scn.skip(close_paren):  # #here2
                return
            while True:
                yield do_one_arg(scn)
                if scn.skip(close_paren):  # #here2
                    return
                scn.skip_required(comma)

        arg_sexps = tuple(each_arg_sexp())
        if scn.more:
            xx('cover extra characters after close')
        return func_name, arg_sexps

    def do_one_arg(scn):
        # Try a variable name first, then fall back to a literal value
        for typ, sym in (('variable_name', variable_name_symbol),
                         ('literal_value', hacky_mixed_value_sym)):
            s = scn.scan(sym)
            if s:
                return typ, s

        scn.whine_about_expecting(variable_name_symbol, hacky_mixed_value_sym)

        assert ()  # (presumably unreachable: the whine raises thru listener)

    def build_throwing_listener(listener):
        def use_listener(sev, *rest):
            listener(sev, *rest)
            if 'error' == sev:
                raise stop()

        return use_listener

    from text_lib.magnetics.string_scanner_via_string import \
        StringScanner, \
        pattern_via_description_and_regex_string as o

    func_name_symbol = o('function_name', iden)
    open_paren = o('open_paren', r'\(')

    variable_name_symbol = o('variable_name', f"{iden}(?=[,)])")
    # (you need the forward lookahead assertion so you don't match foo.md 🙃)

    comma = o('comma', r',[ ]*')
    close_paren = o('close_paren', r'\)')
    hacky_mixed_value_sym = o('hacky_mixed_value', '[^,)]+')  # assume not var

    stop = _Stop

    return egads
Esempio n. 7
0
    content_s = scn.scan_required(_some_content)  # (Case1403) ⏛ [#873.5]

    if content_s is None:
        return

    if False and content_s[0] in ('"', "'"):
        # allow literal quotes in values since #history-B.6
        raise Exception(  # #not-covered
            "Can we please just not bother with quotes ever? "
            "It seems they may neve be necessary for us in these files "
            f"({repr(content_s)}")

    return _Field(field_name, content_s, posov)


# Scanner patterns for one record field ("name: value") — `o` is presumably
# pattern_via_description_and_regex_string, bound earlier in this file
_field_name = o('field name', r'[a-zA-Z][_a-zA-Z0-9]*')
# (real recsel doesn't allow multibyte in first char, or dashes anywhere)

_colon = o('colon', ':')

_space = o('space', '[ ]+')

# a field value: everything up to but not including the newline
_some_content = o('some content', r'[^\n]+')


class _Field:
    # property names are derived from names used in /usr/local/include/rec.h
    # however, we have inflected the names further with local conventions

    def __init__(self, nn, vv, posov):
        self.field_name = nn
def _build_element_parser(tlistener, path=None):
    # Build a parser that maps an AST element to a sexp: node expressions
    # become table definitions parsed out of the node's 'label' attribute;
    # edge expressions are passed through as forward definitions.

    def parse_element(ast):
        # Dispatch on the type tag at ast[0]
        typ = ast[0]
        if 'node_expression' == typ:
            return parse_node(ast)
        assert 'edge_expression' == typ
        return 'my_edge_def', ast  # it's fine as-is as a forward def

    def parse_node(ast):
        # The label is a newline-separated record: table name first,
        # then one column definition per line.

        label = ast.attributes['label']
        cstacker = cstacker_via_AST(ast)
        scn = StringScanner(label, tlistener, cstacker)
        table_name = scn.scan_required(identifier)
        scn.skip_required(pipe_and_newline)
        cstacker.plus += 1

        my_cols = []
        while True:
            my_col = parse_column_definition(scn)
            cstacker.plus += 1  # keep the reported lineno in step with label
            my_cols.append(my_col)
            if scn.empty:
                break
        return 'my_table_def', _MyTableDef(table_name, my_cols, ast)

    def parse_column_definition(scn):
        # One column: optional <port>, name, type, then any constraints,
        # terminated by pipe-and-newline or end of string.

        # Parse any port name
        port_name = None
        if scn.skip(less_than):
            port_name = scn.scan_required(identifier)
            scn.skip_required(greater_than)
            scn.skip_required(space)

        # Parse the column name and type (very strict for now)
        col_name = scn.scan_required(identifier)
        scn.skip_required(space)
        col_abs_typ = scn.scan_required(abstract_types)

        # Constraints
        kw = {'is_prim': False, 'null_OK': False, 'is_uniq': False}
        pool = {'is_prim': primary, 'is_uniq': unique, 'null_OK': null_ok}  # o

        def find_first_one():
            # Try each remaining constraint pattern at the current position
            # (iteration follows the pool's insertion order)
            for k, v in pool.items():
                yn = scn.scan(v)
                if yn:
                    return k

        while pool:
            # Do you match any constraint in the pool from this point?
            k = find_first_one()

            # If you matched no constraints, forget the pool, you're done
            if k is None:
                break

            # (special handling for this one that's a two-token sequence meh)
            if 'is_prim' == k:
                scn.skip_required(key_token)

            # While every attribute is false by default this is easier
            assert kw[k] is False
            kw[k] = True

            # Keep looking for more constraints as long as you have unused ones
            pool.pop(k)

        w = scn.skip(end_of_column_def)
        # NOTE(review): if skip() reports a miss as False/0 rather than None,
        # this guard never fires — confirm skip's return contract
        if w is None:
            oh_boy = (primary, unique, null_ok, end_of_column_def)
            scn.whine_about_expecting(*oh_boy)

        return _MyColDef(port_name, col_name, col_abs_typ, **kw)

    def cstacker_via_AST(ast):
        # Context stacker: reports lineno as the AST's own lineno plus a
        # mutable offset ('plus') advanced by the caller as lines are consumed
        def cstacker():
            dct = {}
            dct['lineno'] = (ast.lineno + cstacker.plus)
            if path:
                dct['path'] = path
            return (dct, )

        cstacker.plus = 0
        return cstacker

    from text_lib.magnetics.string_scanner_via_string import \
        StringScanner, pattern_via_description_and_regex_string as o

    # ==

    # Port
    # (NOTE(review): description string says 'less_than than' — looks like a
    # typo, but it is runtime-visible in error messages so left unchanged)
    less_than = o('less_than than', '<')
    greater_than = o('greater than', '>')

    # (The below follow the order of here just because:)
    # https://www.sqlite.org/lang_createtable.html

    # Type
    abstract_types = o("'int' or 'text'", '(?:int|text)')

    # Primary or "null OK" or Unique
    primary = o('primary', r'[ ]primary\b')
    null_ok = o('null_ok', r'[ ]null_ok\b')
    unique = o('unique', r'[ ]unique\b')
    key_token = o('key', r'[ ]key\b')

    # Common
    identifier = o('identifier',
                   '[a-zA-Z][a-zA-Z0-9]*(?:_[a-zA-Z][a-zA-Z0-9]*)*')
    space = o('space', '[ ]')
    pipe_and_newline = o('pipe and newline', r'\|\n')  # redundant w/ next
    end_of_column_def = o('pipe and newline or end of string', r'(?:\|\n|$)')

    # ==

    return parse_element