Beispiel #1
0
    def __init__(self):
        """Assemble the filter-expression grammar and start with an empty cache.

        The grammar follows Glue.Client.get_partitions(Expression): single
        conditions on an identifier, joined with binary AND / OR.  The finished
        parser element is stored in ``self._expr`` and ``self._cache`` starts
        out as an empty dict.
        """
        open_paren, close_paren = Suppress("("), Suppress(")")

        def parenthesizable(base, name=None):
            # Recursive element: ``base`` itself, or this same element wrapped
            # in one more pair of (suppressed) parentheses.
            element = Forward()
            if name is not None:
                element.set_name(name)
            element <<= base | open_paren + element + close_paren
            return element

        # NOTE these are AWS Athena column name best practices
        column = parenthesizable(
            Word(alphanums + "._").set_parse_action(_Ident), name="ident"
        )
        numeric = parenthesizable(pyparsing_common.number, name="number")
        quoted = parenthesizable(
            QuotedString(quote_char="'", esc_quote="''"), name="string"
        )

        value = (numeric | quoted).set_name("literal")
        value_list = delimited_list(value, min=1).set_name("list")

        comparison = one_of("<> >= <= > < =").set_name("binary op")

        # boolean connectives may themselves be wrapped in parentheses
        and_kw = parenthesizable(CaselessKeyword("and"))
        or_kw = parenthesizable(CaselessKeyword("or"))

        in_kw = CaselessKeyword("in")
        between_kw = CaselessKeyword("between")
        like_kw = CaselessKeyword("like")
        is_kw = CaselessKeyword("is")
        null_kw = CaselessKeyword("null")
        # NOT is only needed for matching, so it is dropped from the tokens
        not_kw = Suppress(CaselessKeyword("not"))

        # One element per supported condition; each positive form is paired
        # with its negated counterpart.
        is_null = (column + is_kw + null_kw).set_parse_action(_IsNull)
        is_not_null = (column + is_kw + not_kw + null_kw).set_parse_action(
            _IsNotNull
        )
        compare = (column + comparison + value).set_parse_action(_BinOp)
        like = (column + like_kw + quoted).set_parse_action(_Like)
        not_like = (column + not_kw + like_kw + quoted).set_parse_action(_NotLike)
        in_list = (
            column + in_kw + open_paren + value_list + close_paren
        ).set_parse_action(_In)
        not_in_list = (
            column + not_kw + in_kw + open_paren + value_list + close_paren
        ).set_parse_action(_NotIn)
        between = (
            column + between_kw + value + and_kw + value
        ).set_parse_action(_Between)
        not_between = (
            column + not_kw + between_kw + value + and_kw + value
        ).set_parse_action(_NotBetween)

        # Alternatives are tried in this order (first match wins).
        cond = (
            is_null
            | is_not_null
            | compare
            | like
            | not_like
            | in_list
            | not_in_list
            | between
            | not_between
        ).set_name("cond")

        # conditions can be joined using 2-ary AND and/or OR
        combined = infix_notation(
            cond,
            [
                (and_kw, 2, OpAssoc.LEFT, _BoolAnd),
                (or_kw, 2, OpAssoc.LEFT, _BoolOr),
            ],
        )
        self._expr = combined.set_name("expr")

        self._cache: Dict[str, _Expr] = {}
Beispiel #2
0
# One search term: FIELD OPERATOR followed by a RANGE or a STRING, or just a
# bare STRING with no field.
TERM = FIELD + OPERATOR + (RANGE | STRING) | STRING

# A complete query: terms combined with the unary NOT and the binary AND / OR
# operators.  The whole thing is optional, so an empty query also parses.
QUERY = Optional(
    infix_notation(
        TERM,
        [
            (NOT, 1, OpAssoc.RIGHT),
            (AND, 2, OpAssoc.LEFT),
            (OR, 2, OpAssoc.LEFT),
        ],
    )
)

# Helper parsing objects


class RegexExpr:
    def __init__(self, tokens):
Beispiel #3
0
    def: 𒁴𒈫
"""

# uncomment to show parse-time debugging
# pp.enable_diag(pp.Diagnostics.enable_debug_on_named_expressions)

# define a MINIMAL Python parser

# punctuation is matched but suppressed from the parsed results
LPAR, RPAR, COLON, EQ = map(pp.Suppress, "():=")

# the "def" keyword, spelled with cuneiform characters; ident_chars tells
# Keyword which characters count as part of a longer word
def_ = pp.Keyword("𒁴𒈫", ident_chars=Cuneiform.identbodychars).set_name("def")
any_keyword = def_

# an identifier is a whole word of cuneiform identifier characters that is
# not one of the keywords
ident = (~any_keyword) + pp.Word(
    Cuneiform.identchars, Cuneiform.identbodychars, as_keyword=True
)

# infix expression over quoted strings and integers; "*" is listed before
# "+" and therefore binds more tightly
str_expr = pp.infix_notation(
    pp.QuotedString('"') | pp.common.integer,
    [
        ("*", 2, pp.OpAssoc.LEFT),
        ("+", 2, pp.OpAssoc.LEFT),
    ],
)

# rvalue is declared up front because fn_call and rvalue refer to each other
rvalue = pp.Forward()
fn_call = (
    ident + pp.Group(LPAR + pp.Optional(rvalue) + RPAR)
).set_name("fn_call")

# alternatives are tried in order, so a function call is preferred over a
# bare identifier
rvalue <<= fn_call | ident | str_expr | pp.common.number
assignment_stmt = ident + EQ + rvalue

# a statement is either a function call or an assignment
stmt = pp.Group(fn_call | assignment_stmt).set_name("stmt")

# function header: def <name> ( [rvalue] ) :
fn_def = pp.Group(
    def_ + ident + pp.Group(LPAR + pp.Optional(rvalue) + RPAR) + COLON
).set_name("fn_def")
Beispiel #4
0
# A street address: a house number followed by one or more words.
street_address = pp.Group(
    integer("house_number") + word[1, ...]("street_name")
).set_name("street_address")

# A clock time written as two digits, a colon, and two digits.
time = pp.Regex(r"\d\d:\d\d")

# Demo grammar chaining many different pyparsing constructs (repetition,
# "&" each-in-any-order, counted arrays, multiplication, infix notation,
# and "..." skip-ahead) so that each one appears in the generated diagram.
grammar = (
    pp.Group(integer[1, ...])
    + (ppc.ipv4_address & word("header_word") & pp.Optional(time)).set_name(
        "header with various elements"
    )("header")
    + street_address("address")
    + pp.Group(pp.counted_array(word))
    + pp.Group(integer * 8)("data")
    + pp.Group(pp.Word("abc") + pp.Word("def") * 3)
    + pp.infix_notation(
        integer,
        [
            # plus_minus() makes a copy, so naming the unary operator does
            # not rename the binary plus_minus used below
            (plus_minus().set_name("pos_neg"), 1, pp.OpAssoc.RIGHT),
            (mult_div, 2, pp.OpAssoc.LEFT),
            (plus_minus, 2, pp.OpAssoc.LEFT),
        ],
    ).set_name("simple_arithmetic")
    + ...
    + pp.Group(ppc.ipv4_address)("ip_address")
).set_name("grammar")

# Render the grammar as a railroad diagram into an HTML file, including the
# results names attached above.
grammar.create_diagram(
    "railroad_diagram_demo.html",
    vertical=6,
    show_results_names=True,
)

test = """\
    1 2 3 
    ABC 1.2.3.4 12:45
    123 Main St 
    4
    abc def ghi jkl 
    5 5 5 5 5 5 5 5