Beispiel #1
0
def test__parser__grammar_oneof_take_longest_match(seg_list):
    """Check that OneOf prefers the option producing the longest match."""
    foo_keyword = StringParser("foo", KeywordSegment)
    baar_keyword = StringParser("baar", KeywordSegment)
    foo_pattern = RegexParser(r"fo{2}", KeywordSegment)
    foo_then_baar = Sequence(foo_keyword, baar_keyword)

    # The regex option is listed first, but the sequence covers two
    # segments instead of one, so the sequence is the expected winner.
    grammar = OneOf(foo_pattern, foo_then_baar)

    remaining = seg_list[2:]
    expected_foo = KeywordSegment("foo", seg_list[2].pos_marker)
    expected_baar = KeywordSegment("baar", seg_list[3].pos_marker)

    with RootParseContext(dialect=None) as ctx:
        # Sanity check: on its own, the regex parser only matches "foo".
        regex_only = foo_pattern.match(remaining, parse_context=ctx)
        assert regex_only.matched_segments == (expected_foo,)

        # Through OneOf, the longer "foo" + "baar" sequence is returned.
        longest = grammar.match(remaining, parse_context=ctx)
        assert longest.matched_segments == (expected_foo, expected_baar)
Beispiel #2
0
def test__parser__grammar_oneof_take_first(seg_list):
    """Test OneOf with two options of equal match length, in both orders."""
    foo_pattern = RegexParser(r"fo{2}", KeywordSegment)
    foo_keyword = StringParser("foo", KeywordSegment)

    # Either parser matches "foo" by itself, so the grammar is built
    # twice with the options swapped to exercise both orderings.
    grammars = (
        OneOf(foo_pattern, foo_keyword),
        OneOf(foo_keyword, foo_pattern),
    )

    with RootParseContext(dialect=None) as ctx:
        for grammar in grammars:
            result = grammar.match(seg_list[2:], parse_context=ctx)
            expected = (KeywordSegment("foo", seg_list[2].pos_marker),)
            assert result.matched_segments == expected
Beispiel #3
0
class SamplingExpressionSegment(BaseSegment):
    """A sampling expression.

    Grammar: the `TABLESAMPLE` keyword, a bracketed sampling specification
    and an optional alias. The bracketed specification is one of:

    * `BUCKET <numeric> OUT OF <numeric>`, optionally followed by `ON` and
      either a single identifier or a function;
    * a numeric literal, optionally followed by `PERCENT` or `ROWS`;
    * a byte-length literal (digits followed by one of b/B/k/K/m/M/g/G).
    """

    type = "sample_expression"
    match_grammar = Sequence(
        "TABLESAMPLE",
        Bracketed(
            OneOf(
                # Bucket sampling: BUCKET x OUT OF y [ON identifier | function]
                Sequence(
                    "BUCKET",
                    Ref("NumericLiteralSegment"),
                    "OUT",
                    "OF",
                    Ref("NumericLiteralSegment"),
                    # The ON clause may be omitted entirely.
                    Sequence(
                        "ON",
                        OneOf(
                            Ref("SingleIdentifierGrammar"),
                            Ref("FunctionSegment"),
                        ),
                        optional=True,
                    ),
                ),
                # A numeric literal, optionally qualified by PERCENT or ROWS.
                Sequence(
                    Ref("NumericLiteralSegment"),
                    OneOf("PERCENT", "ROWS", optional=True),
                ),
                # A size given as digits plus a single unit letter; emitted
                # with the segment type "byte_length_literal".
                RegexParser(
                    r"\d+[bBkKmMgG]",
                    CodeSegment,
                    type="byte_length_literal",
                ),
            ),
        ),
        # An alias may optionally follow the bracketed specification.
        Ref(
            "AliasExpressionSegment",
            optional=True,
        ),
    )
        RegexLexer("atsign_literal", r"@[a-zA-Z_][\w]*", CodeSegment),
        RegexLexer("dollar_literal", r"[$][a-zA-Z0-9_.]*", CodeSegment),
    ],
    before="not_equal",
)

# Additional grammar elements registered on the exasol_fs dialect.
exasol_fs_dialect.add(
    FunctionScriptTerminatorSegment=NamedParser(
        "function_script_terminator",
        CodeSegment,
        type="statement_terminator",
    ),
    WalrusOperatorSegment=NamedParser(
        "walrus_operator",
        SymbolSegment,
        type="assignment_operator",
    ),
    # Pattern: a letter A-Z followed by any number of A-Z, 0-9 or "_".
    VariableNameSegment=RegexParser(
        r"[A-Z][A-Z0-9_]*",
        CodeSegment,
        name="function_variable",
        type="variable",
    ),
)

# Override SemicolonSegment so that ";" carries the name and type "semicolon".
exasol_fs_dialect.replace(
    SemicolonSegment=StringParser(
        ";",
        SymbolSegment,
        name="semicolon",
        type="semicolon",
    ),
)


@exasol_fs_dialect.segment(replace=True)
class StatementSegment(BaseSegment):
    """A generic segment, to any of its child subsegments."""

    type = "statement"
        Sequence("FOR",
                 "SYSTEM_TIME",
                 "AS",
                 "OF",
                 Ref("ExpressionSegment"),
                 optional=True),
        Sequence("WITH",
                 "OFFSET",
                 "AS",
                 Ref("SingleIdentifierGrammar"),
                 optional=True),
    ),
    FunctionNameIdentifierSegment=RegexParser(
        # In BigQuery struct() has a special syntax, so we don't treat it as a function
        r"[A-Z][A-Z0-9_]*",
        CodeSegment,
        name="function_name_identifier",
        type="function_name_identifier",
        anti_template=r"STRUCT",
    ),
)


@bigquery_dialect.segment(replace=True)
class FunctionDefinitionGrammar(BaseSegment):
    """This is the body of a `CREATE FUNCTION AS` statement."""

    match_grammar = Sequence(
        AnyNumberOf(
            Sequence(
                "LANGUAGE",
                # Not really a parameter, but best fit for now.
Beispiel #6
0
    "SAMPLE",
    "TABLESAMPLE",
    "UNPIVOT",
])

snowflake_dialect.add(
    # In snowflake, these are case sensitive even though they're not quoted
    # so they need a different `name` and `type` so they're not picked up
    # by other rules.
    ParameterAssignerSegment=StringParser(
        "=>",
        SymbolSegment,
        name="parameter_assigner",
        type="parameter_assigner",
    ),
    # Unquoted element: any run of A-Z, 0-9 or "_".
    NakedSemiStructuredElementSegment=RegexParser(
        r"[A-Z0-9_]*",
        CodeSegment,
        name="naked_semi_structured_element",
        type="semi_structured_element",
    ),
    # Quoted element: shares the "semi_structured_element" type with the
    # naked variant above but has its own name.
    QuotedSemiStructuredElementSegment=NamedParser(
        "double_quote",
        CodeSegment,
        name="quoted_semi_structured_element",
        type="semi_structured_element",
    ),
    # Positional column reference: "$" followed by one or more digits.
    ColumnIndexIdentifierSegment=RegexParser(
        r"\$[0-9]+",
        CodeSegment,
        name="column_index_identifier_segment",
        type="identifier",
    ),
)
Beispiel #7
0
    ProcedureParameterGrammar=OneOf(
        Sequence(
            OneOf(
                Ref("OutputParameterSegment"),
                Ref("InputParameterSegment"),
                Ref("InputOutputParameterSegment"),
                optional=True,
            ),
            Ref("ParameterNameSegment", optional=True),
            Ref("DatatypeSegment"),
        ),
        Ref("DatatypeSegment"),
    ),
    LocalVariableNameSegment=RegexParser(
        r"`?[a-zA-Z0-9_]*`?",
        CodeSegment,
        name="declared_variable",
        type="variable",
    ),
    SessionVariableNameSegment=RegexParser(
        r"[@][a-zA-Z0-9_]*",
        CodeSegment,
        name="declared_variable",
        type="variable",
    ),
)

mysql_dialect.replace(
    DelimiterSegment=OneOf(Ref("SemicolonSegment"), Ref("TildeSegment")),
    TildeSegment=StringParser(
        "~", SymbolSegment, name="tilde", type="statement_terminator"
    ),
Beispiel #8
0
 RcfileKeywordSegment=StringParser("RCFILE",
                                   KeywordSegment,
                                   type="file_format"),
 SequencefileKeywordSegment=StringParser("SEQUENCEFILE",
                                         KeywordSegment,
                                         type="file_format"),
 TextfileKeywordSegment=StringParser("TEXTFILE",
                                     KeywordSegment,
                                     type="file_format"),
 PropertyGrammar=Sequence(
     Ref("QuotedLiteralSegment"),
     Ref("EqualsSegment"),
     Ref("QuotedLiteralSegment"),
 ),
 LocationGrammar=Sequence("LOCATION", Ref("S3UrlGrammar")),
 S3UrlGrammar=RegexParser(r"^'s3://.*", RawSegment),
 BracketedPropertyListGrammar=Bracketed(Delimited(Ref("PropertyGrammar"))),
 CTASPropertyGrammar=Sequence(
     OneOf(
         "external_location",
         "format",
         "partitioned_by",
         "bucketed_by",
         "bucket_count",
         "write_compression",
         "orc_compression",
         "parquet_compression",
         "field_delimiter",
     ),
     Ref("EqualsSegment"),
     Ref("LiteralGrammar"),
Beispiel #9
0
     allow_trailing=True,
 ),
 QuestionMarkSegment=StringParser(
     "?", SymbolSegment, name="question_mark", type="question_mark"
 ),
 AtSignLiteralSegment=NamedParser(
     "atsign_literal",
     CodeSegment,
     name="atsign_literal",
     type="literal",
     trim_chars=("@",),
 ),
 # Add a Full equivalent which also allows keywords
 NakedIdentifierSegmentFull=RegexParser(
     r"[A-Z_][A-Z0-9_]*",
     CodeSegment,
     name="naked_identifier_all",
     type="identifier",
 ),
 SingleIdentifierGrammarFull=OneOf(
     Ref("NakedIdentifierSegment"),
     Ref("QuotedIdentifierSegment"),
     Ref("NakedIdentifierSegmentFull"),
 ),
 DefaultDeclareOptionsGrammar=Sequence(
     "DEFAULT",
     OneOf(
         Ref("LiteralGrammar"),
         Bracketed(Ref("SelectStatementSegment")),
         Ref("BareFunctionSegment"),
         Ref("FunctionSegment"),
         Ref("ArrayLiteralSegment"),
Beispiel #10
0
     Ref("QuotedIdentifierSegment"),
     Ref("BracketedIdentifierSegment"),
 ),
 LiteralGrammar=OneOf(
     Ref("QuotedLiteralSegment"),
     Ref("QuotedLiteralSegmentWithN"),
     Ref("NumericLiteralSegment"),
     Ref("BooleanLiteralGrammar"),
     Ref("QualifiedNumericLiteralSegment"),
     # NB: Null is included in the literals, because it is a keyword which
     # can otherwise be easily mistaken for an identifier.
     Ref("NullLiteralSegment"),
     Ref("DateTimeLiteralGrammar"),
 ),
 ParameterNameSegment=RegexParser(r"[@][A-Za-z0-9_]+",
                                  CodeSegment,
                                  name="parameter",
                                  type="parameter"),
 FunctionNameIdentifierSegment=RegexParser(
     r"[A-Z][A-Z0-9_]*|\[[A-Z][A-Z0-9_]*\]",
     CodeSegment,
     name="function_name_identifier",
     type="function_name_identifier",
 ),
 DatatypeIdentifierSegment=Ref("SingleIdentifierGrammar"),
 PrimaryKeyGrammar=Sequence(
     "PRIMARY", "KEY", OneOf("CLUSTERED", "NONCLUSTERED", optional=True)),
 FromClauseTerminatorGrammar=OneOf(
     "WHERE",
     "LIMIT",
     "GROUP",
     "ORDER",
Beispiel #11
0
            bracket_pairs_set="angle_bracket_pairs",
        ),
    ),
    # BigQuery also supports the special "Struct" construct.
    BaseExpressionElementGrammar=ansi_dialect.get_grammar(
        "BaseExpressionElementGrammar").copy(
            insert=[Ref("TypelessStructSegment")]),
    FunctionContentsGrammar=ansi_dialect.get_grammar(
        "FunctionContentsGrammar").copy(
            insert=[Ref("TypelessStructSegment")],
            before=Ref("ExpressionSegment"),
        ),
    # BigQuery allows underscore in parameter names, and also anything if quoted in backticks
    ParameterNameSegment=OneOf(
        RegexParser(r"[A-Z_][A-Z0-9_]*",
                    CodeSegment,
                    name="parameter",
                    type="parameter"),
        RegexParser(r"`[^`]*`",
                    CodeSegment,
                    name="parameter",
                    type="parameter"),
    ),
    DateTimeLiteralGrammar=Nothing(),
)

# Set Keywords
# Empty the unreserved keyword set, then refill it with one entry per line
# of `bigquery_unreserved_keywords` (whitespace-stripped and upper-cased).
bigquery_dialect.sets("unreserved_keywords").clear()
bigquery_dialect.sets("unreserved_keywords").update(
    [
        keyword.strip().upper()
        for keyword in bigquery_unreserved_keywords.split("\n")
    ]
)

# Empty the reserved keyword set as well.
bigquery_dialect.sets("reserved_keywords").clear()