def lex_and_parse(config_overrides: Dict[str, Any], raw: str) -> Optional[BaseSegment]:
    """Performs a Lex and Parse, with cachable inputs within fixture."""
    cfg = FluffConfig(overrides=config_overrides)
    lexed, lexing_violations = Lexer(config=cfg).lex(raw)
    # Round-trip check: the lexed tokens must reproduce the raw input exactly.
    assert raw == "".join(seg.raw for seg in lexed)
    # The lexer should not have produced any violations.
    assert not lexing_violations
    # TODO: Handle extremely verbose logging
    # temp - use negative grep: | grep -v "INFO\|DEBUG\|\[L\|#\|Initial\|^$"
    # better maybe - https://docs.pytest.org/en/6.2.x/logging.html#caplog-fixture
    if raw:
        return Parser(config=cfg).parse(lexed)
    return None
def test_bigquery_table_reference_segment_iter_raw_references(
    table_reference, reference_parts
):
    """Tests BigQuery override of TableReferenceSegment.iter_raw_references().

    The BigQuery implementation is more complex, handling:
    - hyphenated table references
    - quoted or not quoted table references
    """
    query = f"SELECT bar.user_id FROM {table_reference}"
    cfg = FluffConfig(overrides=dict(dialect="bigquery"))
    lexed, _ = Lexer(config=cfg).lex(query)
    tree = Parser(config=cfg).parse(lexed)
    # Avoid shadowing the fixture parameter: use a distinct loop name.
    for ref_segment in tree.recursive_crawl("table_reference"):
        actual_parts = [
            raw_ref.part for raw_ref in ref_segment.iter_raw_references()
        ]
        assert reference_parts == actual_parts
def test__dialect__base_parse_struct(
    dialect, sqlfile, code_only, yamlfile, yaml_loader
):
    """For given test examples, check parsed structure against yaml."""
    # Configure for the dialect under test.
    cfg = FluffConfig(overrides=dict(dialect=dialect))
    # Read the SQL fixture from disk.
    sql = load_file(dialect, sqlfile)
    # Lex, then parse, the fixture.
    segments, _ = Lexer(config=cfg).lex(sql)
    tree = Parser(config=cfg).parse(segments)
    # Load the expected structure from YAML.
    expected = yaml_loader(make_dialect_path(dialect, yamlfile))
    if not tree:
        # No parse tree: the expectation must match the empty result.
        assert tree == expected
    else:
        assert expected == tree.to_tuple(code_only=code_only, show_raw=True)
def test__dialect__base_file_parse(dialect, file):
    """For given test examples, check successful parsing.

    Lexes and parses the fixture file for the given dialect, then checks:
    - the lexed tokens round-trip back to the raw source,
    - lexing produced no violations,
    - the parsed tree reproduces the raw source,
    - the tree contains no "unparsable" segments.
    """
    raw = load_file(dialect, file)
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect=dialect))
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial lex, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs
    # Do the parse WITHOUT lots of logging
    # The logs get too long here to be useful. We should use
    # specific segment tests if we want to debug logs.
    parsed = Parser(config=config).parse(tokens)
    # f-strings, consistent with the rest of the file, instead of str.format.
    print(f"Post-parse structure: {parsed.to_tuple(show_raw=True)}")
    print(f"Post-parse structure: {parsed.stringify()}")
    # Check we're all there.
    assert parsed.raw == raw
    # Check that there's nothing unparsable
    typs = parsed.type_set()
    assert "unparsable" not in typs
def test__parser__lexer_trim_post_subdivide(caplog):
    """Test a RegexLexer with a trim_post_subdivide function."""
    # One matcher: the subdivider splits on semicolons and the
    # trim_post_subdivide strips trailing newlines from each subdivision.
    matchers = [
        RegexLexer(
            "function_script_terminator",
            r";\s+(?!\*)\/(?!\*)|\s+(?!\*)\/(?!\*)",
            CodeSegment,
            segment_kwargs={"type": "function_script_terminator"},
            subdivider=StringLexer(
                "semicolon", ";", CodeSegment, segment_kwargs={"type": "semicolon"}
            ),
            trim_post_subdivide=RegexLexer(
                "newline",
                r"(\n|\r\n)+",
                NewlineSegment,
            ),
        )
    ]
    with caplog.at_level(logging.DEBUG):
        match = Lexer.lex_match(";\n/\n", matchers)
    # Expect exactly three elements: ";", "\n" and "/".
    expected_raws = [";", "\n", "/"]
    for idx, raw in enumerate(expected_raws):
        assert match.elements[idx].raw == raw
    assert len(match.elements) == len(expected_raws)
from sqlfluff.core.parser import Parser, Lexer
from sqlfluff.core import FluffConfig
from sqlfluff.cli.commands import quoted_presenter
from dialects.parse_fixtures import get_parse_fixtures, load_file

# Make sure strings are represented consistently (quoted) in the emitted YAML.
yaml.add_representer(str, quoted_presenter)

parse_success_examples, _ = get_parse_fixtures()

# Regenerate the expected-parse-tree YAML fixture for every successful example.
for example in parse_success_examples:
    dialect, sqlfile = example
    config = FluffConfig(overrides=dict(dialect=dialect))
    # Load the SQL
    raw = load_file(dialect, sqlfile)
    # Lex and parse the file
    tokens, _ = Lexer(config=config).lex(raw)
    tree = Parser(config=config).parse(tokens)
    r = None
    if tree:
        r = tree.as_record(code_only=True, show_raw=True)
    # Strip the file extension robustly rather than slicing off a
    # hard-coded number of characters (previously `sqlfile[:-4]`).
    root, _ext = os.path.splitext(sqlfile)
    path = os.path.join("test", "fixtures", "parser", dialect, root + ".yml")
    # Write with "\n" newlines for consistent output across platforms.
    with open(path, "w", newline="\n") as f:
        if r:
            yaml.dump(r, f, default_flow_style=False)
        else:
            # No parse tree (e.g. empty file): emit an empty fixture.
            f.write("")
def test__parser__lexer_obj(raw, res, caplog):
    """Test the lexer splits as expected in a selection of cases."""
    lexer = Lexer(config=FluffConfig())
    with caplog.at_level(logging.DEBUG):
        segments, _ = lexer.lex(raw)
    observed = [segment.raw for segment in segments]
    assert observed == res
def _lex_templated_file(
    templated_file: TemplatedFile, config: FluffConfig
) -> Tuple[Optional[Sequence[BaseSegment]], List[SQLLexError], FluffConfig]:
    """Lex a templated file.

    NOTE: This potentially mutates the config, so make sure to use
    the returned one.

    Args:
        templated_file: The rendered file to lex.
        config: Config to lex with. May be mutated in the process.

    Returns:
        A three-tuple of (tokens or None on failure, accumulated lexing
        violations, the config to use for subsequent steps).
    """
    violations: List[SQLLexError] = []
    linter_logger.info("LEXING RAW (%s)", templated_file.fname)
    # Get the lexer
    lexer = Lexer(config=config)
    # Lex the file and log any problems
    try:
        tokens, lex_vs = lexer.lex(templated_file)
        # We might just get the violations as a list
        violations += lex_vs
        linter_logger.info("Lexed tokens: %s", [seg.raw for seg in tokens] if tokens else None)
    except SQLLexError as err:
        # Lexing failed outright: record the error and bail with no tokens.
        linter_logger.info("LEXING FAILED! (%s): %s", templated_file.fname, err)
        violations.append(err)
        return None, violations, config
    if not tokens:  # pragma: no cover TODO?
        return None, violations, config
    # Check that we've got sensible indentation from the lexer.
    # We might need to suppress if it's a complicated file.
    # NOTE: the config value may be a string ("force") or a boolean-ish value.
    templating_blocks_indent = config.get("template_blocks_indent", "indentation")
    if isinstance(templating_blocks_indent, str):
        force_block_indent = templating_blocks_indent.lower().strip(
        ) == "force"
    else:
        force_block_indent = False
    # Normalise the (possibly string) config value to a plain bool.
    templating_blocks_indent = bool(templating_blocks_indent)
    # If we're forcing it through we don't check.
    if templating_blocks_indent and not force_block_indent:
        # Sum of indent_val over all tokens; non-zero means unbalanced
        # template indents, so indent linting is disabled for this file.
        indent_balance = sum(
            getattr(elem, "indent_val", 0)
            for elem in cast(Tuple[BaseSegment, ...], tokens))
        if indent_balance != 0:
            linter_logger.debug(
                "Indent balance test failed for %r. Template indents will not be "
                "linted for this file.",
                templated_file.fname,
            )
            # Don't enable the templating blocks.
            templating_blocks_indent = False
    # The file will have been lexed without config, so check all indents
    # are enabled.
    new_tokens = []
    for token in cast(Tuple[BaseSegment, ...], tokens):
        if token.is_meta:
            token = cast(MetaSegment, token)
            if token.indent_val != 0:
                # Don't allow it if we're not linting templating block indents.
                if not templating_blocks_indent:
                    continue
        new_tokens.append(token)
    # Return new buffer
    return new_tokens, violations, config