Example #1
    def _parse_tokens(
        tokens: Sequence[BaseSegment], config: FluffConfig, recurse: bool = True
    ) -> Tuple[Optional[BaseSegment], List[SQLParseError]]:
        parser = Parser(config=config)
        violations = []
        # Parse the file and log any problems
        try:
            parsed: Optional[BaseSegment] = parser.parse(tokens, recurse=recurse)
        except SQLParseError as err:
            linter_logger.info("PARSING FAILED! : %s", err)
            violations.append(err)
            return None, violations

        if parsed:
            linter_logger.info("\n###\n#\n# {}\n#\n###".format("Parsed Tree:"))
            linter_logger.info("\n" + parsed.stringify())
            # We may succeed in parsing, but still have unparsable segments. Extract them here.
            for unparsable in parsed.iter_unparsables():
                # No exception has been raised explicitly, but we still create one here
                # so that we can use the common interface
                violations.append(
                    SQLParseError(
                        "Line {0[0]}, Position {0[1]}: Found unparsable section: {1!r}".format(
                            unparsable.pos_marker.working_loc,
                            unparsable.raw
                            if len(unparsable.raw) < 40
                            else unparsable.raw[:40] + "...",
                        ),
                        segment=unparsable,
                    )
                )
                linter_logger.info("Found unparsable segment...")
                linter_logger.info(unparsable.stringify())
        return parsed, violations
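
A minimal sketch of driving _parse_tokens by hand, assuming it is reachable as a plain function (in context it reads like a static helper) and reusing the Lexer/FluffConfig API from the later examples; the SQL string is purely illustrative:

from sqlfluff.core import FluffConfig
from sqlfluff.core.parser import Lexer

# Illustrative driver (assumed call pattern, not from the source).
config = FluffConfig(overrides=dict(dialect="ansi"))
tokens, _ = Lexer(config=config).lex("SELECT 1")
parsed, violations = _parse_tokens(tokens, config)
if parsed is not None:
    print(parsed.stringify())
for violation in violations:
    print(violation)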
Example #2
def test_bigquery_relational_operator_parsing(data):
    """Tests queries with a diverse mixture of relational operators."""
    # Generate a simple SELECT query with relational operators and conjunctions
    # as specified in 'data'. Note the conjunctions are used as separators
    # between comparisons, so the conjunction in the first item is not used.
    filter_parts = []
    for i, (relation, conjunction) in enumerate(data):
        if i:
            filter_parts.append(f" {conjunction} ")
        filter_parts.append(f"a {relation} b")
    raw = f'SELECT * FROM t WHERE {"".join(filter_parts)}'
    note(f"query: {raw}")
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect="bigquery"))
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial parse, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs

    # Do the parse WITHOUT lots of logging
    # The logs get too long here to be useful. We should use
    # specific segment tests if we want to debug logs.
    parsed = Parser(config=config).parse(tokens)
    print("Post-parse structure: {0}".format(parsed.to_tuple(show_raw=True)))
    print("Post-parse structure: {0}".format(parsed.stringify()))
    # Check we're all there.
    assert parsed.raw == raw
    # Check that there's nothing unparsable
    typs = parsed.type_set()
    assert "unparsable" not in typs
Example #3
def test__dialect__base_parse_struct(dialect, sqlfile, code_only, yamlfile,
                                     yaml_loader):
    """For given test examples, check parsed structure against yaml."""
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect=dialect))
    # Load the SQL
    raw = load_file(dialect, sqlfile)
    # Lex and parse the file
    tokens, _ = Lexer(config=config).lex(raw)
    parsed = Parser(config=config).parse(tokens)
    # Load the YAML
    res = yaml_loader(make_dialect_path(dialect, yamlfile))
    assert parsed.to_tuple(code_only=code_only, show_raw=True) == res
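
As a rough illustration of what is being compared (not a real fixture): with show_raw=True the tuple tree carries (type, raw) pairs at the leaves and (type, (children, ...)) at inner nodes, so a loaded YAML fixture resolves to something shaped like:

# Illustrative shape only -- real fixtures depend on the dialect and file.
expected = (
    "file",
    (
        ("statement", (
            ("select_statement", (
                ("select_clause", (
                    ("keyword", "SELECT"),
                )),
            )),
        )),
    ),
)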
Example #4
def test_bigquery_table_reference_segment_iter_raw_references(
        table_reference, reference_parts):
    """Tests BigQuery override of TableReferenceSegment.iter_raw_references().

    The BigQuery implementation is more complex, handling:
    - hyphenated table references
    - quoted or not quoted table references
    """
    query = f"SELECT bar.user_id FROM {table_reference}"
    config = FluffConfig(overrides=dict(dialect="bigquery"))
    tokens, lex_vs = Lexer(config=config).lex(query)
    parsed = Parser(config=config).parse(tokens)
    for table_reference in parsed.recursive_crawl("table_reference"):
        actual_reference_parts = [
            orp.part for orp in table_reference.iter_raw_references()
        ]
        assert reference_parts == actual_reference_parts
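
A plausible parametrization following the docstring's hyphenated case; both values are assumptions about the BigQuery override, not taken from the source:

# Hypothetical parameters: an unquoted, hyphenated project name.
table_reference = "my-project.my_dataset.my_table"
reference_parts = ["my-project", "my_dataset", "my_table"]
test_bigquery_table_reference_segment_iter_raw_references(
    table_reference, reference_parts
)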
Example #5
def parse_example_file(dialect: str, sqlfile: str):
    """Parse example SQL file, return parse tree."""
    config = FluffConfig(overrides=dict(dialect=dialect))
    # Load the SQL
    raw = load_file(dialect, sqlfile)
    # Lex and parse the file
    tokens, _ = Lexer(config=config).lex(raw)
    tree = Parser(config=config).parse(tokens)
    return tree
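
Called like this, where the dialect and file name are hypothetical placeholders for fixtures that exist on disk:

# Hypothetical fixture names -- substitute real files from the fixture set.
tree = parse_example_file("ansi", "select_simple_a.sql")
print(tree.stringify())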
Example #6
def test__dialect__base_file_parse(dialect, file):
    """For given test examples, check successful parsing."""
    raw = load_file(dialect, file)
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect=dialect))
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial parse, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs

    # Do the parse WITHOUT lots of logging
    # The logs get too long here to be useful. We should use
    # specific segment tests if we want to debug logs.
    if raw:
        parsed = Parser(config=config).parse(tokens)
        print("Post-parse structure: {0}".format(
            parsed.to_tuple(show_raw=True)))
        print("Post-parse structure: {0}".format(parsed.stringify()))
        # Check we're all there.
        assert parsed.raw == raw
        # Check that there's nothing unparsable
        typs = parsed.type_set()
        assert "unparsable" not in typs
    else:
        # If it's an empty file, check that we get a ValueError
        # here. The linter handles this by *not* parsing the file,
        # but in this case, we explicitly want an error.
        with pytest.raises(ValueError):
            Parser(config=config).parse(tokens)
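
The empty-file branch can be reproduced in isolation; the sketch below assumes, as the test implies, that lexing "" yields no tokens and that parsing an empty token sequence raises ValueError:

import pytest

from sqlfluff.core import FluffConfig
from sqlfluff.core.parser import Lexer, Parser

config = FluffConfig(overrides=dict(dialect="ansi"))
tokens, _ = Lexer(config=config).lex("")
with pytest.raises(ValueError):
    Parser(config=config).parse(tokens)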
Example #7
def test__dialect__base_file_parse(dialect, file):
    """For given test examples, check successful parsing."""
    raw = load_file(dialect, file)
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect=dialect))
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial parse, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs

    # Do the parse WITHOUT lots of logging
    # The logs get too long here to be useful. We should use
    # specific segment tests if we want to debug logs.
    parsed = Parser(config=config).parse(tokens)
    print("Post-parse structure: {0}".format(parsed.to_tuple(show_raw=True)))
    print("Post-parse structure: {0}".format(parsed.stringify()))
    # Check we're all there.
    assert parsed.raw == raw
    # Check that there's nothing unparsable
    typs = parsed.type_set()
    assert "unparsable" not in typs
Example #8
def lex_and_parse(config_overrides: Dict[str, Any], raw: str) -> Optional[BaseSegment]:
    """Performs a Lex and Parse, with cachable inputs within fixture."""
    # Load the right dialect
    config = FluffConfig(overrides=config_overrides)
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial parse, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs
    # TODO: Handle extremely verbose logging
    # temp - use negative grep: | grep -v "INFO\|DEBUG\|\[L\|#\|Initial\|^$"
    # better maybe - https://docs.pytest.org/en/6.2.x/logging.html#caplog-fixture

    if not raw:
        return None

    return Parser(config=config).parse(tokens)
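
A minimal call, reusing the overrides shape from the other examples (the query string is illustrative):

# Illustrative call; returns None for empty input, else the parse tree.
parsed = lex_and_parse(dict(dialect="bigquery"), "SELECT 1")
if parsed is not None:
    print(parsed.stringify())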
Example #9
import os

import yaml

from sqlfluff.core.parser import Parser, Lexer
from sqlfluff.core import FluffConfig
from sqlfluff.cli.commands import quoted_presenter

from dialects.parse_fixtures import get_parse_fixtures, load_file

yaml.add_representer(str, quoted_presenter)

parse_success_examples, _ = get_parse_fixtures()

for example in parse_success_examples:
    dialect, sqlfile = example
    config = FluffConfig(overrides=dict(dialect=dialect))
    # Load the SQL
    raw = load_file(dialect, sqlfile)
    # Lex and parse the file
    tokens, _ = Lexer(config=config).lex(raw)
    tree = Parser(config=config).parse(tokens)
    r = None
    if tree:
        r = tree.as_record(code_only=True, show_raw=True)
    # Remove the .sql file extension
    root = sqlfile[:-4]
    path = os.path.join("test", "fixtures", "parser", dialect, root + ".yml")
    with open(path, "w", newline="\n") as f:
        if r:
            yaml.dump(r, f, default_flow_style=False)
        else:
            f.write("")