def lex_and_parse(config_overrides: Dict[str, Any], raw: str) -> Optional[BaseSegment]:
    """Performs a Lex and Parse, with cachable inputs within fixture."""
    cfg = FluffConfig(overrides=config_overrides)
    lexed, lexing_violations = Lexer(config=cfg).lex(raw)
    # Round-trip check: the lexed tokens must reproduce the raw input exactly.
    assert raw == "".join(seg.raw for seg in lexed)
    # The lexer should not have produced any violations.
    assert not lexing_violations
    # TODO: Handle extremely verbose logging
    # temp - use negative grep: | grep -v "INFO\|DEBUG\|\[L\|#\|Initial\|^$"
    # better maybe - https://docs.pytest.org/en/6.2.x/logging.html#caplog-fixture
    if raw:
        return Parser(config=cfg).parse(lexed)
    return None
def test_bigquery_table_reference_segment_iter_raw_references(
    table_reference, reference_parts
):
    """Tests BigQuery override of TableReferenceSegment.iter_raw_references().

    The BigQuery implementation is more complex, handling:
    - hyphenated table references
    - quoted or not quoted table references
    """
    query = f"SELECT bar.user_id FROM {table_reference}"
    cfg = FluffConfig(overrides=dict(dialect="bigquery"))
    lexed, _ = Lexer(config=cfg).lex(query)
    tree = Parser(config=cfg).parse(lexed)
    # Avoid shadowing the fixture parameter: use a distinct loop name.
    for ref_segment in tree.recursive_crawl("table_reference"):
        actual_parts = [
            raw_ref.part for raw_ref in ref_segment.iter_raw_references()
        ]
        assert reference_parts == actual_parts
def test__dialect__base_parse_struct(
    dialect, sqlfile, code_only, yamlfile, yaml_loader
):
    """For given test examples, check parsed structure against yaml."""
    # Configure for the dialect under test.
    cfg = FluffConfig(overrides=dict(dialect=dialect))
    # Read the SQL fixture from disk.
    sql = load_file(dialect, sqlfile)
    # Lex, then parse, the fixture.
    segments, _ = Lexer(config=cfg).lex(sql)
    tree = Parser(config=cfg).parse(segments)
    # Load the expected structure from YAML.
    expected = yaml_loader(make_dialect_path(dialect, yamlfile))
    if not tree:
        # No parse tree: the expectation must match the empty result.
        assert tree == expected
    else:
        assert expected == tree.to_tuple(code_only=code_only, show_raw=True)
def test__dialect__base_file_parse(dialect, file):
    """For given test examples, check successful parsing.

    Lexes and parses the fixture file for the given dialect, then checks:
    - the lexed tokens round-trip back to the raw source,
    - lexing produced no violations,
    - the parsed tree reproduces the raw source,
    - the tree contains no "unparsable" segments.
    """
    raw = load_file(dialect, file)
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect=dialect))
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial lex, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs
    # Do the parse WITHOUT lots of logging
    # The logs get too long here to be useful. We should use
    # specific segment tests if we want to debug logs.
    parsed = Parser(config=config).parse(tokens)
    # f-strings, consistent with the rest of the file, instead of str.format.
    print(f"Post-parse structure: {parsed.to_tuple(show_raw=True)}")
    print(f"Post-parse structure: {parsed.stringify()}")
    # Check we're all there.
    assert parsed.raw == raw
    # Check that there's nothing unparsable
    typs = parsed.type_set()
    assert "unparsable" not in typs
def test__parser__lexer_trim_post_subdivide(caplog):
    """Test a RegexLexer with a trim_post_subdivide function."""
    # One matcher: the subdivider splits on semicolons and the
    # trim_post_subdivide strips trailing newlines from each subdivision.
    matchers = [
        RegexLexer(
            "function_script_terminator",
            r";\s+(?!\*)\/(?!\*)|\s+(?!\*)\/(?!\*)",
            CodeSegment,
            segment_kwargs={"type": "function_script_terminator"},
            subdivider=StringLexer(
                "semicolon", ";", CodeSegment, segment_kwargs={"type": "semicolon"}
            ),
            trim_post_subdivide=RegexLexer(
                "newline",
                r"(\n|\r\n)+",
                NewlineSegment,
            ),
        )
    ]
    with caplog.at_level(logging.DEBUG):
        match = Lexer.lex_match(";\n/\n", matchers)
    # Expect exactly three elements: ";", "\n" and "/".
    expected_raws = [";", "\n", "/"]
    for idx, raw in enumerate(expected_raws):
        assert match.elements[idx].raw == raw
    assert len(match.elements) == len(expected_raws)
from sqlfluff.core.parser import Parser, Lexer
from sqlfluff.core import FluffConfig
from sqlfluff.cli.commands import quoted_presenter
from dialects.parse_fixtures import get_parse_fixtures, load_file

# Make sure strings are represented consistently (quoted) in the emitted YAML.
yaml.add_representer(str, quoted_presenter)

parse_success_examples, _ = get_parse_fixtures()

# Regenerate the expected-parse-tree YAML fixture for every successful example.
for example in parse_success_examples:
    dialect, sqlfile = example
    config = FluffConfig(overrides=dict(dialect=dialect))
    # Load the SQL
    raw = load_file(dialect, sqlfile)
    # Lex and parse the file
    tokens, _ = Lexer(config=config).lex(raw)
    tree = Parser(config=config).parse(tokens)
    r = None
    if tree:
        r = tree.as_record(code_only=True, show_raw=True)
    # Strip the file extension robustly rather than slicing off a
    # hard-coded number of characters (previously `sqlfile[:-4]`).
    root, _ext = os.path.splitext(sqlfile)
    path = os.path.join("test", "fixtures", "parser", dialect, root + ".yml")
    # Write with "\n" newlines for consistent output across platforms.
    with open(path, "w", newline="\n") as f:
        if r:
            yaml.dump(r, f, default_flow_style=False)
        else:
            # No parse tree (e.g. empty file): emit an empty fixture.
            f.write("")
def test__parser__lexer_obj(raw, res, caplog):
    """Test the lexer splits as expected in a selection of cases."""
    lexer = Lexer(config=FluffConfig())
    with caplog.at_level(logging.DEBUG):
        segments, _ = lexer.lex(raw)
    observed = [segment.raw for segment in segments]
    assert observed == res
def _lex_templated_file(
    templated_file: TemplatedFile, config: FluffConfig
) -> Tuple[Optional[Sequence[BaseSegment]], List[SQLLexError], FluffConfig]:
    """Lex a templated file.

    NOTE: This potentially mutates the config, so make sure to use
    the returned one.

    Args:
        templated_file: The rendered file to lex.
        config: Config to lex with. May be mutated in the process.

    Returns:
        A three-tuple of (tokens or None on failure, accumulated lexing
        violations, the config to use for subsequent steps).
    """
    violations: List[SQLLexError] = []
    linter_logger.info("LEXING RAW (%s)", templated_file.fname)
    # Get the lexer
    lexer = Lexer(config=config)
    # Lex the file and log any problems
    try:
        tokens, lex_vs = lexer.lex(templated_file)
        # We might just get the violations as a list
        violations += lex_vs
        linter_logger.info("Lexed tokens: %s", [seg.raw for seg in tokens] if tokens else None)
    except SQLLexError as err:
        # Lexing failed outright: record the error and bail with no tokens.
        linter_logger.info("LEXING FAILED! (%s): %s", templated_file.fname, err)
        violations.append(err)
        return None, violations, config
    if not tokens:  # pragma: no cover TODO?
        return None, violations, config
    # Check that we've got sensible indentation from the lexer.
    # We might need to suppress if it's a complicated file.
    # NOTE: the config value may be a string ("force") or a boolean-ish value.
    templating_blocks_indent = config.get("template_blocks_indent", "indentation")
    if isinstance(templating_blocks_indent, str):
        force_block_indent = templating_blocks_indent.lower().strip(
        ) == "force"
    else:
        force_block_indent = False
    # Normalise the (possibly string) config value to a plain bool.
    templating_blocks_indent = bool(templating_blocks_indent)
    # If we're forcing it through we don't check.
    if templating_blocks_indent and not force_block_indent:
        # Sum of indent_val over all tokens; non-zero means unbalanced
        # template indents, so indent linting is disabled for this file.
        indent_balance = sum(
            getattr(elem, "indent_val", 0)
            for elem in cast(Tuple[BaseSegment, ...], tokens))
        if indent_balance != 0:
            linter_logger.debug(
                "Indent balance test failed for %r. Template indents will not be "
                "linted for this file.",
                templated_file.fname,
            )
            # Don't enable the templating blocks.
            templating_blocks_indent = False
    # The file will have been lexed without config, so check all indents
    # are enabled.
    new_tokens = []
    for token in cast(Tuple[BaseSegment, ...], tokens):
        if token.is_meta:
            token = cast(MetaSegment, token)
            if token.indent_val != 0:
                # Don't allow it if we're not linting templating block indents.
                if not templating_blocks_indent:
                    continue
        new_tokens.append(token)
    # Return new buffer
    return new_tokens, violations, config