def parse(
    sql: str,
    dialect: str = "ansi",
    config_path: Optional[str] = None,
) -> Dict[str, Any]:
    """Parse a SQL string.

    Args:
        sql (:obj:`str`): The SQL to be parsed.
        dialect (:obj:`str`, optional): A reference to the dialect of the SQL
            to be parsed. Defaults to `ansi`.
        config_path (:obj:`Optional[str]`, optional): A path to a .sqlfluff
            config. Defaults to None.

    Returns:
        :obj:`Dict[str, Any]` JSON containing the parsed structure.
    """
    result = Linter(
        config=get_simple_config(dialect=dialect, config_path=config_path)
    ).parse_string(sql)
    # Surface any parsing errors as one combined exception.
    if result.violations:
        raise APIParsingError(result.violations)
    # A template rendering to nothing produces no tree at all.
    if result.tree is None:  # pragma: no cover
        return {}
    # Return a JSON representation of the parse tree.
    return result.tree.as_record(show_raw=True)
def test__dialect__ansi_specific_segment_not_parse(raw, err_locations, caplog):
    """Test queries do not parse, with parsing errors raised properly."""
    parsed = Linter().parse_string(raw)
    # At least one violation must have been reported.
    assert parsed.violations
    # The reported (line, position) pairs must match expectations exactly.
    observed = [(v.line_no(), v.line_pos()) for v in parsed.violations]
    assert observed == err_locations
def test__dialect__ansi_is_whitespace():
    """Test proper tagging with is_whitespace."""
    lnt = Linter()
    fixture_path = "test/fixtures/parser/ansi/select_in_multiline_comment.sql"
    with open(fixture_path) as sql_file:
        parsed = lnt.parse_string(sql_file.read())
    # Check all the segments that *should* be whitespace, ARE
    for seg in parsed.tree.iter_raw_seg():
        if seg.is_type("whitespace", "newline"):
            assert seg.is_whitespace
def test__templater_jinja_lint_empty():
    """Check that parsing a file which renders to an empty string.

    No exception should be raised, but the parsed tree should be None.
    """
    parsed = Linter().parse_string(in_str='{{ "" }}')
    # The source is preserved while the rendered output is empty.
    assert parsed.templated_file.source_str == '{{ "" }}'
    assert parsed.templated_file.templated_str == ""
    # An empty render yields no parse tree.
    assert parsed.tree is None
def test_snowflake_queries(segment_cls, raw, caplog):
    """Test snowflake specific queries parse.

    Args:
        segment_cls: The segment class expected to appear in the parse tree.
        raw: The raw SQL string to parse.
        caplog: Pytest log-capture fixture.
    """
    lnt = Linter(dialect="snowflake")
    # NOTE(review): this variant of parse_string appears to return a
    # (parsed, violations, ...) tuple — confirm against the Linter API in use.
    parsed, vs, _ = lnt.parse_string(raw)
    assert len(vs) == 0
    # Find any unparsable statements
    typs = parsed.type_set()
    assert "unparsable" not in typs
    # Find the expected type in the parsed segment.
    # (Fixed: redundant identity comprehension replaced with list().)
    child_segments = list(parsed.recursive_crawl(segment_cls.type))
    assert len(child_segments) > 0
def test__dialect__ansi_parse_indented_joins(sql_string, indented_joins, meta_loc):
    """Test parsing of meta segments using Conditional works with indented_joins."""
    cfg = FluffConfig(
        configs={"indentation": {"indented_joins": indented_joins}}
    )
    parsed = Linter(config=cfg).parse_string(sql_string)
    # Check that there's nothing unparsable
    assert "unparsable" not in parsed.tree.type_set()
    # Check all the segments that *should* be whitespace, ARE
    observed_meta_locs = tuple(
        idx
        for idx, raw_seg in enumerate(parsed.tree.iter_raw_seg())
        if raw_seg.is_meta
    )
    assert observed_meta_locs == meta_loc
def test_snowflake_queries(segment_cls, raw, caplog):
    """Test snowflake specific queries parse.

    Args:
        segment_cls: A reference (resolved via the dialect) to the segment
            expected in the parse tree.
        raw: The raw SQL string to parse.
        caplog: Pytest log-capture fixture.
    """
    lnt = Linter(dialect="snowflake")
    parsed = lnt.parse_string(raw)
    # Print violations so failures are easy to diagnose from pytest output.
    print(parsed.violations)
    assert len(parsed.violations) == 0
    # Find any unparsable statements
    typs = parsed.tree.type_set()
    assert "unparsable" not in typs
    # Find the expected type in the parsed segment.
    # (Fixed: redundant identity comprehension replaced with list().)
    seg_type = dialect_selector("snowflake").get_segment(segment_cls).type
    child_segments = list(parsed.tree.recursive_crawl(seg_type))
    assert len(child_segments) > 0
def test__dialect__ansi_parse_indented_joins(sql_string, indented_joins, meta_loc):
    """Test parsing of meta segments using Conditional works with indented_joins."""
    cfg = FluffConfig(
        configs={"indentation": {"indented_joins": indented_joins}},
        overrides={"dialect": "ansi"},
    )
    parsed = Linter(config=cfg).parse_string(sql_string)
    # Check that there's nothing unparsable
    assert "unparsable" not in parsed.tree.type_set()
    # Check all the segments that *should* be metas, ARE.
    # NOTE: This includes the end of file marker.
    observed_meta_locs = tuple(
        idx
        for idx, seg in enumerate(parsed.tree.get_raw_segments())
        if seg.is_meta
    )
    assert observed_meta_locs == meta_loc
def parse(sql, dialect="ansi"):
    """Parse a sql string or file.

    Args:
        sql (:obj:`str` or file-like object): The sql to be linted
            either as a string or a subclass of :obj:`TextIOBase`.
        dialect (:obj:`str`, optional): A reference to the dialect of the sql
            to be linted. Defaults to `ansi`.

    Returns:
        :obj:`ParsedString` containing the parsed structure.
    """
    # Accept either a plain string or a file-like object.
    sql_str = _unify_str_or_file(sql)
    result = Linter(dialect=dialect).parse_string(sql_str)
    # Bundle any parsing errors into a single combined exception.
    if result.violations:
        raise APIParsingError(result.violations)
    return result
def _validate_dialect_specific_statements(dialect, segment_cls, raw, stmt_count):
    """This validates one or multiple statements against specified segment class.

    It even validates the number of parsed statements with the number of
    expected statements.

    Args:
        dialect: Name of the dialect to parse with.
        segment_cls: The segment class each parsed statement must be.
        raw: The raw SQL string to parse.
        stmt_count: The exact number of matching statements expected.
    """
    lnt = Linter(dialect=dialect)
    parsed = lnt.parse_string(raw)
    assert len(parsed.violations) == 0
    # Find any unparsable statements
    typs = parsed.tree.type_set()
    assert "unparsable" not in typs
    # Find the expected type in the parsed segment.
    # (Fixed: redundant identity comprehension replaced with list().)
    child_segments = list(parsed.tree.recursive_crawl(segment_cls.type))
    assert len(child_segments) == stmt_count
    # Check if all child segments are the correct type
    for c in child_segments:
        assert isinstance(c, segment_cls)
def test__linter__empty_file():
    """Test linter behaves nicely with an empty string."""
    lntr = Linter()
    # Parsing an empty string should neither raise nor report violations.
    result = lntr.parse_string("")
    assert not result.violations