def process(self, *, fname, in_str=None, config=None):
    """Compile a dbt model and return the compiled SQL.

    Any dbt compilation or connection failure is converted into a
    ``SQLTemplaterError`` and returned as a violation instead of being
    raised.

    Args:
        fname (:obj:`str`): Path to dbt model(s)
        in_str (:obj:`str`, optional): This is ignored for dbt
        config (:obj:`FluffConfig`, optional): A specific config to use for this
            templating operation. Only necessary for some templaters.

    Returns:
        The result of ``_unsafe_process`` on success, otherwise a
        ``(None, [SQLTemplaterError])`` tuple describing the failure.
    """
    self._check_dbt_installed()
    # Imported only after the installation check above has passed.
    from dbt.exceptions import (
        CompilationException as DbtCompilationException,
        FailedToConnectException as DbtFailedToConnectException,
    )

    try:
        return self._unsafe_process(fname, in_str, config)
    except DbtCompilationException as e:
        # The exception may carry no node; fall back to the requested file
        # name rather than failing with an AttributeError inside the handler.
        failed_path = getattr(
            getattr(e, "node", None), "original_file_path", fname
        )
        return None, [
            SQLTemplaterError(
                f"dbt compilation error on file '{failed_path}', {e.msg}"
            )
        ]
    except DbtFailedToConnectException as e:
        return None, [
            SQLTemplaterError(
                "dbt tried to connect to the database and failed: "
                "you could use 'execute' https://docs.getdbt.com/reference/dbt-jinja-functions/execute/ "
                f"to skip the database calls. Error: {e.msg}"
            )
        ]
    # If a SQLFluff error is raised, just pass it through
    except SQLTemplaterError as e:
        return None, [e]
def process(self, *, fname, in_str=None, config=None, formatter=None):
    """Compile a dbt model and return the compiled SQL.

    The process working directory is switched to the dbt project directory
    for the duration of the compilation and always switched back to
    ``self.working_dir`` afterwards.

    Args:
        fname (:obj:`str`): Path to dbt model(s)
        in_str (:obj:`str`, optional): This is ignored for dbt
        config (:obj:`FluffConfig`, optional): A specific config to use for this
            templating operation. Only necessary for some templaters.
        formatter (:obj:`CallbackFormatter`): Optional object for output.

    Returns:
        The result of ``_unsafe_process`` on success, otherwise a
        ``(None, [SQLTemplaterError])`` tuple describing the failure.
    """
    # Stash the formatter if provided to use in cached methods.
    self.formatter = formatter
    self._check_dbt_installed()
    # Imported only after the installation check above has passed.
    from dbt.exceptions import (
        CompilationException as DbtCompilationException,
        FailedToConnectException as DbtFailedToConnectException,
    )

    self.sqlfluff_config = config
    self.project_dir = self._get_project_dir()
    self.profiles_dir = self._get_profiles_dir()
    # Resolve to an absolute path *before* changing directory below.
    fname_absolute_path = os.path.abspath(fname)

    try:
        os.chdir(self.project_dir)
        processed_result = self._unsafe_process(fname_absolute_path, in_str, config)
        # Reset the fail counter
        self._sequential_fails = 0
        return processed_result
    except DbtCompilationException as e:
        # Increment the counter
        self._sequential_fails += 1
        # The exception may carry no node; fall back to the requested file
        # name rather than failing with an AttributeError inside the handler.
        failed_path = getattr(
            getattr(e, "node", None), "original_file_path", fname
        )
        return None, [
            SQLTemplaterError(
                f"dbt compilation error on file '{failed_path}', {e.msg}",
                # It's fatal if we're over the limit
                fatal=self._sequential_fails > self.sequential_fail_limit,
            )
        ]
    except DbtFailedToConnectException as e:
        return None, [
            SQLTemplaterError(
                "dbt tried to connect to the database and failed: "
                "you could use 'execute' https://docs.getdbt.com/reference/dbt-jinja-functions/execute/ "
                f"to skip the database calls. Error: {e.msg}",
                fatal=True,
            )
        ]
    # If a SQLFluff error is raised, just pass it through
    except SQLTemplaterError as e:
        return None, [e]
    finally:
        os.chdir(self.working_dir)
def _unsafe_process(self, fname, in_str=None, config=None):
    """Compile the dbt node for ``fname`` and wrap it in a TemplatedFile.

    Args:
        fname: Path of the dbt model to compile. Must be a real path;
            ``"stdin"`` is rejected.
        in_str: Unused here.
        config: The config object; required.

    Returns:
        A ``(TemplatedFile, [])`` tuple. The violation list is always empty.

    Raises:
        ValueError: If ``config`` or ``fname`` is missing, or ``fname`` is
            ``"stdin"``.
        RuntimeError: If no node in the manifest matches ``fname``.
        SQLTemplaterError: If dbt produced no compiled SQL.
    """
    if not config:
        raise ValueError(
            "For the dbt templater, the `process()` method requires a config object."
        )
    if not fname:
        raise ValueError(
            "For the dbt templater, the `process()` method requires a file name"
        )
    if fname == "stdin":
        raise ValueError(
            "The dbt templater does not support stdin input, provide a path instead"
        )

    self.sqlfluff_config = config

    matching_uids = self.dbt_selector_method.search(
        included_nodes=self.dbt_manifest.nodes,
        # Selector needs to be a relative path
        selector=os.path.relpath(fname, start=os.getcwd()),
    )
    matching_nodes = [self.dbt_manifest.expect(uid) for uid in matching_uids]
    if not matching_nodes:
        raise RuntimeError("File %s was not found in dbt project" % fname)

    compiled_node = self.dbt_compiler.compile_node(
        node=matching_nodes[0],
        manifest=self.dbt_manifest,
    )

    # If injected SQL is present, it contains a better picture of what will
    # actually hit the database (e.g. with tests). However it's not always
    # present.
    compiled_sql = (
        compiled_node.injected_sql
        if hasattr(compiled_node, "injected_sql")
        else compiled_node.compiled_sql
    )
    if not compiled_sql:
        raise SQLTemplaterError(
            "dbt templater compilation failed silently, check your configuration "
            "by running `dbt compile` directly."
        )

    raw_sliced, sliced_file, templated_sql = self.slice_file(
        compiled_node.raw_sql, compiled_sql, config=config
    )
    templated_file = TemplatedFile(
        source_str=compiled_node.raw_sql,
        templated_str=templated_sql,
        fname=fname,
        sliced_file=sliced_file,
        raw_sliced=raw_sliced,
    )
    # No violations returned in this way.
    return templated_file, []
def _crawl_tree(cls, tree, variable_names, raw):
    """Yield an error for every use of an undeclared variable in the tree.

    Children are visited before the node itself, so errors come out in
    depth-first, post-order sequence.

    Args:
        tree: A jinja2 AST node.
        variable_names: Collection of undeclared variable names to report.
        raw: The raw template source, used to locate the line position.
    """
    # Descend first, so nested occurrences are reported before this node.
    for child in tree.iter_child_nodes():
        yield from cls._crawl_tree(child, variable_names, raw)

    # Only Name nodes referring to one of the undeclared variables matter.
    if not isinstance(tree, jinja2.nodes.Name) or tree.name not in variable_names:
        return

    row = tree.lineno
    source_line = raw.split("\n")[row - 1]
    # Positions are 1-based; this is the first occurrence of the name on
    # that line.
    column = source_line.index(tree.name) + 1
    yield SQLTemplaterError(
        f"Undefined jinja template variable: {tree.name!r}",
        line_no=row,
        line_pos=column,
    )
def process(
    self, *, in_str: str, fname: Optional[str] = None, config=None, formatter=None
) -> Tuple[Optional[TemplatedFile], list]:
    """Process a string and return a TemplatedFile.

    Note that the arguments are enforced as keywords
    because Templaters can have differences in their
    `process` method signature.
    A Templater that only supports reading from a file
    would need the following signature:
        process(*, fname, in_str=None, config=None)
        (arguments are swapped)

    Args:
        in_str (:obj:`str`): The input string.
        fname (:obj:`str`, optional): The filename of this string. This is
            mostly for loading config files at runtime.
        config (:obj:`FluffConfig`): A specific config to use for this
            templating operation. Only necessary for some templaters.
        formatter (:obj:`CallbackFormatter`): Optional object for output.

    Returns:
        A ``(TemplatedFile, [])`` tuple. The violation list is always empty.

    Raises:
        SQLTemplaterError: If the template refers to a variable that is not
            in the context.
    """
    live_context = self.get_context(fname=fname, config=config)
    try:
        new_str = in_str.format(**live_context)
    except KeyError as err:
        # TODO: Add a url here so people can get more help.
        # Chain explicitly so the KeyError is recorded as the direct cause.
        raise SQLTemplaterError(
            f"Failure in Python templating: {err}. Have you configured your variables?"
        ) from err

    raw_sliced, sliced_file, new_str = self.slice_file(in_str, new_str, config=config)
    return (
        TemplatedFile(
            source_str=in_str,
            templated_str=new_str,
            fname=fname,
            sliced_file=sliced_file,
            raw_sliced=raw_sliced,
        ),
        [],
    )
def _crawl_tree(cls, tree, variable_names, raw) -> Generator[SQLTemplaterError, None, None]:
    """Walk the tree and yield an error per undeclared variable reference.

    Args:
        tree: A jinja2 AST node.
        variable_names: Collection of undeclared variable names to report.
        raw: The raw template source, used to locate the line position.

    Yields:
        One ``SQLTemplaterError`` per matching Name node, children first.
    """
    # Visit every child before looking at this node.
    for child_node in tree.iter_child_nodes():
        yield from cls._crawl_tree(child_node, variable_names, raw)

    # Nothing to report unless this is a Name node for an undeclared variable.
    is_name_node = isinstance(tree, jinja2.nodes.Name)
    if not is_name_node or getattr(tree, "name") not in variable_names:
        return

    # getattr is used for attribute access on the untyped node.
    line_no: int = getattr(tree, "lineno")
    tree_name: str = getattr(tree, "name")
    source_lines = raw.split("\n")
    # 1-based column of the first occurrence of the name on its line.
    line_pos = source_lines[line_no - 1].index(tree_name) + 1
    yield SQLTemplaterError(
        f"Undefined jinja template variable: {tree_name!r}",
        line_no=line_no,
        line_pos=line_pos,
    )
def _crawl_tree(cls, tree, variable_names, raw):
    """Yield an error for every use of an undeclared variable in the tree.

    Each error carries a ``FilePositionMarker`` holding the line number, the
    position in the line and the absolute character position in ``raw``.

    Args:
        tree: A jinja2 AST node.
        variable_names: Collection of undeclared variable names to report.
        raw: The raw template source, used to compute positions.
    """
    # Children first, then this node.
    for child in tree.iter_child_nodes():
        yield from cls._crawl_tree(child, variable_names, raw)

    if not isinstance(tree, jinja2.nodes.Name) or tree.name not in variable_names:
        return

    line_no = tree.lineno
    raw_lines = raw.split("\n")
    # 1-based position of the first occurrence of the name on its line.
    pos = raw_lines[line_no - 1].index(tree.name) + 1
    # Characters on all preceding lines; the +1 per line accounts for the
    # newline characters themselves.
    preceding_chars = sum(len(raw_line) + 1 for raw_line in raw_lines[: line_no - 1])
    charpos = preceding_chars + pos
    # NB: The positions returned here will be *inconsistent* with those
    # from the linter at the moment, because these are references to the
    # structure of the file *before* templating.
    yield SQLTemplaterError(
        "Undefined jinja template variable: {0!r}".format(tree.name),
        pos=FilePositionMarker(None, line_no, pos, charpos),
    )
def _extract_macros_from_path(cls, path: List[str], env: Environment, ctx: Dict):
    """Collect macros from every file or directory listed in ``path``.

    Files are parsed directly. Directories are walked recursively and every
    ``.sql`` file found is parsed. Later entries override earlier ones with
    the same macro name.

    Args:
        path: File and/or directory paths to load macros from.
        env: The jinja environment used to parse the macro files.
        ctx: Context passed through to the template extraction.

    Returns:
        A dict of the extracted macros.

    Raises:
        ValueError: If a path entry does not exist.
        SQLTemplaterError: If a macro file has a jinja syntax error.
    """
    collected = {}
    for entry in path:
        # Does it exist? It should as this check was done on config load.
        if not os.path.exists(entry):
            raise ValueError(f"Path does not exist: {entry}")

        if not os.path.isfile(entry):
            # It's a directory. Iterate through files in it and extract from
            # them, one recursive call per file.
            for dirpath, _, filenames in os.walk(entry):
                for filename in filenames:
                    if not filename.endswith(".sql"):
                        continue
                    collected.update(
                        cls._extract_macros_from_path(
                            [os.path.join(dirpath, filename)], env=env, ctx=ctx
                        )
                    )
            continue

        # It's a file. Extract macros from it.
        with open(entry) as macro_file:
            macro_source = macro_file.read()
        # Update the context with macros from the file.
        try:
            collected.update(
                cls._extract_macros_from_template(macro_source, env=env, ctx=ctx)
            )
        except TemplateSyntaxError as err:
            raise SQLTemplaterError(
                f"Error in Jinja macro file {os.path.relpath(entry)}: "
                f"{err.message}",
                line_no=err.lineno,
                line_pos=1,
            ) from err
    return collected
def _unsafe_process(self, fname, in_str=None, config=None):
    """Compile the dbt node for ``fname`` and wrap it in a TemplatedFile.

    Trailing newlines found in the source file are re-appended to both the
    node's raw SQL and the compiled SQL before slicing.

    Args:
        fname: Path of the dbt model to compile. Must be a real path;
            ``"stdin"`` is rejected.
        in_str: Unused here; the source is read from ``fname``.
        config: The config object; required.

    Returns:
        A ``(TemplatedFile, [])`` tuple. The violation list is always empty.

    Raises:
        ValueError: If ``config`` or ``fname`` is missing, or ``fname`` is
            ``"stdin"``.
        SQLTemplaterSkipFile: If the file belongs to a disabled model.
        RuntimeError: If no node in the manifest matches ``fname``.
        SQLTemplaterError: If dbt produced no compiled SQL.
    """
    if not config:
        raise ValueError(
            "For the dbt templater, the `process()` method requires a config object."
        )
    if not fname:
        raise ValueError(
            "For the dbt templater, the `process()` method requires a file name"
        )
    if fname == "stdin":
        raise ValueError(
            "The dbt templater does not support stdin input, provide a path instead"
        )

    matching_uids = self.dbt_selector_method.search(
        included_nodes=self.dbt_manifest.nodes,
        # Selector needs to be a relative path
        selector=os.path.relpath(fname, start=os.getcwd()),
    )
    matching_nodes = [self.dbt_manifest.expect(uid) for uid in matching_uids]

    if not matching_nodes:
        # Distinguish "model is disabled" from "file is unknown to dbt".
        model_name = os.path.splitext(os.path.basename(fname))[0]
        disabled_model = self.dbt_manifest.find_disabled_by_name(name=model_name)
        if disabled_model and os.path.abspath(
            disabled_model.original_file_path
        ) == os.path.abspath(fname):
            raise SQLTemplaterSkipFile(
                f"Skipped file {fname} because the model was disabled"
            )
        raise RuntimeError("File %s was not found in dbt project" % fname)

    node = self.dbt_compiler.compile_node(
        node=matching_nodes[0],
        manifest=self.dbt_manifest,
    )

    # If injected SQL is present, it contains a better picture of what will
    # actually hit the database (e.g. with tests). However it's not always
    # present.
    compiled_sql = (
        node.injected_sql if hasattr(node, "injected_sql") else node.compiled_sql
    )
    if not compiled_sql:
        raise SQLTemplaterError(
            "dbt templater compilation failed silently, check your configuration "
            "by running `dbt compile` directly."
        )

    with open(fname) as source_dbt_model:
        source_dbt_sql = source_dbt_model.read()
    stripped_length = len(source_dbt_sql.rstrip("\n"))
    n_trailing_newlines = len(source_dbt_sql) - stripped_length

    templater_logger.debug(
        " Trailing newline count in source dbt model: %r", n_trailing_newlines
    )
    templater_logger.debug(" Raw SQL before compile: %r", source_dbt_sql)
    templater_logger.debug(" Node raw SQL: %r", node.raw_sql)
    templater_logger.debug(" Node compiled SQL: %r", compiled_sql)

    # When using dbt-templater, trailing newlines are ALWAYS REMOVED during
    # compiling. Unless fixed (like below), this will cause:
    # 1. L009 linting errors when running "sqlfluff lint foo_bar.sql"
    #    since the linter will use the compiled code with the newlines
    #    removed.
    # 2. "No newline at end of file" warnings in Git/GitHub since
    #    sqlfluff uses the compiled SQL to write fixes back to the
    #    source SQL in the dbt model.
    # The solution is:
    # 1. Check for trailing newlines before compiling by looking at the
    #    raw SQL in the source dbt file, store the count of trailing newlines.
    # 2. Append the count from #1 above to the node.raw_sql and
    #    compiled_sql objects, both of which have had the trailing
    #    newlines removed by the dbt-templater.
    restored_newlines = "\n" * n_trailing_newlines
    node.raw_sql = node.raw_sql + restored_newlines
    compiled_sql = compiled_sql + restored_newlines

    raw_sliced, sliced_file, templated_sql = self.slice_file(
        node.raw_sql,
        compiled_sql,
        config=config,
    )
    templated_file = TemplatedFile(
        source_str=node.raw_sql,
        templated_str=templated_sql,
        fname=fname,
        sliced_file=sliced_file,
        raw_sliced=raw_sliced,
    )
    # No violations returned in this way.
    return templated_file, []
def process(
    self, *, in_str: str, fname: str, config=None, formatter=None
) -> Tuple[Optional[TemplatedFile], list]:
    """Process a string and return the new string.

    Note that the arguments are enforced as keywords
    because Templaters can have differences in their
    `process` method signature.
    A Templater that only supports reading from a file
    would need the following signature:
        process(*, fname, in_str=None, config=None)
        (arguments are swapped)

    Args:
        in_str (:obj:`str`): The input string.
        fname (:obj:`str`, optional): The filename of this string. This is
            mostly for loading config files at runtime.
        config (:obj:`FluffConfig`): A specific config to use for this
            templating operation. Only necessary for some templaters.
        formatter (:obj:`CallbackFormatter`): Optional object for output.

    Returns:
        A ``(TemplatedFile, violations)`` tuple. The first element is
        ``None`` when rendering fails unrecoverably.

    Raises:
        ValueError: If no config is provided.
        SQLTemplaterError: If the undeclared variables cannot be identified.
    """
    if not config:  # pragma: no cover
        raise ValueError(
            "For the jinja templater, the `process()` method requires a config "
            "object."
        )

    env, live_context, make_template = self.template_builder(
        fname=fname, config=config
    )

    # Load the template, passing the global context.
    try:
        template = make_template(in_str)
    except TemplateSyntaxError as err:
        # Something in the template didn't parse, return the original
        # and a violation around what happened.
        return (
            TemplatedFile(source_str=in_str, fname=fname),
            [
                SQLTemplaterError(
                    f"Failure to parse jinja template: {err}.",
                    line_no=err.lineno,
                )
            ],
        )

    violations = []

    # Attempt to identify any undeclared variables. The majority
    # will be found during the _crawl_tree step rather than this
    # first Exception which serves only to catch catastrophic errors.
    try:
        syntax_tree = env.parse(in_str)
        undefined_variables = meta.find_undeclared_variables(syntax_tree)
    except Exception as err:  # pragma: no cover
        # TODO: Add a url here so people can get more help.
        # Chain explicitly so the original failure is recorded as the cause.
        raise SQLTemplaterError(
            f"Failure in identifying Jinja variables: {err}."
        ) from err

    # Get rid of any that *are* actually defined.
    undefined_variables.difference_update(live_context)

    if undefined_variables:
        # Lets go through and find out where they are:
        violations.extend(
            self._crawl_tree(syntax_tree, undefined_variables, in_str)
        )

    try:
        # NB: Passing no context. Everything is loaded when the template is loaded.
        out_str = template.render()
        # Slice the file once rendered.
        raw_sliced, sliced_file, out_str = self.slice_file(
            in_str,
            out_str,
            config=config,
            make_template=make_template,
        )
        return (
            TemplatedFile(
                source_str=in_str,
                templated_str=out_str,
                fname=fname,
                sliced_file=sliced_file,
                raw_sliced=raw_sliced,
            ),
            violations,
        )
    except (TemplateError, TypeError) as err:
        templater_logger.info("Unrecoverable Jinja Error: %s", err)
        violations.append(
            SQLTemplaterError(
                (
                    "Unrecoverable failure in Jinja templating: {}. Have you "
                    "configured your variables? "
                    "https://docs.sqlfluff.com/en/latest/configuration.html"
                ).format(err),
                # We don't have actual line number information, but specify
                # line 1 so users can ignore with "noqa" if they want. (The
                # default is line 0, which can't be ignored because it's not
                # a valid line number.)
                line_no=1,
                line_pos=1,
            )
        )
        return None, violations
def process(
    self, *, in_str: str, fname: Optional[str] = None, config=None
) -> Tuple[Optional[TemplatedFile], list]:
    """Process a string and return the new string.

    Note that the arguments are enforced as keywords
    because Templaters can have differences in their
    `process` method signature.
    A Templater that only supports reading from a file
    would need the following signature:
        process(*, fname, in_str=None, config=None)
        (arguments are swapped)

    Args:
        in_str (:obj:`str`): The input string.
        fname (:obj:`str`, optional): The filename of this string. This is
            mostly for loading config files at runtime.
        config (:obj:`FluffConfig`): A specific config to use for this
            templating operation. Only necessary for some templaters.

    Returns:
        A ``(TemplatedFile, violations)`` tuple. The first element is
        ``None`` when rendering fails unrecoverably.

    Raises:
        ValueError: If no config is provided.
        SQLTemplaterError: If the undeclared variables cannot be identified.
    """
    if not config:
        raise ValueError(
            "For the jinja templater, the `process()` method requires a config object."
        )

    # Load the context
    live_context = self.get_context(fname=fname, config=config)

    # Apply dbt builtin functions if we're allowed.
    apply_dbt_builtins = config.get_section(
        (self.templater_selector, self.name, "apply_dbt_builtins")
    )
    if apply_dbt_builtins:
        # This feels a bit wrong defining these here, they should probably
        # be configurable somewhere sensible. But for now they're not.
        # TODO: Come up with a better solution.
        dbt_builtins = self._generate_dbt_builtins()
        for name in dbt_builtins:
            # Only apply if it hasn't already been set at this stage.
            if name not in live_context:
                live_context[name] = dbt_builtins[name]

    env = self._get_jinja_env()

    # Load macros from path (if applicable)
    macros_path = config.get_section(
        (self.templater_selector, self.name, "load_macros_from_path")
    )
    if macros_path:
        live_context.update(
            self._extract_macros_from_path(macros_path, env=env, ctx=live_context)
        )

    # Load config macros, these will take precedence over macros from the path
    live_context.update(
        self._extract_macros_from_config(config=config, env=env, ctx=live_context)
    )
    live_context.update(self._extract_libraries_from_config(config=config))

    # Load the template, passing the global context.
    try:
        template = env.from_string(in_str, globals=live_context)
    except TemplateSyntaxError as err:
        # Something in the template didn't parse, return the original
        # and a violation around what happened.
        # Calculate the charpos for sorting.
        # NOTE(review): newline characters are not counted here, unlike the
        # charpos computed in `_crawl_tree` - confirm whether that is intended.
        charpos = sum(len(line) for line in in_str.split("\n")[: err.lineno - 1])
        return (
            TemplatedFile(source_str=in_str, fname=fname),
            [
                SQLTemplaterError(
                    f"Failure to parse jinja template: {err}.",
                    pos=FilePositionMarker(None, err.lineno, None, charpos),
                )
            ],
        )

    violations = []

    # Attempt to identify any undeclared variables. The majority
    # will be found during the _crawl_tree step rather than this
    # first Exception which serves only to catch catastrophic errors.
    try:
        syntax_tree = env.parse(in_str)
        undefined_variables = meta.find_undeclared_variables(syntax_tree)
    except Exception as err:
        # TODO: Add a url here so people can get more help.
        # Chain explicitly so the original failure is recorded as the cause.
        raise SQLTemplaterError(
            f"Failure in identifying Jinja variables: {err}."
        ) from err

    # Get rid of any that *are* actually defined.
    undefined_variables.difference_update(live_context)

    if undefined_variables:
        # Lets go through and find out where they are:
        violations.extend(
            self._crawl_tree(syntax_tree, undefined_variables, in_str)
        )

    try:
        # NB: Passing no context. Everything is loaded when the template is loaded.
        out_str = template.render()
        # Slice the file once rendered.
        raw_sliced, sliced_file, out_str = self.slice_file(
            in_str, out_str, config=config
        )
        return (
            TemplatedFile(
                source_str=in_str,
                templated_str=out_str,
                fname=fname,
                sliced_file=sliced_file,
                raw_sliced=raw_sliced,
            ),
            violations,
        )
    except (TemplateError, TypeError) as err:
        templater_logger.info("Unrecoverable Jinja Error: %s", err)
        violations.append(
            SQLTemplaterError(
                (
                    "Unrecoverable failure in Jinja templating: {0}. Have you configured "
                    "your variables? https://docs.sqlfluff.com/en/latest/configuration.html"
                ).format(err)
            )
        )
        return None, violations
def process(
    self, *, in_str: str, fname: str, config=None, formatter=None
) -> Tuple[Optional[TemplatedFile], list]:
    """Process a string and return a TemplatedFile.

    Every match of the configured bind-parameter regex is replaced by the
    string form of the matching context value. The text in between is
    copied through unchanged as "literal" slices.

    Note that the arguments are enforced as keywords
    because Templaters can have differences in their
    `process` method signature.
    A Templater that only supports reading from a file
    would need the following signature:
        process(*, fname, in_str=None, config=None)
        (arguments are swapped)

    Args:
        in_str (:obj:`str`): The input string.
        fname (:obj:`str`, optional): The filename of this string. This is
            mostly for loading config files at runtime.
        config (:obj:`FluffConfig`): A specific config to use for this
            templating operation. Only necessary for some templaters.
        formatter (:obj:`CallbackFormatter`): Optional object for output.

    Returns:
        A ``(TemplatedFile, [])`` tuple. The violation list is always empty.

    Raises:
        SQLTemplaterError: If a parameter has no value in the context.
    """
    context = self.get_context(config)
    template_slices = []
    raw_slices = []
    last_pos_raw, last_pos_templated = 0, 0
    # Output fragments, joined once at the end.
    out_parts = []
    regex = context["__bind_param_regex"]
    # when the param has no name, use a 1-based index
    param_counter = 1
    for found_param in regex.finditer(in_str):
        start, end = found_param.span()
        if "param_name" not in found_param.groupdict():
            param_name = str(param_counter)
            param_counter += 1
        else:
            param_name = found_param["param_name"]
        last_literal_length = start - last_pos_raw
        try:
            replacement = str(context[param_name])
        except KeyError as err:
            # TODO: Add a url here so people can get more help.
            # Chain explicitly so the KeyError is recorded as the cause.
            raise SQLTemplaterError(
                "Failure in placeholder templating: {}. Have you configured your "
                "variables?".format(err)
            ) from err

        # add the literal to the slices
        literal = in_str[last_pos_raw:start]
        template_slices.append(
            TemplatedFileSlice(
                slice_type="literal",
                source_slice=slice(last_pos_raw, start, None),
                templated_slice=slice(
                    last_pos_templated,
                    last_pos_templated + last_literal_length,
                    None,
                ),
            )
        )
        raw_slices.append(
            RawFileSlice(
                raw=literal,
                slice_type="literal",
                source_idx=last_pos_raw,
            )
        )
        out_parts.append(literal)

        # add the current replaced element
        start_template_pos = last_pos_templated + last_literal_length
        template_slices.append(
            TemplatedFileSlice(
                slice_type="templated",
                source_slice=slice(start, end, None),
                templated_slice=slice(
                    start_template_pos, start_template_pos + len(replacement), None
                ),
            )
        )
        raw_slices.append(
            RawFileSlice(
                raw=in_str[start:end],
                slice_type="templated",
                source_idx=start,
            )
        )
        out_parts.append(replacement)

        # update the indexes
        last_pos_raw = end
        last_pos_templated = start_template_pos + len(replacement)

    # add the last literal, if any
    if len(in_str) > last_pos_raw:
        template_slices.append(
            TemplatedFileSlice(
                slice_type="literal",
                source_slice=slice(last_pos_raw, len(in_str), None),
                templated_slice=slice(
                    last_pos_templated,
                    last_pos_templated + (len(in_str) - last_pos_raw),
                    None,
                ),
            )
        )
        raw_slices.append(
            RawFileSlice(
                raw=in_str[last_pos_raw:],
                slice_type="literal",
                source_idx=last_pos_raw,
            )
        )
        out_parts.append(in_str[last_pos_raw:])

    return (
        TemplatedFile(
            # original string
            source_str=in_str,
            # string after all replacements
            templated_str="".join(out_parts),
            # filename
            fname=fname,
            # list of TemplatedFileSlice
            sliced_file=template_slices,
            # list of RawFileSlice, same size
            raw_sliced=raw_slices,
        ),
        [],  # violations, always empty
    )
def _unsafe_process(self, fname, in_str=None, config=None):
    """Compile the dbt node for ``fname`` and wrap it in a TemplatedFile.

    While dbt compiles the node, ``Environment.from_string`` is temporarily
    replaced so the jinja environment and globals used for this file can be
    captured and handed to ``slice_file`` via ``make_template``.

    Args:
        fname: Path of the dbt model to compile.
        in_str: Unused here; the source is read from ``fname``.
        config: The config object, passed on to ``_find_node`` and
            ``slice_file``.

    Returns:
        A ``(TemplatedFile, [])`` tuple. The violation list is always empty.

    Raises:
        SQLFluffSkipFile: If dbt raises any exception during compilation.
        SQLTemplaterError: If dbt produced no compiled SQL.
    """
    original_file_path = os.path.relpath(fname, start=os.getcwd())

    # Below, we monkeypatch Environment.from_string() to intercept when dbt
    # compiles (i.e. runs Jinja) to expand the "node" corresponding to fname.
    # We do this to capture the Jinja context at the time of compilation, i.e.:
    # - Jinja Environment object
    # - Jinja "globals" dictionary
    #
    # This info is captured by the "make_template()" function, which in
    # turn is used by our parent class' (JinjaTemplater) slice_file()
    # function.
    old_from_string = Environment.from_string
    make_template = None

    def from_string(*args, **kwargs):
        """Replaces (via monkeypatch) the jinja2.Environment function."""
        nonlocal make_template
        # Is it processing the node corresponding to fname?
        passed_globals = kwargs.get("globals")
        model = passed_globals.get("model") if passed_globals else None
        if model and model.get("original_file_path") == original_file_path:
            # Yes. Capture the important arguments and create
            # a make_template() function.
            env = args[0]
            captured_globals = args[2] if len(args) >= 3 else kwargs["globals"]

            def make_template(in_str):
                env.add_extension(SnapshotExtension)
                return env.from_string(in_str, globals=captured_globals)

        return old_from_string(*args, **kwargs)

    node = self._find_node(fname, config)
    templater_logger.debug(
        "_find_node for path %r returned object of type %s.", fname, type(node)
    )

    # Remember the not-yet-compiled ephemeral nodes so they can be restored
    # after compilation (see the :HACK: note further down).
    save_ephemeral_nodes = {
        key: manifest_node
        for key, manifest_node in self.dbt_manifest.nodes.items()
        if manifest_node.config.materialized == "ephemeral"
        and not getattr(manifest_node, "compiled", False)
    }

    with self.connection():
        # Apply the monkeypatch.
        Environment.from_string = from_string
        try:
            node = self.dbt_compiler.compile_node(
                node=node,
                manifest=self.dbt_manifest,
            )
        except Exception as err:
            templater_logger.exception(
                "Fatal dbt compilation error on %s. This occurs most often "
                "during incorrect sorting of ephemeral models before linting. "
                "Please report this error on github at "
                "https://github.com/sqlfluff/sqlfluff/issues, including "
                "both the raw and compiled sql for the model affected.",
                fname,
            )
            # Additional error logging in case we get a fatal dbt error.
            raise SQLFluffSkipFile(  # pragma: no cover
                f"Skipped file {fname} because dbt raised a fatal "
                f"exception during compilation: {err!s}"
            ) from err
        finally:
            # Undo the monkeypatch.
            Environment.from_string = old_from_string

    # If injected SQL is present, it contains a better picture of what will
    # actually hit the database (e.g. with tests). However it's not always
    # present.
    compiled_sql = (
        node.injected_sql
        if hasattr(node, "injected_sql")
        else getattr(node, COMPILED_SQL_ATTRIBUTE)
    )
    raw_sql = getattr(node, RAW_SQL_ATTRIBUTE)

    if not compiled_sql:  # pragma: no cover
        raise SQLTemplaterError(
            "dbt templater compilation failed silently, check your "
            "configuration by running `dbt compile` directly."
        )

    with open(fname) as source_dbt_model:
        source_dbt_sql = source_dbt_model.read()

    if source_dbt_sql.rstrip().endswith("-%}"):
        # Source file ends with right whitespace stripping, so there's
        # no need to preserve/restore trailing newlines, as they would
        # have been removed regardless of dbt's
        # keep_trailing_newlines=False behavior.
        n_trailing_newlines = 0
    else:
        n_trailing_newlines = len(source_dbt_sql) - len(source_dbt_sql.rstrip("\n"))

    templater_logger.debug(
        " Trailing newline count in source dbt model: %r",
        n_trailing_newlines,
    )
    templater_logger.debug(" Raw SQL before compile: %r", source_dbt_sql)
    templater_logger.debug(" Node raw SQL: %r", raw_sql)
    templater_logger.debug(" Node compiled SQL: %r", compiled_sql)

    # When using dbt-templater, trailing newlines are ALWAYS REMOVED during
    # compiling. Unless fixed (like below), this will cause:
    # 1. Assertion errors in TemplatedFile, when it sanity checks the
    #    contents of the sliced_file array.
    # 2. L009 linting errors when running "sqlfluff lint foo_bar.sql"
    #    since the linter will use the compiled code with the newlines
    #    removed.
    # 3. "No newline at end of file" warnings in Git/GitHub since
    #    sqlfluff uses the compiled SQL to write fixes back to the
    #    source SQL in the dbt model.
    #
    # The solution is (note that both the raw and compiled files have
    # had trailing newline(s) removed by the dbt-templater):
    # 1. Check for trailing newlines before compiling by looking at the
    #    raw SQL in the source dbt file. Remember the count of trailing
    #    newlines.
    # 2. Set node.raw_sql/node.raw_code to the original source file contents.
    # 3. Append the count from #1 above to compiled_sql. (In
    #    production, slice_file() does not usually use this string,
    #    but some test scenarios do.)
    setattr(node, RAW_SQL_ATTRIBUTE, source_dbt_sql)
    compiled_sql = compiled_sql + "\n" * n_trailing_newlines

    # TRICKY: dbt configures Jinja2 with keep_trailing_newline=False.
    # As documented (https://jinja.palletsprojects.com/en/3.0.x/api/),
    # this flag's behavior is: "Preserve the trailing newline when
    # rendering templates. The default is False, which causes a single
    # newline, if present, to be stripped from the end of the template."
    #
    # Below, we use "append_to_templated" to effectively "undo" this.
    raw_sliced, sliced_file, templated_sql = self.slice_file(
        source_dbt_sql,
        compiled_sql,
        config=config,
        make_template=make_template,
        append_to_templated="\n" if n_trailing_newlines else "",
    )

    # :HACK: If calling compile_node() compiled any ephemeral nodes,
    # restore them to their earlier state. This prevents a runtime error
    # in the dbt "_inject_ctes_into_sql()" function that occurs with
    # 2nd-level ephemeral model dependencies (e.g. A -> B -> C, where
    # both B and C are ephemeral). Perhaps there is a better way to do
    # this, but this seems good enough for now.
    for key, saved_node in save_ephemeral_nodes.items():
        if getattr(self.dbt_manifest.nodes[key], "compiled", False):
            self.dbt_manifest.nodes[key] = saved_node

    templated_file = TemplatedFile(
        source_str=source_dbt_sql,
        templated_str=templated_sql,
        fname=fname,
        sliced_file=sliced_file,
        raw_sliced=raw_sliced,
    )
    # No violations returned in this way.
    return templated_file, []
def _unsafe_process(self, fname, in_str=None, config=None):
    """Compile the dbt node for ``fname`` and wrap it in a TemplatedFile.

    While dbt looks up and compiles the node, ``Environment.from_string``
    is temporarily replaced so the jinja environment and globals used for
    this file can be captured and handed to ``slice_file`` via
    ``make_template``. The original function is restored even when
    compilation raises.

    Args:
        fname: Path of the dbt model to compile.
        in_str: Unused here; the source is read from ``fname``.
        config: The config object, passed on to ``_find_node`` and
            ``slice_file``.

    Returns:
        A ``(TemplatedFile, [])`` tuple. The violation list is always empty.

    Raises:
        SQLTemplaterError: If dbt produced no compiled SQL.
    """
    original_file_path = os.path.relpath(fname, start=os.getcwd())

    # Below, we monkeypatch Environment.from_string() to intercept when dbt
    # compiles (i.e. runs Jinja) to expand the "node" corresponding to fname.
    # We do this to capture the Jinja context at the time of compilation, i.e.:
    # - Jinja Environment object
    # - Jinja "globals" dictionary
    #
    # This info is captured by the "make_template()" function, which in
    # turn is used by our parent class' (JinjaTemplater) slice_file()
    # function.
    old_from_string = Environment.from_string
    make_template = None

    def from_string(*args, **kwargs):
        """Replaces (via monkeypatch) the jinja2.Environment function."""
        nonlocal make_template
        # Is it processing the node corresponding to fname?
        passed_globals = kwargs.get("globals")
        model = passed_globals.get("model") if passed_globals else None
        if model and model.get("original_file_path") == original_file_path:
            # Yes. Capture the important arguments and create
            # a make_template() function.
            env = args[0]
            captured_globals = args[2] if len(args) >= 3 else kwargs["globals"]

            def make_template(in_str):
                env.add_extension(SnapshotExtension)
                return env.from_string(in_str, globals=captured_globals)

        return old_from_string(*args, **kwargs)

    # Apply the monkeypatch. It is installed at the same point as before
    # (ahead of the node lookup), but is now guaranteed to be undone: an
    # exception during lookup or compilation previously left
    # Environment.from_string patched.
    Environment.from_string = from_string
    try:
        node = self._find_node(fname, config)

        # Remember the not-yet-compiled ephemeral nodes so they can be
        # restored after compilation (see the :HACK: note further down).
        save_ephemeral_nodes = {
            key: manifest_node
            for key, manifest_node in self.dbt_manifest.nodes.items()
            if manifest_node.config.materialized == "ephemeral"
            and not getattr(manifest_node, "compiled", False)
        }

        with self.connection():
            node = self.dbt_compiler.compile_node(
                node=node,
                manifest=self.dbt_manifest,
            )
    finally:
        # Undo the monkeypatch.
        Environment.from_string = old_from_string

    if hasattr(node, "injected_sql"):
        # If injected SQL is present, it contains a better picture
        # of what will actually hit the database (e.g. with tests).
        # However it's not always present.
        compiled_sql = node.injected_sql
    else:
        compiled_sql = node.compiled_sql

    if not compiled_sql:  # pragma: no cover
        raise SQLTemplaterError(
            "dbt templater compilation failed silently, check your "
            "configuration by running `dbt compile` directly."
        )

    with open(fname) as source_dbt_model:
        source_dbt_sql = source_dbt_model.read()

    n_trailing_newlines = len(source_dbt_sql) - len(source_dbt_sql.rstrip("\n"))

    templater_logger.debug(
        " Trailing newline count in source dbt model: %r",
        n_trailing_newlines,
    )
    templater_logger.debug(" Raw SQL before compile: %r", source_dbt_sql)
    templater_logger.debug(" Node raw SQL: %r", node.raw_sql)
    templater_logger.debug(" Node compiled SQL: %r", compiled_sql)

    # When using dbt-templater, trailing newlines are ALWAYS REMOVED during
    # compiling. Unless fixed (like below), this will cause:
    # 1. L009 linting errors when running "sqlfluff lint foo_bar.sql"
    #    since the linter will use the compiled code with the newlines
    #    removed.
    # 2. "No newline at end of file" warnings in Git/GitHub since
    #    sqlfluff uses the compiled SQL to write fixes back to the
    #    source SQL in the dbt model.
    # The solution is:
    # 1. Check for trailing newlines before compiling by looking at the
    #    raw SQL in the source dbt file, store the count of trailing
    #    newlines.
    # 2. Append the count from #1 above to the node.raw_sql and
    #    compiled_sql objects, both of which have had the trailing
    #    newlines removed by the dbt-templater.
    node.raw_sql = node.raw_sql + "\n" * n_trailing_newlines
    compiled_sql = compiled_sql + "\n" * n_trailing_newlines

    raw_sliced, sliced_file, templated_sql = self.slice_file(
        source_dbt_sql,
        compiled_sql,
        config=config,
        make_template=make_template,
    )

    # :HACK: If calling compile_node() compiled any ephemeral nodes,
    # restore them to their earlier state. This prevents a runtime error
    # in the dbt "_inject_ctes_into_sql()" function that occurs with
    # 2nd-level ephemeral model dependencies (e.g. A -> B -> C, where
    # both B and C are ephemeral). Perhaps there is a better way to do
    # this, but this seems good enough for now.
    for key, saved_node in save_ephemeral_nodes.items():
        if getattr(self.dbt_manifest.nodes[key], "compiled", False):
            self.dbt_manifest.nodes[key] = saved_node

    if make_template and n_trailing_newlines:
        # Update templated_sql as we updated the other strings above. Update
        # sliced_file to reflect the mapping of the added character(s) back
        # to the raw SQL.
        templated_sql = templated_sql + "\n" * n_trailing_newlines
        sliced_file.append(
            TemplatedFileSlice(
                slice_type="literal",
                source_slice=slice(
                    len(source_dbt_sql) - n_trailing_newlines, len(source_dbt_sql)
                ),
                templated_slice=slice(
                    len(templated_sql) - n_trailing_newlines, len(templated_sql)
                ),
            )
        )

    return (
        TemplatedFile(
            source_str=source_dbt_sql,
            templated_str=templated_sql,
            fname=fname,
            sliced_file=sliced_file,
            raw_sliced=raw_sliced,
        ),
        # No violations returned in this way.
        [],
    )