def make_parse_result(
    config: RuntimeConfig, all_projects: Mapping[str, Project]
) -> ParseResult:
    """Make a ParseResult from the project configuration and the profile."""
    # If any of these inputs change, the cached parser state must be rejected.
    args = config.args
    vars_hash = FileHash.from_contents('\x00'.join([
        getattr(args, 'vars', '{}') or '{}',
        getattr(args, 'profile', '') or '',
        getattr(args, 'target', '') or '',
        __version__,
    ]))
    profile_path = os.path.join(args.profiles_dir, 'profiles.yml')
    with open(profile_path) as fp:
        profile_hash = FileHash.from_contents(fp.read())
    project_hashes = {}
    for name, project in all_projects.items():
        project_yml = os.path.join(project.project_root, 'dbt_project.yml')
        with open(project_yml) as fp:
            project_hashes[name] = FileHash.from_contents(fp.read())
    return ParseResult(
        vars_hash=vars_hash,
        profile_hash=profile_hash,
        project_hashes=project_hashes,
    )
def build_manifest_state_check(self):
    """Build a ManifestStateCheck hashing everything that invalidates a cached parse."""
    config = self.root_project
    # Hash of CLI vars/profile/target plus the dbt version; any change here
    # means the saved parser state cannot be reused.
    hash_parts = [
        getattr(config.args, 'vars', '{}') or '{}',
        getattr(config.args, 'profile', '') or '',
        getattr(config.args, 'target', '') or '',
        __version__,
    ]
    vars_hash = FileHash.from_contents('\x00'.join(hash_parts))

    with open(os.path.join(config.args.profiles_dir, 'profiles.yml')) as fp:
        profile_hash = FileHash.from_contents(fp.read())

    project_hashes = {}
    for name, project in self.all_projects.items():
        with open(os.path.join(project.project_root, 'dbt_project.yml')) as fp:
            project_hashes[name] = FileHash.from_contents(fp.read())

    return ManifestStateCheck(
        vars_hash=vars_hash,
        profile_hash=profile_hash,
        project_hashes=project_hashes,
    )
def _mock_state_check(self):
    """Return a ManifestStateCheck built from fixed fake hashes (skips real file hashing)."""
    config = self.root_project  # kept: mirrors the access pattern of the real builder
    project_hashes = {}
    for name in self.all_projects:
        project_hashes[name] = FileHash.from_contents(name)
    return ManifestStateCheck(
        vars_hash=FileHash.from_contents('vars'),
        profile_hash=FileHash.from_contents('profile'),
        project_hashes=project_hashes,
    )
def basic_compiled_schema_test_node():
    """Fixture: a compiled schema-test node ('foo' on column 'id') whose single
    injected CTE has already been spliced into compiled_sql."""
    return CompiledSchemaTestNode(
        package_name='test',
        root_path='/root/',
        path='/root/x/path.sql',
        original_file_path='/root/path.sql',
        raw_sql='select * from {{ ref("other") }}',
        name='foo',
        resource_type=NodeType.Test,
        unique_id='model.test.foo',
        fqn=['test', 'models', 'foo'],
        refs=[],
        sources=[],
        depends_on=DependsOn(),
        deferred=False,
        description='',
        database='test_db',
        schema='test_schema',
        alias='bar',
        tags=[],
        config=TestConfig(severity='warn'),
        meta={},
        compiled=True,
        # CTE already injected: extra_ctes_injected=True and compiled_sql
        # contains the 'whatever' CTE inline.
        extra_ctes=[InjectedCTE('whatever', 'select * from other')],
        extra_ctes_injected=True,
        compiled_sql='with whatever as (select * from other) select * from whatever',
        column_name='id',
        test_metadata=TestMetadata(namespace=None, name='foo', kwargs={}),
        checksum=FileHash.from_contents(''),
        # unrendered_config mirrors the severity set in config above
        unrendered_config={
            'severity': 'warn',
        }
    )
def basic_compiled_model():
    """Fixture: a compiled (and deferred) model node with one already-injected CTE."""
    return CompiledModelNode(
        package_name='test',
        root_path='/root/',
        path='/root/models/foo.sql',
        original_file_path='models/foo.sql',
        raw_sql='select * from {{ ref("other") }}',
        name='foo',
        resource_type=NodeType.Model,
        unique_id='model.test.foo',
        fqn=['test', 'models', 'foo'],
        refs=[],
        sources=[],
        depends_on=DependsOn(),
        # deferred=True distinguishes this from the non-deferred fixtures
        deferred=True,
        description='',
        database='test_db',
        schema='test_schema',
        alias='bar',
        tags=[],
        config=NodeConfig(),
        meta={},
        compiled=True,
        extra_ctes=[InjectedCTE('whatever', 'select * from other')],
        extra_ctes_injected=True,
        compiled_sql='with whatever as (select * from other) select * from whatever',
        checksum=FileHash.from_contents(''),
        unrendered_config={}
    )
def test_select_state_changed_seed_checksum_sha_to_sha(manifest, previous_state, seed):
    """A seed whose content checksum changed must match 'modified' but not 'new'."""
    modified_seed = seed.replace(checksum=FileHash.from_contents('changed'))
    change_node(manifest, modified_seed)
    method = statemethod(manifest, previous_state)
    assert search_manifest_using_method(manifest, method, 'modified') == {'seed'}
    assert not search_manifest_using_method(manifest, method, 'new')
def make_model(pkg, name, sql, refs=None, sources=None, tags=None, path=None,
               alias=None, config_kwargs=None, fqn_extras=None):
    """Construct a ParsedModelNode fixture.

    `refs` and `sources` populate both the node's ref/source lists and its
    depends_on.nodes, mirroring what the parser would produce.
    """
    refs = [] if refs is None else refs
    sources = [] if sources is None else sources
    tags = [] if tags is None else tags
    path = f'{name}.sql' if path is None else path
    alias = name if alias is None else alias
    config_kwargs = {} if config_kwargs is None else config_kwargs
    fqn_extras = [] if fqn_extras is None else fqn_extras

    depends_on_nodes = []
    ref_values = []
    for ref in refs:
        ref_values.append([ref.name])
        depends_on_nodes.append(ref.unique_id)
    source_values = []
    for src in sources:
        source_values.append([src.source_name, src.name])
        depends_on_nodes.append(src.unique_id)

    return ParsedModelNode(
        raw_sql=sql,
        database='dbt',
        schema='dbt_schema',
        alias=alias,
        name=name,
        fqn=[pkg] + fqn_extras + [name],
        unique_id=f'model.{pkg}.{name}',
        package_name=pkg,
        root_path='/usr/dbt/some-project',
        path=path,
        original_file_path=f'models/{path}',
        config=NodeConfig(**config_kwargs),
        tags=tags,
        refs=ref_values,
        sources=source_values,
        depends_on=DependsOn(nodes=depends_on_nodes),
        resource_type=NodeType.Model,
        checksum=FileHash.from_contents(''),
    )
def load_source_file(path: FilePath, parse_file_type: ParseFileType,
                     project_name: str) -> SourceFile:
    """Read the file at *path* and wrap it in a SourceFile.

    The checksum covers the raw, unstripped contents so whitespace-only edits
    still change the hash; the stored contents are stripped for parsing.
    """
    raw = load_file_contents(path.absolute_path, strip=False)
    source_file = SourceFile(
        path=path,
        checksum=FileHash.from_contents(raw),
        parse_file_type=parse_file_type,
        project_name=project_name,
    )
    source_file.contents = raw.strip()
    return source_file
def use_models(self, models):
    """Register a mock model SourceFile for each (name -> sql) entry in *models*."""
    for model_name, model_sql in models.items():
        file_path = FilePath(
            searched_path='models',
            project_root=os.path.normcase(os.getcwd()),
            relative_path='{}.sql'.format(model_name),
        )
        # FileHash can't be empty or 'search_key' will be None
        source_file = SourceFile(
            path=file_path, checksum=FileHash.from_contents('abc'))
        source_file.contents = model_sql
        self.mock_models.append(source_file)
def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
    """Create a SourceFile for a seed, hashing by path when the file is too large."""
    if match.seed_too_large():
        # We don't want to calculate a hash of this file. Use the path.
        source_file = SourceFile.big_seed(match)
    else:
        file_contents = load_file_contents(match.absolute_path, strip=False)
        source_file = SourceFile(
            path=match,
            checksum=FileHash.from_contents(file_contents),
        )
        # Seed contents are never parsed, so don't keep them in memory.
        source_file.contents = ''
    source_file.parse_file_type = ParseFileType.Seed
    source_file.project_name = project_name
    return source_file
def test_select_state_changed_seed_checksum_path_to_path(
        manifest, previous_state, seed):
    """When BOTH states carry path-based checksums (>1MB seeds), content
    modification can't be detected: nothing matches 'modified', and exactly
    one warning is issued."""
    change_node(
        previous_state.manifest, seed.replace(
            checksum=FileHash(name='path', checksum=seed.original_file_path)))
    change_node(
        manifest, seed.replace(
            checksum=FileHash(name='path', checksum=seed.original_file_path)))
    method = statemethod(manifest, previous_state)
    with mock.patch(
            'dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch:
        # path-vs-path comparison is inconclusive -> no match, but warn once
        assert not search_manifest_using_method(manifest, method, 'modified')
        warn_or_error_patch.assert_called_once()
        msg = warn_or_error_patch.call_args[0][0]
        assert msg.startswith('Found a seed (pkg.seed) >1MB in size')
    with mock.patch(
            'dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch:
        # 'new' selection doesn't consult checksums, so no warning here
        assert not search_manifest_using_method(manifest, method, 'new')
        warn_or_error_patch.assert_not_called()
def make_data_test(pkg, name, sql, refs=None, sources=None, tags=None,
                   path=None, config_kwargs=None):
    """Construct a ParsedDataTestNode fixture (tagged 'data' by default)."""
    refs = [] if refs is None else refs
    sources = [] if sources is None else sources
    tags = ['data'] if tags is None else tags
    path = f'{name}.sql' if path is None else path
    config_kwargs = {} if config_kwargs is None else config_kwargs

    depends_on_nodes = []
    ref_values = []
    for ref in refs:
        ref_values.append([ref.name])
        depends_on_nodes.append(ref.unique_id)
    source_values = []
    for src in sources:
        source_values.append([src.source_name, src.name])
        depends_on_nodes.append(src.unique_id)

    return ParsedDataTestNode(
        raw_sql=sql,
        database='dbt',
        schema='dbt_schema',
        name=name,
        alias=name,
        fqn=['minimal', 'data_test', name],
        unique_id=f'test.{pkg}.{name}',
        package_name=pkg,
        root_path='/usr/dbt/some-project',
        path=path,
        original_file_path=f'tests/{path}',
        config=TestConfig(**config_kwargs),
        tags=tags,
        refs=ref_values,
        sources=source_values,
        depends_on=DependsOn(nodes=depends_on_nodes),
        resource_type=NodeType.Test,
        checksum=FileHash.from_contents(''),
    )
def make_seed(pkg, name, path=None, loader=None, alias=None, tags=None,
              fqn_extras=None, checksum=None):
    """Construct a ParsedSeedNode fixture.

    checksum: optional FileHash; defaults to the hash of the empty string.
    loader: accepted for interface compatibility; not used by the node
        constructor here.
    """
    if alias is None:
        alias = name
    if tags is None:
        tags = []
    if path is None:
        path = f'{name}.csv'
    if fqn_extras is None:
        fqn_extras = []
    if checksum is None:
        checksum = FileHash.from_contents('')
    fqn = [pkg] + fqn_extras + [name]
    return ParsedSeedNode(
        raw_sql='',
        database='dbt',
        schema='dbt_schema',
        alias=alias,
        name=name,
        fqn=fqn,
        unique_id=f'seed.{pkg}.{name}',
        package_name=pkg,
        root_path='/usr/dbt/some-project',
        path=path,
        original_file_path=f'data/{path}',
        tags=tags,
        resource_type=NodeType.Seed,
        # BUG FIX: was hard-coded FileHash.from_contents(''), silently
        # discarding the caller-supplied `checksum` argument.
        checksum=checksum,
    )
def file_block_for(self, data: str, filename: str, searched: str):
    """Wrap raw file text *data* in a FileBlock rooted in the snowplow test package."""
    file_path = FilePath(
        searched_path=searched,
        relative_path=normalize(filename),
        project_root=get_abs_os_path('./dbt_modules/snowplow'),
    )
    source_file = SourceFile(
        path=file_path,
        checksum=FileHash.from_contents(data),
    )
    source_file.contents = data
    return FileBlock(file=source_file)
def load_file(
    self,
    path: FilePath,
    *,
    set_contents: bool = True,
) -> SourceFile:
    """Read *path* into a SourceFile.

    The checksum always covers the raw contents; when set_contents is False
    the text itself is dropped (contents set to '') while the hash is kept.
    """
    raw = load_file_contents(path.absolute_path, strip=False)
    source_file = SourceFile(path=path, checksum=FileHash.from_contents(raw))
    source_file.contents = raw.strip() if set_contents else ''
    return source_file
def test_select_state_changed_seed_checksum_path_to_sha(
        manifest, previous_state, seed):
    """A seed moving from a path-based checksum (previous state) to a real
    content checksum (current state) counts as 'modified', with no warning."""
    change_node(
        previous_state.manifest, seed.replace(
            checksum=FileHash(name='path', checksum=seed.original_file_path)))
    method = statemethod(manifest, previous_state)
    with mock.patch(
            'dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch:
        assert search_manifest_using_method(manifest, method, 'modified') == {'seed'}
        warn_or_error_patch.assert_not_called()
    with mock.patch(
            'dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch:
        assert not search_manifest_using_method(manifest, method, 'new')
        warn_or_error_patch.assert_not_called()
def create_test_node(
    self,
    target: Union[UnpatchedSourceDefinition, UnparsedNodeUpdate],
    path: str,
    config: ContextConfig,
    tags: List[str],
    fqn: List[str],
    name: str,
    raw_sql: str,
    test_metadata: Dict[str, Any],
    column_name: Optional[str],
) -> ParsedSchemaTestNode:
    """Validate and build a ParsedSchemaTestNode from the given pieces.

    Raises CompilationException (chained from the ValidationError) when the
    assembled dict fails schema validation.
    """
    node_dict = {
        'alias': name,
        'schema': self.default_schema,
        'database': self.default_database,
        'fqn': fqn,
        'name': name,
        'root_path': self.project.project_root,
        'resource_type': self.resource_type,
        'tags': tags,
        'path': path,
        'original_file_path': target.original_file_path,
        'package_name': self.project.project_name,
        'raw_sql': raw_sql,
        'unique_id': self.generate_unique_id(name),
        'config': self.config_dict(config),
        'test_metadata': test_metadata,
        'column_name': column_name,
        'checksum': FileHash.empty().to_dict(omit_none=True),
    }
    try:
        ParsedSchemaTestNode.validate(node_dict)
        return ParsedSchemaTestNode.from_dict(node_dict)
    except ValidationError as exc:
        # this is a bit silly, but build an UnparsedNode just for error
        # message reasons
        error_node = self._create_error_node(
            name=target.name,
            path=path,
            original_file_path=target.original_file_path,
            raw_sql=raw_sql,
        )
        raise CompilationException(
            validator_error_message(exc), node=error_node
        ) from exc
def setUp(self):
    """Create a minimal ParsedModelNode plus mocked context/config fixtures."""
    self.model = ParsedModelNode(
        alias='model_one',
        name='model_one',
        database='dbt',
        schema='analytics',
        resource_type=NodeType.Model,
        unique_id='model.root.model_one',
        fqn=['root', 'model_one'],
        package_name='root',
        original_file_path='model_one.sql',
        root_path='/usr/src/app',
        refs=[],
        sources=[],
        depends_on=DependsOn(),
        # explicit defaults for every config field the tests may inspect
        config=NodeConfig.from_dict({
            'enabled': True,
            'materialized': 'view',
            'persist_docs': {},
            'post-hook': [],
            'pre-hook': [],
            'vars': {},
            'quoting': {},
            'column_types': {},
            'tags': [],
        }),
        tags=[],
        path='model_one.sql',
        raw_sql='',
        description='',
        columns={},
        checksum=FileHash.from_contents(''),
    )
    self.context = mock.MagicMock()
    self.provider = VarProvider({})
    # v2 config whose vars resolve through the provider above
    self.config = mock.MagicMock(
        config_version=2, vars=self.provider, cli_vars={}, project_name='root')
def test__prepend_ctes__already_has_cte(self):
    """_recursively_prepend_ctes must splice the ephemeral model's CTE in
    FRONT of the view's pre-existing `with cte as ...` clause."""
    ephemeral_config = self.model_config.replace(materialized='ephemeral')
    manifest = Manifest(
        macros={},
        nodes={
            # the consumer: already has its own CTE and one pending
            # (un-injected) extra CTE from the ephemeral dependency
            'model.root.view': CompiledModelNode(
                name='view',
                database='dbt',
                schema='analytics',
                alias='view',
                resource_type=NodeType.Model,
                unique_id='model.root.view',
                fqn=['root', 'view'],
                package_name='root',
                root_path='/usr/src/app',
                refs=[],
                sources=[],
                depends_on=DependsOn(nodes=['model.root.ephemeral']),
                config=self.model_config,
                tags=[],
                path='view.sql',
                original_file_path='view.sql',
                raw_sql='select * from {{ref("ephemeral")}}',
                compiled=True,
                extra_ctes_injected=False,
                extra_ctes=[InjectedCTE(
                    id='model.root.ephemeral',
                    sql='select * from source_table')],
                compiled_sql=(
                    'with cte as (select * from something_else) '
                    'select * from __dbt__cte__ephemeral'),
                checksum=FileHash.from_contents(''),
            ),
            # the ephemeral dependency supplying the CTE body
            'model.root.ephemeral': CompiledModelNode(
                name='ephemeral',
                database='dbt',
                schema='analytics',
                alias='view',
                resource_type=NodeType.Model,
                unique_id='model.root.ephemeral',
                fqn=['root', 'ephemeral'],
                package_name='root',
                root_path='/usr/src/app',
                refs=[],
                sources=[],
                depends_on=DependsOn(),
                config=ephemeral_config,
                tags=[],
                path='ephemeral.sql',
                original_file_path='ephemeral.sql',
                raw_sql='select * from source_table',
                compiled=True,
                compiled_sql='select * from source_table',
                extra_ctes_injected=False,
                extra_ctes=[],
                checksum=FileHash.from_contents(''),
            ),
        },
        sources={},
        docs={},
        disabled=[],
        files={},
        exposures={},
        selectors={},
    )
    compiler = dbt.compilation.Compiler(self.config)
    result, _ = compiler._recursively_prepend_ctes(
        manifest.nodes['model.root.view'],
        manifest,
        {}
    )
    self.assertEqual(result, manifest.nodes['model.root.view'])
    self.assertEqual(result.extra_ctes_injected, True)
    # ephemeral CTE must come first, then the original 'cte'
    self.assertEqualIgnoreWhitespace(
        result.compiled_sql,
        ('with __dbt__cte__ephemeral as ('
         'select * from source_table'
         '), cte as (select * from something_else) '
         'select * from __dbt__cte__ephemeral'))
    # only the requested node was injected; the dependency stays untouched
    self.assertEqual(
        manifest.nodes['model.root.ephemeral'].extra_ctes_injected,
        False)
def test__prepend_ctes__no_ctes(self):
    """With no extra CTEs pending, _recursively_prepend_ctes must leave
    compiled_sql unchanged (for both a node with its own CTE and one without)
    while still marking extra_ctes_injected."""
    manifest = Manifest(
        macros={},
        nodes={
            # has a hand-written CTE in its SQL but no extra_ctes to inject
            'model.root.view': CompiledModelNode(
                name='view',
                database='dbt',
                schema='analytics',
                alias='view',
                resource_type=NodeType.Model,
                unique_id='model.root.view',
                fqn=['root', 'view'],
                package_name='root',
                root_path='/usr/src/app',
                refs=[],
                sources=[],
                depends_on=DependsOn(),
                config=self.model_config,
                tags=[],
                path='view.sql',
                original_file_path='view.sql',
                raw_sql=('with cte as (select * from something_else) '
                         'select * from source_table'),
                compiled=True,
                extra_ctes_injected=False,
                extra_ctes=[],
                compiled_sql=('with cte as (select * from something_else) '
                              'select * from source_table'),
                checksum=FileHash.from_contents(''),
            ),
            # plain select: no CTE at all
            'model.root.view_no_cte': CompiledModelNode(
                name='view_no_cte',
                database='dbt',
                schema='analytics',
                alias='view_no_cte',
                resource_type=NodeType.Model,
                unique_id='model.root.view_no_cte',
                fqn=['root', 'view_no_cte'],
                package_name='root',
                root_path='/usr/src/app',
                refs=[],
                sources=[],
                depends_on=DependsOn(),
                config=self.model_config,
                tags=[],
                path='view.sql',
                original_file_path='view.sql',
                raw_sql='select * from source_table',
                compiled=True,
                extra_ctes_injected=False,
                extra_ctes=[],
                compiled_sql=('select * from source_table'),
                checksum=FileHash.from_contents(''),
            ),
        },
        sources={},
        docs={},
        disabled=[],
        files={},
        exposures={},
        selectors={},
    )
    compiler = dbt.compilation.Compiler(self.config)
    result, _ = compiler._recursively_prepend_ctes(
        manifest.nodes['model.root.view'],
        manifest,
        {}
    )
    self.assertEqual(
        result,
        manifest.nodes.get('model.root.view'))
    self.assertTrue(result.extra_ctes_injected)
    # nothing to inject -> compiled_sql unchanged
    self.assertEqualIgnoreWhitespace(
        result.compiled_sql,
        manifest.nodes.get('model.root.view').compiled_sql)

    compiler = dbt.compilation.Compiler(self.config)
    result, _ = compiler._recursively_prepend_ctes(
        manifest.nodes.get('model.root.view_no_cte'),
        manifest,
        {})
    self.assertEqual(
        result,
        manifest.nodes.get('model.root.view_no_cte'))
    self.assertTrue(result.extra_ctes_injected)
    self.assertEqualIgnoreWhitespace(
        result.compiled_sql,
        manifest.nodes.get('model.root.view_no_cte').compiled_sql)
def make_schema_test(pkg, test_name, test_model, test_kwargs, path=None,
                     refs=None, sources=None, tags=None, column_name=None):
    """Construct a ParsedSchemaTestNode fixture for *test_name* applied to
    *test_model* (either a ParsedSourceDefinition or a ref-able node).

    BUG FIX: the node constructor previously passed sources=[], discarding the
    source_values collected from test_model and the `sources` argument; it now
    passes them through, matching how refs are handled.
    """
    kwargs = test_kwargs.copy()
    ref_values = []
    source_values = []
    # this doesn't really have to be correct
    if isinstance(test_model, ParsedSourceDefinition):
        kwargs['model'] = "{{ source('" + test_model.source_name + \
            "', '" + test_model.name + "') }}"
        source_values.append([test_model.source_name, test_model.name])
    else:
        kwargs['model'] = "{{ ref('" + test_model.name + "')}}"
        ref_values.append([test_model.name])
    if column_name is not None:
        kwargs['column_name'] = column_name

    # whatever
    args_name = test_model.search_name.replace(".", "_")
    if column_name is not None:
        args_name += '_' + column_name
    node_name = f'{test_name}_{args_name}'
    raw_sql = '{{ config(severity="ERROR") }}{{ test_' + \
        test_name + '(**dbt_schema_test_kwargs) }}'

    # a dotted test name means it lives in another package's namespace
    name_parts = test_name.split('.')
    if len(name_parts) == 2:
        namespace, test_name = name_parts
        macro_depends = f'model.{namespace}.{test_name}'
    elif len(name_parts) == 1:
        namespace = None
        macro_depends = f'model.dbt.{test_name}'
    else:
        assert False, f'invalid test name: {test_name}'

    if path is None:
        path = 'schema.yml'
    if tags is None:
        tags = ['schema']
    if refs is None:
        refs = []
    if sources is None:
        sources = []

    depends_on_nodes = []
    for ref in refs:
        ref_values.append([ref.name])
        depends_on_nodes.append(ref.unique_id)
    for source in sources:
        source_values.append([source.source_name, source.name])
        depends_on_nodes.append(source.unique_id)

    return ParsedSchemaTestNode(
        raw_sql=raw_sql,
        test_metadata=TestMetadata(
            namespace=namespace,
            name=test_name,
            kwargs=kwargs,
        ),
        database='dbt',
        schema='dbt_postgres',
        name=node_name,
        alias=node_name,
        fqn=['minimal', 'schema_test', node_name],
        unique_id=f'test.{pkg}.{node_name}',
        package_name=pkg,
        root_path='/usr/dbt/some-project',
        path=f'schema_test/{node_name}.sql',
        original_file_path=f'models/{path}',
        resource_type=NodeType.Test,
        tags=tags,
        refs=ref_values,
        # BUG FIX: was sources=[] — collected source_values were dropped
        sources=source_values,
        depends_on=DependsOn(macros=[macro_depends], nodes=depends_on_nodes),
        column_name=column_name,
        checksum=FileHash.from_contents(''),
    )
def rpc(cls):
    """Alternate constructor for RPC use: a state check with empty hashes.

    NOTE(review): positional args appear to be (vars_hash, profile_hash,
    project_hashes), matching the keyword order used elsewhere — confirm.
    """
    # ugh!
    return cls(FileHash.empty(), FileHash.empty(), {})