Ejemplo n.º 1
0
def make_parse_result(
    config: RuntimeConfig, all_projects: Mapping[str, Project]
) -> ParseResult:
    """Build a ParseResult that fingerprints the inputs to parsing.

    Three hashes are computed: one over the ``vars``, ``profile`` and
    ``target`` values found on ``config.args`` joined with ``__version__``,
    one over the text of ``profiles.yml`` in ``config.args.profiles_dir``,
    and one per project over the text of its ``dbt_project.yml``.
    """
    # A change in any of these values means the parser must be rejected.
    cli_vars = getattr(config.args, 'vars', '{}') or '{}'
    cli_profile = getattr(config.args, 'profile', '') or ''
    cli_target = getattr(config.args, 'target', '') or ''
    hash_input = '\x00'.join(
        [cli_vars, cli_profile, cli_target, __version__]
    )
    vars_hash = FileHash.from_contents(hash_input)

    profiles_file = os.path.join(config.args.profiles_dir, 'profiles.yml')
    with open(profiles_file) as handle:
        profile_hash = FileHash.from_contents(handle.read())

    project_hashes = {}
    for project_name, project in all_projects.items():
        project_file = os.path.join(project.project_root, 'dbt_project.yml')
        with open(project_file) as handle:
            project_text = handle.read()
        project_hashes[project_name] = FileHash.from_contents(project_text)

    return ParseResult(
        vars_hash=vars_hash,
        profile_hash=profile_hash,
        project_hashes=project_hashes,
    )
Ejemplo n.º 2
0
    def build_manifest_state_check(self):
        """Return a ManifestStateCheck describing the current parse inputs.

        The check holds a hash of the ``vars``/``profile``/``target`` values
        on the root project's ``args`` joined with ``__version__``, a hash of
        ``profiles.yml`` from ``args.profiles_dir``, and one hash per entry of
        ``self.all_projects`` taken from that project's ``dbt_project.yml``.
        """
        root = self.root_project
        projects = self.all_projects

        # A change in any of these values means the parser must be rejected.
        hash_parts = [
            getattr(root.args, 'vars', '{}') or '{}',
            getattr(root.args, 'profile', '') or '',
            getattr(root.args, 'target', '') or '',
            __version__,
        ]
        vars_hash = FileHash.from_contents('\x00'.join(hash_parts))

        profiles_file = os.path.join(root.args.profiles_dir, 'profiles.yml')
        with open(profiles_file) as handle:
            profile_hash = FileHash.from_contents(handle.read())

        project_hashes = {}
        for project_name, project in projects.items():
            project_file = os.path.join(
                project.project_root, 'dbt_project.yml'
            )
            with open(project_file) as handle:
                project_hashes[project_name] = FileHash.from_contents(
                    handle.read()
                )

        return ManifestStateCheck(
            vars_hash=vars_hash,
            profile_hash=profile_hash,
            project_hashes=project_hashes,
        )
Ejemplo n.º 3
0
 def _mock_state_check(self):
     """Return a ManifestStateCheck built from fixed placeholder hashes.

     No files are read: the vars and profile hashes come from the literal
     strings 'vars' and 'profile', and each name in ``self.all_projects``
     is hashed from the name itself.
     """
     return ManifestStateCheck(
         vars_hash=FileHash.from_contents('vars'),
         project_hashes={
             name: FileHash.from_contents(name)
             for name in self.all_projects
         },
         profile_hash=FileHash.from_contents('profile'),
     )
Ejemplo n.º 4
0
def basic_compiled_schema_test_node():
    """Return a CompiledSchemaTestNode fixture configured with severity 'warn'.

    The node is named 'foo' in package 'test', is marked as compiled with
    its single extra CTE already injected, and tests column 'id'.
    """
    injected_cte = InjectedCTE('whatever', 'select * from other')
    metadata = TestMetadata(namespace=None, name='foo', kwargs={})
    return CompiledSchemaTestNode(
        # identity
        name='foo',
        alias='bar',
        package_name='test',
        unique_id='model.test.foo',
        fqn=['test', 'models', 'foo'],
        resource_type=NodeType.Test,
        # location
        root_path='/root/',
        path='/root/x/path.sql',
        original_file_path='/root/path.sql',
        database='test_db',
        schema='test_schema',
        # relationships and metadata
        refs=[],
        sources=[],
        depends_on=DependsOn(),
        tags=[],
        meta={},
        description='',
        deferred=False,
        # configuration
        config=TestConfig(severity='warn'),
        unrendered_config={'severity': 'warn'},
        # test specifics
        column_name='id',
        test_metadata=metadata,
        # sql and compilation state
        raw_sql='select * from {{ ref("other") }}',
        compiled=True,
        extra_ctes=[injected_cte],
        extra_ctes_injected=True,
        compiled_sql='with whatever as (select * from other) select * from whatever',
        checksum=FileHash.from_contents(''),
    )
Ejemplo n.º 5
0
def basic_compiled_model():
    """Return a CompiledModelNode fixture with a default NodeConfig.

    The node is named 'foo' in package 'test', is flagged as deferred, and
    is marked as compiled with its single extra CTE already injected.
    """
    injected_cte = InjectedCTE('whatever', 'select * from other')
    return CompiledModelNode(
        # identity
        name='foo',
        alias='bar',
        package_name='test',
        unique_id='model.test.foo',
        fqn=['test', 'models', 'foo'],
        resource_type=NodeType.Model,
        # location
        root_path='/root/',
        path='/root/models/foo.sql',
        original_file_path='models/foo.sql',
        database='test_db',
        schema='test_schema',
        # relationships and metadata
        refs=[],
        sources=[],
        depends_on=DependsOn(),
        tags=[],
        meta={},
        description='',
        deferred=True,
        # configuration
        config=NodeConfig(),
        unrendered_config={},
        # sql and compilation state
        raw_sql='select * from {{ ref("other") }}',
        compiled=True,
        extra_ctes=[injected_cte],
        extra_ctes_injected=True,
        compiled_sql='with whatever as (select * from other) select * from whatever',
        checksum=FileHash.from_contents(''),
    )
Ejemplo n.º 6
0
def test_select_state_changed_seed_checksum_sha_to_sha(manifest,
                                                       previous_state, seed):
    """A seed whose content checksum changed is 'modified' but not 'new'."""
    changed_seed = seed.replace(checksum=FileHash.from_contents('changed'))
    change_node(manifest, changed_seed)
    method = statemethod(manifest, previous_state)

    modified = search_manifest_using_method(manifest, method, 'modified')
    assert modified == {'seed'}

    added = search_manifest_using_method(manifest, method, 'new')
    assert not added
Ejemplo n.º 7
0
def make_model(pkg, name, sql, refs=None, sources=None, tags=None,
               path=None, alias=None, config_kwargs=None, fqn_extras=None):
    """Build a ParsedModelNode fixture named ``name`` in package ``pkg``.

    ``refs`` and ``sources`` are nodes this model depends on: their names
    populate the node's ``refs``/``sources`` lists and their unique ids
    populate ``depends_on`` (refs first, then sources). ``path`` defaults to
    ``<name>.sql``, ``alias`` defaults to ``name``, ``config_kwargs`` is
    forwarded to ``NodeConfig`` and ``fqn_extras`` is inserted between the
    package and the name in the fqn.
    """
    refs = [] if refs is None else refs
    sources = [] if sources is None else sources
    tags = [] if tags is None else tags
    path = f'{name}.sql' if path is None else path
    alias = name if alias is None else alias
    config_kwargs = {} if config_kwargs is None else config_kwargs
    fqn_extras = [] if fqn_extras is None else fqn_extras

    ref_values = []
    source_values = []
    upstream_ids = []
    for upstream in refs:
        ref_values.append([upstream.name])
        upstream_ids.append(upstream.unique_id)
    for upstream in sources:
        source_values.append([upstream.source_name, upstream.name])
        upstream_ids.append(upstream.unique_id)

    return ParsedModelNode(
        name=name,
        alias=alias,
        package_name=pkg,
        unique_id=f'model.{pkg}.{name}',
        fqn=[pkg] + fqn_extras + [name],
        resource_type=NodeType.Model,
        database='dbt',
        schema='dbt_schema',
        root_path='/usr/dbt/some-project',
        path=path,
        original_file_path=f'models/{path}',
        raw_sql=sql,
        config=NodeConfig(**config_kwargs),
        tags=tags,
        refs=ref_values,
        sources=source_values,
        depends_on=DependsOn(nodes=upstream_ids),
        checksum=FileHash.from_contents(''),
    )
Ejemplo n.º 8
0
def load_source_file(path: FilePath, parse_file_type: ParseFileType,
                     project_name: str) -> SourceFile:
    """Read the file at ``path`` and wrap it in a SourceFile.

    The checksum is computed over the unstripped file text, while the
    ``contents`` stored on the returned SourceFile are stripped.
    """
    raw_text = load_file_contents(path.absolute_path, strip=False)
    loaded = SourceFile(
        path=path,
        checksum=FileHash.from_contents(raw_text),
        parse_file_type=parse_file_type,
        project_name=project_name,
    )
    loaded.contents = raw_text.strip()
    return loaded
Ejemplo n.º 9
0
 def use_models(self, models):
     """Append one mock SourceFile to ``self.mock_models`` per model.

     Each key of ``models`` becomes the relative path ``<key>.sql`` searched
     under 'models' with the normcased current working directory as project
     root; each value is stored as the file's contents.
     """
     for model_name, model_body in models.items():
         model_path = FilePath(
             relative_path='{}.sql'.format(model_name),
             searched_path='models',
             project_root=os.path.normcase(os.getcwd()),
         )
         # FileHash can't be empty or 'search_key' will be None
         mock_file = SourceFile(
             path=model_path,
             checksum=FileHash.from_contents('abc'),
         )
         mock_file.contents = model_body
         self.mock_models.append(mock_file)
Ejemplo n.º 10
0
def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
    """Build the SourceFile for a seed found at ``match``.

    Seeds reported as too large by ``match.seed_too_large()`` are created
    through ``SourceFile.big_seed``; all others get a checksum of their
    unstripped text and empty ``contents``. The result is always tagged with
    ``ParseFileType.Seed`` and ``project_name``.
    """
    if not match.seed_too_large():
        raw_text = load_file_contents(match.absolute_path, strip=False)
        seed_file = SourceFile(
            path=match,
            checksum=FileHash.from_contents(raw_text),
        )
        # Only the checksum is kept; the text itself is not stored.
        seed_file.contents = ''
    else:
        # We don't want to calculate a hash of this file. Use the path.
        seed_file = SourceFile.big_seed(match)

    seed_file.parse_file_type = ParseFileType.Seed
    seed_file.project_name = project_name
    return seed_file
Ejemplo n.º 11
0
def test_select_state_changed_seed_checksum_path_to_path(
        manifest, previous_state, seed):
    """Seeds with 'path' checksums on both sides are not reported as changed.

    Searching for 'modified' must return nothing and warn exactly once with
    the large-seed message; searching for 'new' must return nothing and
    not warn at all.
    """
    for target_manifest in (previous_state.manifest, manifest):
        path_checksum = FileHash(
            name='path', checksum=seed.original_file_path)
        change_node(target_manifest, seed.replace(checksum=path_checksum))

    method = statemethod(manifest, previous_state)
    patch_target = 'dbt.contracts.graph.parsed.warn_or_error'

    with mock.patch(patch_target) as warn_or_error_patch:
        modified = search_manifest_using_method(manifest, method, 'modified')
        assert not modified
        warn_or_error_patch.assert_called_once()
        msg = warn_or_error_patch.call_args[0][0]
        assert msg.startswith('Found a seed (pkg.seed) >1MB in size')

    with mock.patch(patch_target) as warn_or_error_patch:
        added = search_manifest_using_method(manifest, method, 'new')
        assert not added
        warn_or_error_patch.assert_not_called()
Ejemplo n.º 12
0
def make_data_test(pkg, name, sql, refs=None, sources=None, tags=None,
                   path=None, config_kwargs=None):
    """Build a ParsedDataTestNode fixture named ``name`` in package ``pkg``.

    ``refs`` and ``sources`` are nodes the test depends on: their names
    populate the node's ``refs``/``sources`` lists and their unique ids
    populate ``depends_on`` (refs first, then sources). ``tags`` defaults to
    ``['data']``, ``path`` defaults to ``<name>.sql`` and ``config_kwargs``
    is forwarded to ``TestConfig``.
    """
    refs = [] if refs is None else refs
    sources = [] if sources is None else sources
    tags = ['data'] if tags is None else tags
    path = f'{name}.sql' if path is None else path
    config_kwargs = {} if config_kwargs is None else config_kwargs

    ref_values = []
    source_values = []
    upstream_ids = []
    for upstream in refs:
        ref_values.append([upstream.name])
        upstream_ids.append(upstream.unique_id)
    for upstream in sources:
        source_values.append([upstream.source_name, upstream.name])
        upstream_ids.append(upstream.unique_id)

    return ParsedDataTestNode(
        name=name,
        alias=name,
        package_name=pkg,
        unique_id=f'test.{pkg}.{name}',
        fqn=['minimal', 'data_test', name],
        resource_type=NodeType.Test,
        database='dbt',
        schema='dbt_schema',
        root_path='/usr/dbt/some-project',
        path=path,
        original_file_path=f'tests/{path}',
        raw_sql=sql,
        config=TestConfig(**config_kwargs),
        tags=tags,
        refs=ref_values,
        sources=source_values,
        depends_on=DependsOn(nodes=upstream_ids),
        checksum=FileHash.from_contents(''),
    )
Ejemplo n.º 13
0
def make_seed(pkg,
              name,
              path=None,
              loader=None,
              alias=None,
              tags=None,
              fqn_extras=None,
              checksum=None):
    """Build a ParsedSeedNode fixture named ``name`` in package ``pkg``.

    ``path`` defaults to ``<name>.csv``, ``alias`` defaults to ``name``,
    ``tags`` defaults to an empty list and ``fqn_extras`` is inserted
    between the package and the name in the fqn. ``checksum`` is the
    FileHash stored on the node; when omitted, the hash of the empty string
    is used. ``loader`` is accepted but not used by this function.
    """
    if alias is None:
        alias = name
    if tags is None:
        tags = []
    if path is None:
        path = f'{name}.csv'
    if fqn_extras is None:
        fqn_extras = []
    if checksum is None:
        checksum = FileHash.from_contents('')

    fqn = [pkg] + fqn_extras + [name]
    return ParsedSeedNode(
        raw_sql='',
        database='dbt',
        schema='dbt_schema',
        alias=alias,
        name=name,
        fqn=fqn,
        unique_id=f'seed.{pkg}.{name}',
        package_name=pkg,
        root_path='/usr/dbt/some-project',
        path=path,
        original_file_path=f'data/{path}',
        tags=tags,
        resource_type=NodeType.Seed,
        # Honor the caller-supplied checksum instead of always hashing ''.
        checksum=checksum,
    )
Ejemplo n.º 14
0
 def file_block_for(self, data: str, filename: str, searched: str):
     """Return a FileBlock wrapping a SourceFile that holds ``data``.

     The file path is ``filename`` (normalized) searched under ``searched``
     with './dbt_modules/snowplow' (made absolute) as the project root; the
     checksum is computed from ``data``.
     """
     block_path = FilePath(
         project_root=get_abs_os_path('./dbt_modules/snowplow'),
         relative_path=normalize(filename),
         searched_path=searched,
     )
     wrapped = SourceFile(
         path=block_path,
         checksum=FileHash.from_contents(data),
     )
     wrapped.contents = data
     return FileBlock(file=wrapped)
Ejemplo n.º 15
0
 def load_file(
     self,
     path: FilePath,
     *,
     set_contents: bool = True,
 ) -> SourceFile:
     """Read the file at ``path`` and return it as a SourceFile.

     The checksum is always computed over the unstripped text. When
     ``set_contents`` is true the stripped text is stored as ``contents``;
     otherwise ``contents`` is set to the empty string.
     """
     raw_text = load_file_contents(path.absolute_path, strip=False)
     loaded = SourceFile(
         path=path,
         checksum=FileHash.from_contents(raw_text),
     )
     loaded.contents = raw_text.strip() if set_contents else ''
     return loaded
Ejemplo n.º 16
0
def test_select_state_changed_seed_checksum_path_to_sha(
        manifest, previous_state, seed):
    """A seed with a 'path' checksum only in the previous state is 'modified'.

    The current manifest is left untouched. Neither the 'modified' search
    nor the 'new' search (which must return nothing) may emit a warning.
    """
    path_checksum = FileHash(name='path', checksum=seed.original_file_path)
    change_node(previous_state.manifest,
                seed.replace(checksum=path_checksum))

    method = statemethod(manifest, previous_state)
    patch_target = 'dbt.contracts.graph.parsed.warn_or_error'

    with mock.patch(patch_target) as warn_or_error_patch:
        modified = search_manifest_using_method(manifest, method, 'modified')
        assert modified == {'seed'}
        warn_or_error_patch.assert_not_called()

    with mock.patch(patch_target) as warn_or_error_patch:
        added = search_manifest_using_method(manifest, method, 'new')
        assert not added
        warn_or_error_patch.assert_not_called()
Ejemplo n.º 17
0
    def create_test_node(
        self,
        target: Union[UnpatchedSourceDefinition, UnparsedNodeUpdate],
        path: str,
        config: ContextConfig,
        tags: List[str],
        fqn: List[str],
        name: str,
        raw_sql: str,
        test_metadata: Dict[str, Any],
        column_name: Optional[str],
    ) -> ParsedSchemaTestNode:
        """Validate and build a ParsedSchemaTestNode for ``target``.

        The node dictionary is assembled from the arguments plus this
        parser's defaults (schema, database, project root and name, resource
        type) and an empty checksum.

        Raises:
            CompilationException: if the dictionary fails validation; the
                exception carries an error node built from ``target``.
        """
        node_dict = {
            # identity
            'name': name,
            'alias': name,
            'fqn': fqn,
            'unique_id': self.generate_unique_id(name),
            'resource_type': self.resource_type,
            'package_name': self.project.project_name,
            # location
            'schema': self.default_schema,
            'database': self.default_database,
            'root_path': self.project.project_root,
            'path': path,
            'original_file_path': target.original_file_path,
            # content and configuration
            'raw_sql': raw_sql,
            'tags': tags,
            'config': self.config_dict(config),
            'test_metadata': test_metadata,
            'column_name': column_name,
            'checksum': FileHash.empty().to_dict(omit_none=True),
        }
        try:
            ParsedSchemaTestNode.validate(node_dict)
            return ParsedSchemaTestNode.from_dict(node_dict)
        except ValidationError as err:
            msg = validator_error_message(err)
            # An UnparsedNode is built here only so the error message can
            # point at a node.
            error_node = self._create_error_node(
                name=target.name,
                path=path,
                original_file_path=target.original_file_path,
                raw_sql=raw_sql,
            )
            raise CompilationException(msg, node=error_node) from err
Ejemplo n.º 18
0
 def setUp(self):
     """Create the model node, mock context, var provider and mock config.

     ``self.model`` is a view-materialized ParsedModelNode named
     'model_one'; ``self.config`` is a MagicMock whose ``vars`` is the
     empty VarProvider stored on ``self.provider``.
     """
     model_config = NodeConfig.from_dict({
         'enabled': True,
         'materialized': 'view',
         'persist_docs': {},
         'post-hook': [],
         'pre-hook': [],
         'vars': {},
         'quoting': {},
         'column_types': {},
         'tags': [],
     })
     self.model = ParsedModelNode(
         name='model_one',
         alias='model_one',
         package_name='root',
         unique_id='model.root.model_one',
         fqn=['root', 'model_one'],
         resource_type=NodeType.Model,
         database='dbt',
         schema='analytics',
         root_path='/usr/src/app',
         path='model_one.sql',
         original_file_path='model_one.sql',
         raw_sql='',
         description='',
         columns={},
         tags=[],
         refs=[],
         sources=[],
         depends_on=DependsOn(),
         config=model_config,
         checksum=FileHash.from_contents(''),
     )
     self.context = mock.MagicMock()
     self.provider = VarProvider({})
     self.config = mock.MagicMock(
         config_version=2,
         vars=self.provider,
         cli_vars={},
         project_name='root',
     )
Ejemplo n.º 19
0
    def test__prepend_ctes__already_has_cte(self):
        """An ephemeral CTE is prepended ahead of a CTE the SQL already has.

        The view node's compiled SQL starts with its own ``with cte as``
        clause; after the call the ephemeral model's CTE must come first,
        the view must be flagged as injected, and the ephemeral node must
        remain un-injected.
        """
        ephemeral_config = self.model_config.replace(materialized='ephemeral')

        view_node = CompiledModelNode(
            name='view',
            alias='view',
            package_name='root',
            unique_id='model.root.view',
            fqn=['root', 'view'],
            resource_type=NodeType.Model,
            database='dbt',
            schema='analytics',
            root_path='/usr/src/app',
            path='view.sql',
            original_file_path='view.sql',
            tags=[],
            refs=[],
            sources=[],
            depends_on=DependsOn(nodes=['model.root.ephemeral']),
            config=self.model_config,
            raw_sql='select * from {{ref("ephemeral")}}',
            compiled=True,
            compiled_sql=(
                'with cte as (select * from something_else) '
                'select * from __dbt__cte__ephemeral'),
            extra_ctes_injected=False,
            extra_ctes=[
                InjectedCTE(id='model.root.ephemeral',
                            sql='select * from source_table'),
            ],
            checksum=FileHash.from_contents(''),
        )
        ephemeral_node = CompiledModelNode(
            name='ephemeral',
            alias='view',
            package_name='root',
            unique_id='model.root.ephemeral',
            fqn=['root', 'ephemeral'],
            resource_type=NodeType.Model,
            database='dbt',
            schema='analytics',
            root_path='/usr/src/app',
            path='ephemeral.sql',
            original_file_path='ephemeral.sql',
            tags=[],
            refs=[],
            sources=[],
            depends_on=DependsOn(),
            config=ephemeral_config,
            raw_sql='select * from source_table',
            compiled=True,
            compiled_sql='select * from source_table',
            extra_ctes_injected=False,
            extra_ctes=[],
            checksum=FileHash.from_contents(''),
        )
        manifest = Manifest(
            macros={},
            nodes={
                'model.root.view': view_node,
                'model.root.ephemeral': ephemeral_node,
            },
            sources={},
            docs={},
            disabled=[],
            files={},
            exposures={},
            selectors={},
        )

        compiler = dbt.compilation.Compiler(self.config)
        result, _ = compiler._recursively_prepend_ctes(
            manifest.nodes['model.root.view'], manifest, {})

        self.assertEqual(result, manifest.nodes['model.root.view'])
        self.assertEqual(result.extra_ctes_injected, True)

        expected_sql = (
            'with __dbt__cte__ephemeral as ('
            'select * from source_table'
            '), cte as (select * from something_else) '
            'select * from __dbt__cte__ephemeral')
        self.assertEqualIgnoreWhitespace(result.compiled_sql, expected_sql)

        ephemeral_after = manifest.nodes['model.root.ephemeral']
        self.assertEqual(ephemeral_after.extra_ctes_injected, False)
Ejemplo n.º 20
0
    def test__prepend_ctes__no_ctes(self):
        """Nodes without extra CTEs keep their compiled SQL unchanged.

        Checked for a view whose SQL already contains a ``with`` clause and
        for one that has none; both must come back flagged as injected.
        """
        sql_with_cte = ('with cte as (select * from something_else) '
                        'select * from source_table')

        view_node = CompiledModelNode(
            name='view',
            alias='view',
            package_name='root',
            unique_id='model.root.view',
            fqn=['root', 'view'],
            resource_type=NodeType.Model,
            database='dbt',
            schema='analytics',
            root_path='/usr/src/app',
            path='view.sql',
            original_file_path='view.sql',
            tags=[],
            refs=[],
            sources=[],
            depends_on=DependsOn(),
            config=self.model_config,
            raw_sql=sql_with_cte,
            compiled=True,
            compiled_sql=sql_with_cte,
            extra_ctes_injected=False,
            extra_ctes=[],
            checksum=FileHash.from_contents(''),
        )
        view_no_cte_node = CompiledModelNode(
            name='view_no_cte',
            alias='view_no_cte',
            package_name='root',
            unique_id='model.root.view_no_cte',
            fqn=['root', 'view_no_cte'],
            resource_type=NodeType.Model,
            database='dbt',
            schema='analytics',
            root_path='/usr/src/app',
            path='view.sql',
            original_file_path='view.sql',
            tags=[],
            refs=[],
            sources=[],
            depends_on=DependsOn(),
            config=self.model_config,
            raw_sql='select * from source_table',
            compiled=True,
            compiled_sql='select * from source_table',
            extra_ctes_injected=False,
            extra_ctes=[],
            checksum=FileHash.from_contents(''),
        )
        manifest = Manifest(
            macros={},
            nodes={
                'model.root.view': view_node,
                'model.root.view_no_cte': view_no_cte_node,
            },
            sources={},
            docs={},
            disabled=[],
            files={},
            exposures={},
            selectors={},
        )

        # First case: the SQL already has its own CTE.
        compiler = dbt.compilation.Compiler(self.config)
        result, _ = compiler._recursively_prepend_ctes(
            manifest.nodes['model.root.view'], manifest, {})

        expected_view = manifest.nodes.get('model.root.view')
        self.assertEqual(result, expected_view)
        self.assertTrue(result.extra_ctes_injected)
        self.assertEqualIgnoreWhitespace(
            result.compiled_sql,
            manifest.nodes.get('model.root.view').compiled_sql)

        # Second case: the SQL has no CTE at all.
        compiler = dbt.compilation.Compiler(self.config)
        result, _ = compiler._recursively_prepend_ctes(
            manifest.nodes.get('model.root.view_no_cte'), manifest, {})

        expected_no_cte = manifest.nodes.get('model.root.view_no_cte')
        self.assertEqual(result, expected_no_cte)
        self.assertTrue(result.extra_ctes_injected)
        self.assertEqualIgnoreWhitespace(
            result.compiled_sql,
            manifest.nodes.get('model.root.view_no_cte').compiled_sql)
Ejemplo n.º 21
0
def make_schema_test(pkg, test_name, test_model, test_kwargs, path=None,
                     refs=None, sources=None, tags=None, column_name=None):
    """Build a ParsedSchemaTestNode fixture for ``test_name`` on ``test_model``.

    ``test_name`` may be ``<name>`` or ``<namespace>.<name>``. A copy of
    ``test_kwargs`` is extended with a ``model`` entry (a ``source(...)``
    expression for a ParsedSourceDefinition, a ``ref(...)`` expression
    otherwise) and, when given, ``column_name``. ``path`` defaults to
    'schema.yml' and ``tags`` to ``['schema']``. The unique ids of ``refs``
    and ``sources`` become the node's ``depends_on`` nodes.
    """
    kwargs = test_kwargs.copy()
    ref_values = []
    source_values = []
    # this doesn't really have to be correct
    if not isinstance(test_model, ParsedSourceDefinition):
        kwargs['model'] = "{{ ref('" + test_model.name + "')}}"
        ref_values.append([test_model.name])
    else:
        kwargs['model'] = "{{ source('" + test_model.source_name + \
            "', '" + test_model.name + "') }}"
        source_values.append([test_model.source_name, test_model.name])

    # The node name combines the test name, the model's search name and,
    # when present, the column name.
    args_name = test_model.search_name.replace(".", "_")
    if column_name is not None:
        kwargs['column_name'] = column_name
        args_name += '_' + column_name
    node_name = f'{test_name}_{args_name}'
    raw_sql = '{{ config(severity="ERROR") }}{{ test_' + \
        test_name + '(**dbt_schema_test_kwargs) }}'

    # From here on ``test_name`` is the bare name without its namespace.
    name_parts = test_name.split('.')
    part_count = len(name_parts)
    if part_count == 2:
        namespace, test_name = name_parts
        macro_depends = f'model.{namespace}.{test_name}'
    elif part_count == 1:
        namespace = None
        macro_depends = f'model.dbt.{test_name}'
    else:
        assert False, f'invalid test name: {test_name}'

    path = 'schema.yml' if path is None else path
    tags = ['schema'] if tags is None else tags
    refs = [] if refs is None else refs
    sources = [] if sources is None else sources

    upstream_ids = []
    for upstream in refs:
        ref_values.append([upstream.name])
        upstream_ids.append(upstream.unique_id)
    for upstream in sources:
        source_values.append([upstream.source_name, upstream.name])
        upstream_ids.append(upstream.unique_id)

    metadata = TestMetadata(
        namespace=namespace,
        name=test_name,
        kwargs=kwargs,
    )
    return ParsedSchemaTestNode(
        name=node_name,
        alias=node_name,
        package_name=pkg,
        unique_id=f'test.{pkg}.{node_name}',
        fqn=['minimal', 'schema_test', node_name],
        resource_type=NodeType.Test,
        database='dbt',
        schema='dbt_postgres',
        root_path='/usr/dbt/some-project',
        path=f'schema_test/{node_name}.sql',
        original_file_path=f'models/{path}',
        raw_sql=raw_sql,
        test_metadata=metadata,
        column_name=column_name,
        tags=tags,
        refs=ref_values,
        # NOTE(review): source_values is collected above but an empty list
        # is passed here; confirm whether that is intentional.
        sources=[],
        depends_on=DependsOn(macros=[macro_depends], nodes=upstream_ids),
        checksum=FileHash.from_contents(''),
    )
Ejemplo n.º 22
0
 def rpc(cls):
     """Return an instance of ``cls`` built from two empty FileHash values
     and an empty dict."""
     # ugh!
     first_empty = FileHash.empty()
     second_empty = FileHash.empty()
     return cls(first_empty, second_empty, {})