Beispiel #1
0
    def load_and_parse(self):
        """Load and parse the archives of every project in all_projects.

        The archive dicts returned by ``parse_archives_from_project`` for
        each project are gathered into one list. Each archive dict is then
        turned into an ``UnparsedNode`` (after its 'config' entry has been
        removed) and parsed with ``parse_node``.

        Returns a dict that maps unique ids (as built by ``get_path``) onto
        the values returned by ``parse_node``.
        """
        archives = []
        # Only the project objects are needed here. Extend the list in place
        # instead of rebuilding it once per project.
        for project in self.all_projects.values():
            archives.extend(self.parse_archives_from_project(project))

        to_return = {}

        # We're going to have a similar issue with parsed nodes, if we want to
        # make parse_node return those.
        for a in archives:
            # archives have a config, but that would make for an invalid
            # UnparsedNode, so remove it and pass it along to parse_node as an
            # argument. Note that this pops the key from the archive dict
            # itself.
            archive_config = a.pop('config')
            archive = UnparsedNode(**a)
            node_path = self.get_path(archive.resource_type,
                                      archive.package_name, archive.name)

            to_return[node_path] = self.parse_node(
                archive,
                node_path,
                self.all_projects.get(archive.package_name),
                archive_config=archive_config)

        return to_return
Beispiel #2
0
 def parse_seed_file(cls, file_match, root_dir, package_name):
     """Build the UnparsedNode for a seed file and load its table.

     ``file_match`` must provide 'absolute_path' and 'relative_path';
     'searched_path' is read with ``.get``. Returns a ``(node, table)``
     tuple, where ``table`` is whatever
     ``dbt.clients.agate_helper.from_csv`` returns for the absolute path,
     with ``original_abspath`` set on it. A ValueError raised while loading
     is handed to ``dbt.exceptions.raise_compiler_error`` along with the
     node.
     """
     csv_path = file_match['absolute_path']
     logger.debug("Parsing {}".format(csv_path))

     # the node name is the file name minus its last four characters
     # (presumably the '.csv' extension)
     seed_name = os.path.basename(csv_path)[:-4]
     relative_path = file_match['relative_path']
     source_path = os.path.join(file_match.get('searched_path'),
                                file_match.get('relative_path'))

     node = UnparsedNode(
         path=relative_path,
         name=seed_name,
         root_path=root_dir,
         resource_type=NodeType.Seed,
         # raw_sql is required by the node spec; the dummy text keeps the
         # node from looking empty
         raw_sql='-- csv --',
         package_name=package_name,
         original_file_path=source_path,
     )

     try:
         table = dbt.clients.agate_helper.from_csv(csv_path)
     except ValueError as exc:
         dbt.exceptions.raise_compiler_error(str(exc), node)

     table.original_abspath = csv_path
     return node, table
Beispiel #3
0
    def parse_sql_nodes(self, nodes, tags=None):
        """Parse a collection of node dicts into enabled and disabled nodes.

        Each entry of ``nodes`` is expanded into an ``UnparsedNode`` and
        handed to ``parse_node`` together with its unique id and the project
        registered under its package name in ``self.all_projects``.

        Returns a ``(parsed, disabled)`` tuple: ``parsed`` maps unique ids
        onto the parsed nodes whose config is enabled, and ``disabled`` is a
        list of the parsed nodes whose config is not enabled. An enabled node
        whose unique id has already been stored is reported through
        ``dbt.exceptions.raise_duplicate_resource_name``.
        """
        tags = [] if tags is None else tags

        enabled_by_id = {}
        disabled_nodes = []

        for raw in nodes:
            unparsed = UnparsedNode(**raw)
            package = unparsed.package_name
            unique_id = self.get_path(unparsed.resource_type, package,
                                      unparsed.name)
            parsed = self.parse_node(unparsed, unique_id,
                                     self.all_projects.get(package),
                                     tags=tags)

            if parsed['config']['enabled']:
                # two enabled resources must not share a unique id
                previous = enabled_by_id.get(unique_id)
                if previous is not None:
                    dbt.exceptions.raise_duplicate_resource_name(
                            previous, parsed)
                enabled_by_id[unique_id] = parsed
            else:
                # disabled nodes are collected separately and skip the
                # duplicate check
                disabled_nodes.append(parsed)

        return enabled_by_id, disabled_nodes
Beispiel #4
0
    def build_unparsed_node(cls, model_name, package_name, test_type,
                            test_args, test_namespace, root_dir,
                            original_file_path):
        """Build an UnparsedNode representing a schema test.

        :param model_name: Name of the model under test.
        :param package_name: Package name to record on the node.
        :param test_type: Test type, identifying the test macro to use.
        :param test_args: Arguments dictionary for the test.
        :param test_namespace: Namespace handed to ``build_test_raw_sql``
            together with the test type.
        :param root_dir: Root directory of the search.
        :param original_file_path: Path to the schema.yml file that
            specified the test.
        :return: An UnparsedNode of type ``NodeType.Test`` that is named
            with the full test name and whose ``path`` is derived from the
            hashed test name.
        """
        # only the file name of the schema file takes part in the pseudo path
        test_path = os.path.basename(original_file_path)

        raw_sql = build_test_raw_sql(test_namespace, model_name, test_type,
                                     test_args)

        hashed_name, full_name = get_nice_schema_test_name(
            test_type, model_name, test_args)

        # The node's path uses the hashed name, while the node itself is
        # named with the full name.
        hashed_path = get_pseudo_test_path(hashed_name, test_path,
                                           'schema_test')
        return UnparsedNode(name=full_name,
                            resource_type=NodeType.Test,
                            package_name=package_name,
                            root_path=root_dir,
                            path=hashed_path,
                            original_file_path=original_file_path,
                            raw_sql=raw_sql)
Beispiel #5
0
    def parse_sql_node(self, node_dict, tags=None):
        """Parse a single node dict and return ``(unique_id, parsed_node)``.

        The dict is expanded into an ``UnparsedNode``. For model nodes the
        raw SQL is first run through ``check_block_parsing``; the node is
        then parsed with ``parse_node``.

        Raises ``dbt.exceptions.InternalException`` when ``parse_node``
        succeeded for a model that ``check_block_parsing`` had rejected.
        """
        tags = [] if tags is None else tags

        unparsed = UnparsedNode(**node_dict)
        package = unparsed.package_name
        unique_id = self.get_path(unparsed.resource_type, package,
                                  unparsed.name)
        project = self.all_projects.get(package)

        # only models go through the block parser; any other resource type
        # counts as having passed
        if unparsed.resource_type == NodeType.Model:
            block_parse_ok = self.check_block_parsing(
                unparsed.name, unparsed.original_file_path, unparsed.raw_sql)
        else:
            block_parse_ok = True

        parsed = self.parse_node(unparsed, unique_id, project, tags=tags)
        if block_parse_ok:
            return unique_id, parsed

        # if we had a parse error in parse_node, we would not get here. So
        # this means we rejected a good file :(
        raise dbt.exceptions.InternalException(
            'the block parser rejected a good node: {} was marked invalid '
            'but is actually valid!'.format(unparsed.original_file_path))
Beispiel #6
0
    def parse_schema_test(cls, test_base, model_name, test_config,
                          test_namespace, test_type, root_project_config,
                          package_project_config, all_projects, macros=None):
        """Build and parse the node for one schema test on a model.

        ``test_config`` is either a scalar (string, int, float or bool),
        which becomes the single argument ``arg``, or a mapping of argument
        names to values. The test's raw SQL is a call to the macro
        ``test_<test_type>`` (prefixed with ``test_namespace`` when one is
        given) with ``model=ref('<model_name>')`` followed by the rendered
        arguments.

        ``test_base`` supplies 'path', 'package_name', 'resource_type',
        'root_path' and 'original_file_path' for the new node. Returns the
        result of ``parse_node`` for that node, tagged 'schema'.
        """
        # a bare scalar config is shorthand for a single 'arg' argument
        scalar_types = (basestring, int, float, bool)
        if isinstance(test_config, scalar_types):
            test_args = {'arg': test_config}
        else:
            test_args = test_config

        # render the arguments in sorted key order so the generated SQL is
        # deterministic (for tests)
        rendered_kwargs = ", ".join(
            as_kwarg(arg_name, test_args[arg_name])
            for arg_name in sorted(test_args))

        if test_namespace is not None:
            macro_name = "{}.test_{}".format(test_namespace, test_type)
        else:
            macro_name = "test_{}".format(test_type)

        raw_sql = "{{{{ {macro}(model=ref('{model}'), {kwargs}) }}}}".format(
            macro=macro_name,
            model=model_name,
            kwargs=rendered_kwargs,
        )

        base_path = test_base.get('path')
        hashed_name, full_name = get_nice_schema_test_name(
            test_type, model_name, test_args)

        # the node is stored under the hashed path, but named with (and
        # given an fqn from) the full name
        hashed_path = get_pseudo_test_path(
            hashed_name, base_path, 'schema_test')
        full_path = get_pseudo_test_path(
            full_name, base_path, 'schema_test')

        # supply our own fqn which overrides the hashed version from the path
        fqn_override = cls.get_fqn(full_path, package_project_config)
        package_name = test_base.get('package_name')
        unique_id = cls.get_path(NodeType.Test, package_name, full_name)

        unparsed = UnparsedNode(
            name=full_name,
            resource_type=test_base.get('resource_type'),
            package_name=package_name,
            root_path=test_base.get('root_path'),
            path=hashed_path,
            original_file_path=test_base.get('original_file_path'),
            raw_sql=raw_sql,
        )

        return cls.parse_node(
            unparsed,
            unique_id,
            root_project_config,
            package_project_config,
            all_projects,
            tags=['schema'],
            fqn_extra=None,
            fqn=fqn_override,
            macros=macros,
        )
Beispiel #7
0
    def build_test_node(self, test_target, package_name, test, root_dir,
                        path, column_name=None):
        """Build a test node against the given target (a model or a source).

        The test type, arguments and namespace are worked out with
        ``_build_test_args`` and ``calculate_test_namespace``; the latter
        also yields the package name used from then on. If no project is
        registered for that package in ``self.all_projects``,
        ``dbt.exceptions.raise_dep_not_found`` is called.

        :param test_target: An unparsed form of the target.
        :param package_name: Package name passed to
            ``calculate_test_namespace``.
        :param test: The test definition passed to ``_build_test_args``.
        :param root_dir: Root path recorded on the unparsed node.
        :param path: Original file path recorded on the unparsed node; its
            base name is used to build the pseudo test paths.
        :param column_name: Optional column name, forwarded to
            ``_build_test_args`` and ``parse_node``.
        :return: The result of ``parse_node`` for the test, tagged 'schema'.
        """
        test_type, test_args = _build_test_args(test, column_name)

        namespace_info = calculate_test_namespace(test_type, package_name)
        test_namespace, test_type, package_name = namespace_info

        test_package = self.all_projects.get(package_name)
        if test_package is None:
            target_desc = self._describe_test_target(test_target)
            description = '"{}" test on {}'.format(test_type, target_desc)
            dbt.exceptions.raise_dep_not_found(None, description,
                                               test_namespace)

        schema_file = os.path.basename(path)

        hashed_name, full_name = self._generate_test_name(
            test_target, test_type, test_args)

        # the node is stored under the hashed path; the full-name path is
        # only used for the fqn override below
        hashed_path = get_pseudo_test_path(
            hashed_name, schema_file, 'schema_test')
        full_path = get_pseudo_test_path(
            full_name, schema_file, 'schema_test')

        raw_sql = self._build_raw_sql(
            test_namespace, test_target, test_type, test_args)

        unparsed = UnparsedNode(
            name=full_name,
            resource_type=NodeType.Test,
            package_name=package_name,
            root_path=root_dir,
            path=hashed_path,
            original_file_path=path,
            raw_sql=raw_sql,
        )

        # supply our own fqn which overrides the hashed version from the path
        # TODO: is this necessary even a little bit for tests?
        fqn_override = self.get_fqn(full_path, test_package)

        unique_id = self.get_path(
            NodeType.Test, unparsed.package_name, unparsed.name)

        return self.parse_node(
            unparsed,
            unique_id,
            test_package,
            tags=['schema'],
            fqn_extra=None,
            fqn=fqn_override,
            column_name=column_name,
        )
Beispiel #8
0
    def parse_sql_nodes(cls, nodes, root_project, projects,
                        tags=None, macros=None):
        """Parse a collection of node dicts into enabled and disabled nodes.

        Each entry of ``nodes`` is expanded into an ``UnparsedNode`` and
        handed to ``parse_node`` with its unique id, the root project, the
        project registered under its package name in ``projects``, and
        ``projects`` itself.

        Returns a ``(parsed, disabled)`` tuple: ``parsed`` maps unique ids
        onto the parsed nodes whose config is enabled, and ``disabled`` is a
        list of the 'fqn' values of the parsed nodes whose config is not
        enabled. An enabled node whose unique id has already been stored is
        reported through ``dbt.exceptions.raise_duplicate_resource_name``.
        """
        tags = [] if tags is None else tags
        macros = {} if macros is None else macros

        enabled_by_id = {}
        disabled_fqns = []

        for raw in nodes:
            unparsed = UnparsedNode(**raw)
            package = unparsed.get('package_name')
            unique_id = cls.get_path(unparsed.get('resource_type'),
                                     package,
                                     unparsed.get('name'))

            parsed = cls.parse_node(
                unparsed,
                unique_id,
                root_project,
                projects.get(package),
                projects,
                tags=tags,
                macros=macros,
            )

            if parsed['config']['enabled']:
                # two enabled resources must not share a unique id
                previous = enabled_by_id.get(unique_id)
                if previous is not None:
                    dbt.exceptions.raise_duplicate_resource_name(
                            previous, parsed)
                enabled_by_id[unique_id] = parsed
            else:
                # only the fqn of a disabled node is kept
                disabled_fqns.append(parsed['fqn'])

        return enabled_by_id, disabled_fqns
Beispiel #9
0
 def _create_error_node(
     self, name: str, path: str, original_file_path: str, raw_sql: str,
 ) -> UnparsedNode:
     """Build a stand-in UnparsedNode to attach to an early parse error.

     When something fails before a node has actually been parsed, this
     gives the exception some useful context. The resource type, root path
     and package name come from this parser and its project; everything
     else is taken from the arguments.
     """
     # this is a bit silly, but the node exists purely so error messages
     # have something to point at
     node_fields = dict(
         name=name,
         resource_type=self.resource_type,
         path=path,
         original_file_path=original_file_path,
         root_path=self.project.project_root,
         package_name=self.project.project_name,
         raw_sql=raw_sql,
     )
     return UnparsedNode(**node_fields)
    def test_empty(self):
        """A model whose raw_sql is only whitespace round-trips and is empty.

        The same dict must fail validation when treated as an
        UnparsedRunHook or an UnparsedMacro.
        """
        serialized = {
            'package_name': 'test',
            'root_path': '/root/',
            'path': '/root/x/path.sql',
            'original_file_path': '/root/path.sql',
            'raw_sql': '  \n',
            'name': 'foo',
            'resource_type': NodeType.Model,
        }
        unparsed = UnparsedNode(
            name='foo',
            root_path='/root/',
            resource_type=NodeType.Model,
            path='/root/x/path.sql',
            original_file_path='/root/path.sql',
            package_name='test',
            raw_sql='  \n',
        )

        self.assert_symmetric(unparsed, serialized)
        self.assertTrue(unparsed.empty)

        # the dict must not validate as either of the other unparsed types
        for other_cls in (UnparsedRunHook, UnparsedMacro):
            self.assert_fails_validation(serialized, cls=other_cls)
Beispiel #11
0
    def build_test_node(self, test_target, package_name, test, root_dir,
                        path, column_name=None):
        """Build a test node against the given target (a model or a source).

        A test given as a bare string is treated as that test name with an
        empty argument dict. The test's names, package, namespace and raw
        SQL all come from a ``self.Builder`` instance. If no project is
        registered for the builder's package in ``self.all_projects``,
        ``dbt.exceptions.raise_dep_not_found`` is called.

        :param test_target: An unparsed form of the target.
        :param package_name: Package name passed to the builder.
        :param test: The test definition: a test name, or a dict.
        :param root_dir: Root path recorded on the unparsed node.
        :param path: Original file path recorded on the unparsed node; its
            base name is used to build the pseudo test paths.
        :param column_name: Optional column name, passed to the builder and
            to ``parse_node``.
        :return: The result of ``parse_node`` for the test, tagged 'schema'.
        :raises dbt.exceptions.InternalException: if ``parse_node``
            succeeded but ``check_block_parsing`` rejected the raw SQL.
        """
        if isinstance(test, basestring):
            # a bare test name is shorthand for that test with no arguments
            test = {test: {}}

        config_ctx = generate_config_context(
            self.root_project_config.cli_vars)

        builder = self.Builder(test, test_target, column_name, package_name,
                               config_ctx)

        test_package = self.all_projects.get(builder.package_name)
        if test_package is None:
            description = '"{}" test on {}'.format(
                builder.name, builder.describe_test_target())
            dbt.exceptions.raise_dep_not_found(None, description,
                                               builder.namespace)

        schema_file = os.path.basename(path)

        hashed_name, full_name = builder.get_test_name()

        # the node is stored under the hashed path; the full-name path is
        # only used for the fqn override below
        hashed_path = get_pseudo_test_path(
            hashed_name, schema_file, 'schema_test')
        full_path = get_pseudo_test_path(
            full_name, schema_file, 'schema_test')

        raw_sql = builder.build_raw_sql()

        unparsed = UnparsedNode(
            name=full_name,
            resource_type=NodeType.Test,
            package_name=builder.package_name,
            root_path=root_dir,
            path=hashed_path,
            original_file_path=path,
            raw_sql=raw_sql,
        )

        # supply our own fqn which overrides the hashed version from the path
        # TODO: is this necessary even a little bit for tests?
        fqn_override = self.get_fqn(unparsed.incorporate(path=full_path),
                                    test_package)

        unique_id = self.get_path(
            NodeType.Test, unparsed.package_name, unparsed.name)

        parsed = self.parse_node(
            unparsed,
            unique_id,
            test_package,
            tags=['schema'],
            fqn_extra=None,
            fqn=fqn_override,
            column_name=column_name,
        )

        # the block parser is consulted only after parse_node has succeeded
        if self.check_block_parsing(full_name, schema_file, raw_sql):
            return parsed

        # if we had a parse error in parse_node, we would not get here. So
        # this means we rejected a good file :(
        raise dbt.exceptions.InternalException(
            'the block parser rejected a good node: {} was marked invalid '
            'but is actually valid!'.format(schema_file))