def load_and_parse(self):
    """Load and parse the archives of every known project.

    Archives are gathered from each project in ``self.all_projects`` via
    ``parse_archives_from_project``; each archive is then turned into an
    ``UnparsedNode`` and handed to ``parse_node``.

    :return: a dict mapping each archive's unique id (as produced by
        ``get_path``) onto the value returned by ``parse_node`` for it.
    """
    archives = []
    # Only the project objects are needed here, and extending in place
    # avoids rebuilding the whole list once per project.
    for project in self.all_projects.values():
        archives.extend(self.parse_archives_from_project(project))

    to_return = {}
    # We're going to have a similar issue with parsed nodes, if we want to
    # make parse_node return those.
    for a in archives:
        # archives have a config, but that would make for an invalid
        # UnparsedNode, so remove it and pass it along to parse_node as an
        # argument.
        archive_config = a.pop('config')
        archive = UnparsedNode(**a)
        node_path = self.get_path(archive.resource_type,
                                  archive.package_name,
                                  archive.name)
        to_return[node_path] = self.parse_node(
            archive,
            node_path,
            self.all_projects.get(archive.package_name),
            archive_config=archive_config)

    return to_return
def parse_seed_file(cls, file_match, root_dir, package_name):
    """Build the UnparsedNode for a seed file and load its agate table.

    :param file_match: mapping with the 'absolute_path', 'relative_path'
        and 'searched_path' of the seed file.
    :param root_dir: stored on the node as ``root_path``.
    :param package_name: stored on the node as ``package_name``.
    :return: a ``(node, table)`` tuple; the table carries the file's
        absolute path in ``original_abspath``.
    """
    abspath = file_match['absolute_path']
    logger.debug("Parsing {}".format(abspath))

    # The node is named after the file, minus its last four characters
    # (presumably the ".csv" suffix -- confirm against the file matcher).
    file_name = os.path.basename(abspath)
    seed_name = file_name[:-4]

    relative_path = file_match['relative_path']
    original_path = os.path.join(file_match.get('searched_path'),
                                 file_match.get('relative_path'))

    node = UnparsedNode(
        path=relative_path,
        name=seed_name,
        root_path=root_dir,
        resource_type=NodeType.Seed,
        # Give this raw_sql so it conforms to the node spec,
        # use dummy text so it doesn't look like an empty node
        raw_sql='-- csv --',
        package_name=package_name,
        original_file_path=original_path,
    )

    try:
        table = dbt.clients.agate_helper.from_csv(abspath)
    except ValueError as exc:
        # report a bad csv as a compiler error attached to this node
        dbt.exceptions.raise_compiler_error(str(exc), node)

    table.original_abspath = abspath
    return node, table
def parse_sql_nodes(self, nodes, tags=None):
    """Parse raw node dicts, separating enabled results from disabled ones.

    :param nodes: iterable of dicts, each expanded into an UnparsedNode.
    :param tags: optional tags forwarded to ``parse_node``; an empty list
        is used when omitted.
    :return: ``(parsed, disabled)`` where ``parsed`` maps unique ids onto
        parsed nodes and ``disabled`` lists the parsed nodes whose config
        has ``enabled`` set to a false value.
    """
    tags = [] if tags is None else tags

    parsed_by_id = {}
    skipped = []
    for raw in nodes:
        unparsed = UnparsedNode(**raw)
        pkg = unparsed.package_name
        unique_id = self.get_path(unparsed.resource_type, pkg, unparsed.name)
        owner = self.all_projects.get(pkg)
        result = self.parse_node(unparsed, unique_id, owner, tags=tags)

        if result['config']['enabled']:
            # Two enabled nodes may not share a unique id.
            previous = parsed_by_id.get(unique_id)
            if previous is not None:
                dbt.exceptions.raise_duplicate_resource_name(
                    previous, result)
            parsed_by_id[unique_id] = result
        else:
            # Disabled nodes are collected separately and never indexed.
            skipped.append(result)

    return parsed_by_id, skipped
def build_unparsed_node(cls, model_name, package_name, test_type, test_args,
                        test_namespace, root_dir, original_file_path):
    """Build the UnparsedNode representing a schema test.

    :param model_name: name of the model under test.
    :param package_name: package name recorded on the node.
    :param test_type: identifies the test macro to use.
    :param test_args: dictionary of arguments for the test.
    :param test_namespace: namespace handed to ``build_test_raw_sql``.
    :param root_dir: root directory of the search; stored as ``root_path``.
    :param original_file_path: path to the schema.yml file that specified
        the test.
    :return: an UnparsedNode of resource type ``NodeType.Test``.
    """
    test_path = os.path.basename(original_file_path)

    raw_sql = build_test_raw_sql(test_namespace, model_name, test_type,
                                 test_args)

    hashed_name, full_name = get_nice_schema_test_name(
        test_type, model_name, test_args)

    # The node keeps the full name but its path is built from the hashed
    # name; the full-name path is not needed here, so it is not computed.
    hashed_path = get_pseudo_test_path(hashed_name, test_path, 'schema_test')

    return UnparsedNode(
        name=full_name,
        resource_type=NodeType.Test,
        package_name=package_name,
        root_path=root_dir,
        path=hashed_path,
        original_file_path=original_file_path,
        raw_sql=raw_sql,
    )
def parse_sql_node(self, node_dict, tags=None):
    """Parse a single raw node dict.

    :param node_dict: dict expanded into an UnparsedNode.
    :param tags: optional tags forwarded to ``parse_node``; an empty list
        is used when omitted.
    :return: ``(unique_id, parsed_node)``.
    :raises dbt.exceptions.InternalException: when ``check_block_parsing``
        rejected a model that ``parse_node`` then parsed successfully.
    """
    tags = [] if tags is None else tags

    unparsed = UnparsedNode(**node_dict)
    pkg = unparsed.package_name
    unique_id = self.get_path(unparsed.resource_type, pkg, unparsed.name)
    project = self.all_projects.get(pkg)

    # Only models are run through the block parser check.
    if unparsed.resource_type == NodeType.Model:
        block_parse_ok = self.check_block_parsing(
            unparsed.name, unparsed.original_file_path, unparsed.raw_sql)
    else:
        block_parse_ok = True

    parsed = self.parse_node(unparsed, unique_id, project, tags=tags)

    if block_parse_ok:
        return unique_id, parsed

    # if we had a parse error in parse_node, we would not get here. So
    # this means we rejected a good file :(
    raise dbt.exceptions.InternalException(
        'the block parser rejected a good node: {} was marked invalid '
        'but is actually valid!'.format(unparsed.original_file_path))
def parse_schema_test(cls, test_base, model_name, test_config,
                      test_namespace, test_type, root_project_config,
                      package_project_config, all_projects, macros=None):
    """Build and parse the node for one schema test on a model.

    :param test_base: mapping supplying 'path', 'package_name',
        'resource_type', 'root_path' and 'original_file_path'.
    :param model_name: model referenced by the generated test SQL.
    :param test_config: the test's arguments; a bare string/number/bool is
        treated as a single argument named 'arg'.
    :param test_namespace: optional prefix for the test macro name.
    :param test_type: test name; the macro called is ``test_<test_type>``.
    :return: whatever ``cls.parse_node`` returns for the generated node.
    """
    # A scalar config is shorthand for {'arg': <value>}.
    test_args = test_config
    if isinstance(test_config, (basestring, int, float, bool)):
        test_args = {'arg': test_config}

    # sort the dict so the keys are rendered deterministically (for tests)
    kwargs = [as_kwarg(key, test_args[key]) for key in sorted(test_args)]

    macro_name = (
        "test_{}".format(test_type)
        if test_namespace is None
        else "{}.test_{}".format(test_namespace, test_type)
    )

    raw_sql = "{{{{ {macro}(model=ref('{model}'), {kwargs}) }}}}".format(
        model=model_name,
        macro=macro_name,
        kwargs=", ".join(kwargs),
    )

    base_path = test_base.get('path')
    hashed_name, full_name = get_nice_schema_test_name(test_type, model_name,
                                                       test_args)
    hashed_path = get_pseudo_test_path(hashed_name, base_path, 'schema_test')
    full_path = get_pseudo_test_path(full_name, base_path, 'schema_test')

    # supply our own fqn which overrides the hashed version from the path
    fqn_override = cls.get_fqn(full_path, package_project_config)

    package_name = test_base.get('package_name')
    node_path = cls.get_path(NodeType.Test, package_name, full_name)

    unparsed = UnparsedNode(
        name=full_name,
        resource_type=test_base.get('resource_type'),
        package_name=package_name,
        root_path=test_base.get('root_path'),
        path=hashed_path,
        original_file_path=test_base.get('original_file_path'),
        raw_sql=raw_sql,
    )

    return cls.parse_node(
        unparsed,
        node_path,
        root_project_config,
        package_project_config,
        all_projects,
        tags=['schema'],
        fqn_extra=None,
        fqn=fqn_override,
        macros=macros,
    )
def build_test_node(self, test_target, package_name, test, root_dir, path,
                    column_name=None):
    """Build and parse a test node for the given target.

    :param test_target: An unparsed form of the target (a model or a
        source).
    :param package_name: package the test was declared in; the package
        actually used is the one ``calculate_test_namespace`` returns.
    :param test: the test definition handed to ``_build_test_args``.
    :param root_dir: stored on the node as ``root_path``.
    :param path: stored as ``original_file_path``; its basename is used to
        build the pseudo test paths.
    :param column_name: optional column name, forwarded to ``parse_node``.
    :return: whatever ``parse_node`` returns for the generated node.
    """
    test_type, test_args = _build_test_args(test, column_name)
    test_namespace, test_type, resolved_package = calculate_test_namespace(
        test_type, package_name)

    source_package = self.all_projects.get(resolved_package)
    if source_package is None:
        # The package providing the test is not among the known projects.
        desc = '"{}" test on {}'.format(
            test_type, self._describe_test_target(test_target))
        dbt.exceptions.raise_dep_not_found(None, desc, test_namespace)

    file_name = os.path.basename(path)
    hashed_name, full_name = self._generate_test_name(
        test_target, test_type, test_args)
    hashed_path = get_pseudo_test_path(hashed_name, file_name, 'schema_test')
    full_path = get_pseudo_test_path(full_name, file_name, 'schema_test')

    raw_sql = self._build_raw_sql(test_namespace, test_target, test_type,
                                  test_args)

    unparsed = UnparsedNode(
        name=full_name,
        resource_type=NodeType.Test,
        package_name=resolved_package,
        root_path=root_dir,
        path=hashed_path,
        original_file_path=path,
        raw_sql=raw_sql,
    )

    # supply our own fqn which overrides the hashed version from the path
    # TODO: is this necessary even a little bit for tests?
    fqn_override = self.get_fqn(full_path, source_package)

    node_path = self.get_path(NodeType.Test, unparsed.package_name,
                              unparsed.name)

    return self.parse_node(
        unparsed,
        node_path,
        source_package,
        tags=['schema'],
        fqn_extra=None,
        fqn=fqn_override,
        column_name=column_name,
    )
def parse_sql_nodes(cls, nodes, root_project, projects, tags=None,
                    macros=None):
    """Parse raw node dicts, separating enabled results from disabled ones.

    :param nodes: iterable of dicts, each expanded into an UnparsedNode.
    :param root_project: forwarded to ``parse_node``.
    :param projects: mapping of package name to project; both the node's
        own entry and the whole mapping are forwarded to ``parse_node``.
    :param tags: optional tags; an empty list is used when omitted.
    :param macros: optional macros; an empty dict is used when omitted.
    :return: ``(parsed, disabled)`` where ``parsed`` maps unique ids onto
        parsed nodes and ``disabled`` lists the ``fqn`` of every parsed
        node whose config has ``enabled`` set to a false value.
    """
    tags = [] if tags is None else tags
    macros = {} if macros is None else macros

    parsed_by_id = {}
    disabled_fqns = []
    for raw in nodes:
        unparsed = UnparsedNode(**raw)
        pkg = unparsed.get('package_name')
        unique_id = cls.get_path(unparsed.get('resource_type'),
                                 pkg,
                                 unparsed.get('name'))
        result = cls.parse_node(unparsed,
                                unique_id,
                                root_project,
                                projects.get(pkg),
                                projects,
                                tags=tags,
                                macros=macros)

        if result['config']['enabled']:
            # Two enabled nodes may not share a unique id.
            previous = parsed_by_id.get(unique_id)
            if previous is not None:
                dbt.exceptions.raise_duplicate_resource_name(
                    previous, result)
            parsed_by_id[unique_id] = result
        else:
            # Only the fqn of a disabled node is kept.
            disabled_fqns.append(result['fqn'])

    return parsed_by_id, disabled_fqns
def _create_error_node(
    self, name: str, path: str, original_file_path: str, raw_sql: str,
) -> UnparsedNode:
    """Build a stand-in UnparsedNode to attach to an exception.

    Used when an error occurs before a node has actually been parsed, so
    that the error can still carry some useful information.

    The resource type comes from ``self.resource_type``; the root path and
    package name come from ``self.project``.
    """
    # this is a bit silly, but build an UnparsedNode just for error
    # message reasons
    fields = dict(
        name=name,
        resource_type=self.resource_type,
        path=path,
        original_file_path=original_file_path,
        root_path=self.project.project_root,
        package_name=self.project.project_name,
        raw_sql=raw_sql,
    )
    return UnparsedNode(**fields)
def test_empty(self):
    """A node whose raw_sql is only whitespace round-trips and is empty.

    The same dict must also fail validation as an UnparsedRunHook and as
    an UnparsedMacro.
    """
    node_dict = {
        'name': 'foo',
        'root_path': '/root/',
        'resource_type': NodeType.Model,
        'path': '/root/x/path.sql',
        'original_file_path': '/root/path.sql',
        'package_name': 'test',
        'raw_sql': ' \n',
    }
    # Build the node from the very same field values as the dict form.
    node = UnparsedNode(**node_dict)

    self.assert_symmetric(node, node_dict)
    self.assertTrue(node.empty)

    for other_cls in (UnparsedRunHook, UnparsedMacro):
        self.assert_fails_validation(node_dict, cls=other_cls)
def build_test_node(self, test_target, package_name, test, root_dir, path,
                    column_name=None):
    """Build and parse a test node for the given target.

    :param test_target: An unparsed form of the target (a model or a
        source).
    :param package_name: package name handed to ``self.Builder``; the node
        uses the package name the builder reports.
    :param test: the test definition; a bare string is treated as a test
        of that name with no arguments.
    :param root_dir: stored on the node as ``root_path``.
    :param path: stored as ``original_file_path``; its basename is used to
        build the pseudo test paths.
    :param column_name: optional column name, forwarded to the builder and
        to ``parse_node``.
    :return: whatever ``parse_node`` returns for the generated node.
    :raises dbt.exceptions.InternalException: when ``check_block_parsing``
        rejects SQL that ``parse_node`` parsed successfully.
    """
    if isinstance(test, basestring):
        # A plain name is shorthand for a test with an empty argument dict.
        test = {test: {}}

    ctx = generate_config_context(self.root_project_config.cli_vars)
    builder = self.Builder(test, test_target, column_name, package_name, ctx)

    source_package = self.all_projects.get(builder.package_name)
    if source_package is None:
        # The package providing the test is not among the known projects.
        desc = '"{}" test on {}'.format(builder.name,
                                        builder.describe_test_target())
        dbt.exceptions.raise_dep_not_found(None, desc, builder.namespace)

    file_name = os.path.basename(path)
    hashed_name, full_name = builder.get_test_name()
    hashed_path = get_pseudo_test_path(hashed_name, file_name, 'schema_test')
    full_path = get_pseudo_test_path(full_name, file_name, 'schema_test')

    raw_sql = builder.build_raw_sql()

    unparsed = UnparsedNode(
        name=full_name,
        resource_type=NodeType.Test,
        package_name=builder.package_name,
        root_path=root_dir,
        path=hashed_path,
        original_file_path=path,
        raw_sql=raw_sql,
    )

    # supply our own fqn which overrides the hashed version from the path
    # TODO: is this necessary even a little bit for tests?
    with_full_path = unparsed.incorporate(path=full_path)
    fqn_override = self.get_fqn(with_full_path, source_package)

    node_path = self.get_path(NodeType.Test, unparsed.package_name,
                              unparsed.name)

    parsed = self.parse_node(
        unparsed,
        node_path,
        source_package,
        tags=['schema'],
        fqn_extra=None,
        fqn=fqn_override,
        column_name=column_name,
    )

    # The block parser check runs only after parse_node has succeeded.
    if self.check_block_parsing(full_name, file_name, raw_sql):
        return parsed

    # if we had a parse error in parse_node, we would not get here. So
    # this means we rejected a good file :(
    raise dbt.exceptions.InternalException(
        'the block parser rejected a good node: {} was marked invalid '
        'but is actually valid!'.format(file_name))