Example #1
0
    def parse_project(
        self,
        project: Project,
        parser_files
    ) -> None:
        """Run every applicable parser over ``project``'s loaded files.

        ``parser_files`` maps parser class names to lists of search keys
        into ``self.manifest.files`` (built by ``read_files`` in ``load``).
        Per-parser and per-project timing/path-count info is appended to
        ``self._perf_info``.
        """
        project_parser_info: List[ParserInfo] = []
        start_timer = time.perf_counter()
        total_path_count = 0

        # Loop through parsers with loaded files. Note: SchemaParser must be last
        parser_types: List[Type[Parser]] = [
            ModelParser, SnapshotParser, AnalysisParser, DataTestParser,
            SeedParser, DocumentationParser, SchemaParser]
        for parser_cls in parser_types:
            parser_name = parser_cls.__name__
            # No point in creating a parser if we don't have files for it
            # (.get() covers both "missing key" and "empty list" in one lookup)
            if not parser_files.get(parser_name):
                continue

            # Initialize timing info
            parser_path_count = 0
            parser_start_timer = time.perf_counter()

            # Parse the project files for this parser
            parser: Parser = parser_cls(project, self.manifest, self.root_project)
            for search_key in parser_files[parser_name]:
                block = FileBlock(self.manifest.files[search_key])
                self.parse_with_cache(block, parser)
                parser_path_count += 1

            # Save timing info
            project_parser_info.append(ParserInfo(
                parser=parser.resource_type,
                path_count=parser_path_count,
                elapsed=time.perf_counter() - parser_start_timer
            ))
            total_path_count += parser_path_count

        # HookParser doesn't run from loaded files, just dbt_project.yml,
        # so do separately
        hook_parser = HookParser(project, self.manifest, self.root_project)
        path = hook_parser.get_path()
        file_block = FileBlock(load_source_file(path, ParseFileType.Hook, project.project_name))
        self.parse_with_cache(file_block, hook_parser)

        # Store the performance info
        elapsed = time.perf_counter() - start_timer
        project_info = ProjectLoaderInfo(
            project_name=project.project_name,
            path_count=total_path_count,
            elapsed=elapsed,
            parsers=project_parser_info
        )
        self._perf_info.projects.append(project_info)
        self._perf_info.path_count += total_path_count
    def test_model_no_cache(self):
        # With no saved manifest available, the cache lookup is skipped
        # entirely and the file must be parsed from scratch.
        src = self._matching_file('models', 'model_1.sql')
        self.parser.load_file.return_value = src
        self.loader.old_manifest = None

        self.loader.parse_with_cache(FileBlock(src), self.parser)

        # Nothing was cached, so parse_file is invoked once with a
        # FileBlock wrapping the given source file.
        self.parser.parse_file.assert_called_once_with(FileBlock(file=src))
Example #3
0
 def _get_file(self, path: FilePath, parser: BaseParser) -> FileBlock:
     if path.search_key in self._loaded_file_cache:
         block = self._loaded_file_cache[path.search_key]
     else:
         block = FileBlock(file=parser.load_file(path))
         self._loaded_file_cache[path.search_key] = block
     return block
    def test_model_cache_mismatch_checksum(self):
        # The saved manifest holds the same file path but a different
        # checksum, so the cached entry must be rejected.
        src = self._mismatched_file('models', 'model_1.sql')
        self.parser.load_file.return_value = src

        cached = self._mismatched_file('models', 'model_1.sql')
        cached.nodes.append('model.root.model_1')

        manifest = self._new_manifest()
        manifest.files[cached.path.search_key] = cached
        manifest.nodes = {'model.root.model_1': mock.MagicMock()}
        self.loader.old_manifest = manifest

        self.loader.parse_with_cache(FileBlock(src), self.parser)

        # Checksum mismatch: parse_file runs once, receiving a FileBlock
        # that wraps the freshly loaded source file.
        self.parser.parse_file.assert_called_once_with(FileBlock(file=src))
Example #5
0
 def _build_file(self, contents, relative_path) -> FileBlock:
     """Wrap *contents* in a FileBlock rooted at this fixture's paths,
     using an empty checksum.
     """
     file_path = FilePath(
         relative_path=relative_path,
         project_root=self.root_path,
         searched_path=self.subdir_path,
     )
     sf = SourceFile(path=file_path, checksum=FileHash.empty())
     sf.contents = contents
     return FileBlock(file=sf)
    def test_model_cache_missing_file(self):
        # The saved manifest knows about a *different* file, so the cache
        # lookup for model_1.sql misses.
        src = self._matching_file('models', 'model_1.sql')
        self.parser.load_file.return_value = src

        other = self._matching_file('models', 'model_2.sql')
        other.nodes.append('model.root.model_2')

        manifest = self._new_manifest()
        manifest.files[other.path.search_key] = other
        manifest.nodes = {'model.root.model_2': mock.MagicMock()}
        self.loader.old_manifest = manifest

        self.loader.parse_with_cache(FileBlock(src), self.parser)

        # The filename wasn't cached, so parse_file runs once with a
        # FileBlock wrapping the given source file.
        self.parser.parse_file.assert_called_once_with(FileBlock(file=src))
Example #7
0
 def create_macro_manifest(self):
     """Parse every project's macro files, then return a MacroManifest
     built from the macros accumulated on ``self.manifest``.
     """
     for project in self.all_projects.values():
         # what is the manifest passed in actually used for?
         parser = MacroParser(project, self.manifest)
         for macro_path in parser.get_paths():
             source_file = load_source_file(
                 macro_path, ParseFileType.Macro, project.project_name)
             # This does not add the file to the manifest.files,
             # but that shouldn't be necessary here.
             self.parse_with_cache(FileBlock(source_file), parser)
     return MacroManifest(self.manifest.macros)
Example #8
0
 def file_block_for(self, data: str, filename: str, searched: str):
     """Build a FileBlock for *data* as if it lived at
     *searched*/*filename* inside the snowplow package.
     """
     rel_path = normalize(filename)
     fp = FilePath(
         searched_path=searched,
         relative_path=rel_path,
         project_root=get_abs_os_path('./dbt_modules/snowplow'),
     )
     sf = SourceFile(
         path=fp,
         checksum=FileHash.from_contents(data),
     )
     sf.contents = data
     return FileBlock(file=sf)
    def test_model_cache_hit(self):
        # The saved manifest already contains a file with matching path
        # and checksum, so the cached result is reused wholesale.
        src = self._matching_file('models', 'model_1.sql')
        self.parser.load_file.return_value = src

        cached = self._matching_file('models', 'model_1.sql')
        cached.nodes.append('model.root.model_1')

        manifest = self._new_manifest()
        manifest.files[cached.path.search_key] = cached
        manifest.nodes = {'model.root.model_1': mock.MagicMock()}
        self.loader.old_manifest = manifest

        self.loader.parse_with_cache(FileBlock(src), self.parser)

        # Cache hit: parse_file must never run.
        self.parser.parse_file.assert_not_called()
Example #10
0
    def load(self):
        """Drive the full parse: read files, parse macros, then parse each
        project's remaining files, timing each phase into self._perf_info.
        """
        if self.old_manifest is not None:
            logger.debug('Got an acceptable saved parse result')

        # Phase 1: read files. Builds a mapping of project name ->
        # parser name -> list of file search keys; the keys are later
        # used to fetch SourceFiles from manifest.files. In the future
        # the loaded files will drive partial parsing, but right now this
        # just moves file loading out of the individual parsers and does
        # it all at once.
        phase_start = time.perf_counter()
        project_parser_files = {}
        for project in self.all_projects.values():
            read_files(project, self.manifest.files, project_parser_files)
        self._perf_info.read_files_elapsed = time.perf_counter() - phase_start

        # Phase 2: parse the macros first, so they're resolvable when the
        # other files are loaded.
        phase_start = time.perf_counter()
        for project in self.all_projects.values():
            macro_parser = MacroParser(project, self.manifest)
            project_files = project_parser_files[project.project_name]
            for search_key in project_files['MacroParser']:
                self.parse_with_cache(
                    FileBlock(self.manifest.files[search_key]), macro_parser)
        self.reparse_macros()
        # This is where a loop over self.manifest.macros should be performed
        # to set the 'depends_on' information from static rendering.
        self._perf_info.load_macros_elapsed = time.perf_counter() - phase_start

        # Phase 3: with macros available, parse the rest of the files.
        # This is currently done per project, but that may change.
        phase_start = time.perf_counter()
        for project in self.all_projects.values():
            self.parse_project(project, project_parser_files[project.project_name])
        self._perf_info.parse_project_elapsed = time.perf_counter() - phase_start
Example #11
0
 def parse_file_from_path(self, path: FilePath):
     """Load the file at *path* and immediately parse it as a FileBlock."""
     self.parse_file(FileBlock(file=self.load_file(path)))