def run(self, artifacts: List[Artifact]) -> List[Artifact]:
    """
    Scans a single source file for include directives and records the
    user headers it pulls in.

    Exactly one artifact must be supplied. Every line of its file is
    matched against the include pattern; an include whose captured text
    starts with a quote character is treated as a user header and added
    as a dependency, resolved against the workspace.

    :param artifacts: A list holding the one artifact to analyse.
    :return: A single-element list with a new artifact at the same
             location and file type, in the HeadersAnalysed state.
    :raises TaskException: If the list does not hold exactly one artifact.
    """
    logger = logging.getLogger(__name__)

    if len(artifacts) != 1:
        msg = ('Header Analyser expects only one Artifact, '
               f'but was given {len(artifacts)}')
        raise TaskException(msg)
    artifact = artifacts[0]

    new_artifact = Artifact(artifact.location,
                            artifact.filetype,
                            HeadersAnalysed)
    reader = FileTextReader(artifact.location)
    logger.debug('Looking for headers in: %s', reader.filename)

    for line in reader.line_by_line():
        include_match: Optional[Match] = self._include_pattern.match(line)
        if not include_match:
            continue

        include: str = include_match.group(1)
        logger.debug('Found header: %s', include)

        # Only quoted includes are recorded as dependencies.
        if not include.startswith(('"', "'")):
            continue

        include = include.strip('"').strip("'")
        logger.debug(' * User header; adding dependency')
        new_artifact.add_dependency(Path(self._workspace / include))

    return [new_artifact]
def hash(self) -> int:
    """
    Returns the Adler-32 checksum of the file at ``self.location``.

    The checksum is calculated on first access and cached in
    ``self._hash`` for later accesses. It is built up line by line,
    with each line encoded as UTF-8 and the running value seeded with 1.

    The running value is held in a local and only stored once the whole
    file has been read. If opening or reading the file fails, nothing is
    cached, so a later access retries instead of returning a partial
    checksum.

    :return: The checksum of the file's content.
    """
    if self._hash is not None:
        return self._hash

    checksum = 1
    reader = FileTextReader(self.location)
    for line in reader.line_by_line():
        checksum = adler32(bytes(line, encoding='utf-8'), checksum)

    # Commit only after a complete, successful read.
    self._hash = checksum
    return checksum
def test_reading(self, tmp_path: Path):
    """
    Checks that a FileTextReader yields the lines of its file once, and
    nothing at all when asked a second time.
    """
    source = tmp_path / 'beef.food'
    source.write_text('This is my test file\nIt has two lines')
    test_unit = FileTextReader(source)

    # First pass: both lines come back, line ending preserved.
    first_pass = list(test_unit.line_by_line())
    assert first_pass == ['This is my test file\n', 'It has two lines']

    # Second pass over the already-read file: any line is a failure.
    for _ in test_unit.line_by_line():
        fail(' No lines should be generated from a read file')
# Checks that FortranAnalyser copes with a file being scanned more than once.
#
# Two files are written, both defining module "barney_mod"; the first also
# holds program "betty", which uses that module. After analysing both files
# the FortranWorkingState is expected to list all three units and to resolve
# betty's dependency to both definitions of barney_mod. The second file is
# then analysed again and exactly the same assertions are repeated, i.e. the
# rescan must not change the recorded state.
#
# NOTE(review): this view of the file has lost its line breaks, including
# those inside the dedent()-ed Fortran literals, so the code below is left
# untouched rather than re-flowed by guesswork. Restore the original layout
# from version control before running.
def test_harvested_data(self, caplog, tmp_path): """ Checks that the analyser deals with rescanning a file. """ caplog.set_level(logging.DEBUG) first_file: Path = tmp_path / 'other.F90' first_file.write_text( dedent(''' program betty use barney_mod, only :: dino implicit none end program betty module barney_mod end module barney_mod ''')) second_file: Path = tmp_path / 'test.f90' second_file.write_text( dedent(''' module barney_mod end module barney_mod ''')) database: SqliteStateDatabase = SqliteStateDatabase(tmp_path) test_unit = FortranAnalyser(FileTextReader(first_file), database) test_unit.run() test_unit = FortranAnalyser(FileTextReader(second_file), database) test_unit.run() fdb = FortranWorkingState(database) assert list(iter(fdb)) \ == [FortranInfo(FortranUnitID('barney_mod', first_file)), FortranInfo(FortranUnitID('barney_mod', second_file)), FortranInfo(FortranUnitID('betty', first_file), ['barney_mod'])] assert list(fdb.depends_on(FortranUnitID('betty', first_file))) \ == [FortranUnitID('barney_mod', tmp_path / 'other.F90'), FortranUnitID('barney_mod', tmp_path / 'test.f90')] # Repeat the scan of second_file, there should be no change. # test_unit = FortranAnalyser(FileTextReader(second_file), database) test_unit.run() fdb = FortranWorkingState(database) assert list(iter(fdb)) \ == [FortranInfo(FortranUnitID('barney_mod', first_file)), FortranInfo(FortranUnitID('barney_mod', second_file)), FortranInfo(FortranUnitID('betty', first_file), ['barney_mod'])] assert list(fdb.depends_on(FortranUnitID('betty', first_file))) \ == [FortranUnitID('barney_mod', tmp_path / 'other.F90'), FortranUnitID('barney_mod', tmp_path / 'test.f90')]
def run(self, artifacts: List[Artifact]) -> List[Artifact]:
    """
    Writes a pragma-injected copy of a single source file into the
    workspace.

    The artifact's file is read through a _CTextReaderPragmas wrapper and
    the resulting lines are written to a file of the same name in the
    workspace. The returned artifact points at that new file and carries
    over every dependency of the input artifact.

    :param artifacts: A list holding the one artifact to process.
    :return: A single-element list with the new artifact, in the
             Modified state.
    :raises TaskException: If the list does not hold exactly one artifact.
    """
    logger = logging.getLogger(__name__)

    if len(artifacts) != 1:
        msg = ('C Pragma Injector expects only one Artifact, '
               f'but was given {len(artifacts)}')
        raise TaskException(msg)
    artifact = artifacts[0]

    logger.debug('Injecting pragmas into: %s', artifact.location)
    injector = _CTextReaderPragmas(FileTextReader(artifact.location))
    output_file = self._workspace / artifact.location.name

    # The input is consumed in full before the output is opened.
    # NOTE(review): presumably this guards against the output path being
    # the same file as the input - confirm before streaming instead.
    out_lines = list(injector.line_by_line())
    with output_file.open('w') as out_file:
        out_file.writelines(out_lines)

    new_artifact = Artifact(output_file, artifact.filetype, Modified)
    for dependency in artifact.depends_on:
        new_artifact.add_dependency(dependency)

    return [new_artifact]
def test_constructor_with_file(self, tmp_path: Path):
    """
    Checks the state of an analyser freshly constructed around a file
    reader: it holds the database it was given, lists the file as its
    only prerequisite and has no products.
    """
    source = tmp_path / 'foo'
    source.write_text('Hello')
    db = DummyDatabase()

    test_unit = AnalyserHarness(FileTextReader(source), db)

    assert test_unit.database == db
    assert test_unit.prerequisites == [source]
    assert test_unit.products == []
def visit(self, candidate: Path) -> List[Path]:
    """
    Runs the task registered for a file's extension and records the
    file's hash.

    The candidate's suffix selects a task class from the extension map.
    Analyser subclasses are built directly around a hashing reader;
    SingleFileCommand subclasses are wrapped in a CommandTask together
    with any flags registered for them. The task is run immediately and
    the file's hash is then stored through FileInfoDatabase.

    Only the extension lookup is guarded against KeyError. A KeyError
    raised while running the task or updating the database now
    propagates, where it used to be swallowed as if the extension were
    unknown.

    :param candidate: Path of the file to process.
    :return: The products of the task that was run, or an empty list if
             no task is registered for the candidate's extension.
    :raises TypeError: If the registered class is neither an Analyser
                       nor a SingleFileCommand.
    """
    try:
        task_class = self._extension_map[candidate.suffix]
    except KeyError:
        # No task is registered for this extension; nothing to do.
        return []

    reader: TextReader = FileTextReader(candidate)
    hasher: TextReaderAdler32 = TextReaderAdler32(reader)

    if issubclass(task_class, Analyser):
        task: Task = task_class(hasher, self._state)
    elif issubclass(task_class, SingleFileCommand):
        flags = self._command_flags_map.get(task_class, [])
        task = CommandTask(
            task_class(Path(hasher.filename), self._workspace, flags))
    else:
        message = \
            f'Unhandled class "{task_class}" in extension map.'
        raise TypeError(message)

    # TODO: Make SQLite connection multiprocess safe
    # self._queue.add_to_queue(task)
    task.run()

    new_candidates: List[Path] = []
    new_candidates.extend(task.products)

    # TODO: The hasher part here likely needs to be
    #       moved once the task is run by the queue
    for _ in hasher.line_by_line():
        pass  # Make sure we've read the whole file.

    file_info = FileInfoDatabase(self._state)
    file_info.add_file_info(candidate, hasher.hash)

    return new_candidates
def run(self, artifacts: List[Artifact]) -> List[Artifact]:
    """
    Analyses a single C source file for the symbols it defines and the
    project symbols it depends on.

    The file is parsed with libclang (forced to C with "-xc") and its
    cursor tree is walked in pre-order:

    * an externally linked function definition is recorded as a C symbol
      in the database and as a definition on the new artifact;
    * any other function declaration is remembered if
      ``_check_for_include`` reports its line as "usr_include";
    * a call to one of those remembered functions is recorded as a
      dependency of the most recently seen exposed definition.

    Any entries already held for the file are removed first. This used
    to be done twice in a row; the redundant second removal has been
    dropped.

    :param artifacts: A list holding the one artifact to analyse.
    :return: A single-element list with a new artifact at the same
             location and file type, in the Analysed state.
    :raises TaskException: If the list does not hold exactly one artifact.
    """
    if len(artifacts) != 1:
        msg = ('C Analyser expects only one Artifact, '
               f'but was given {len(artifacts)}')
        raise TaskException(msg)
    artifact = artifacts[0]

    reader = FileTextReader(artifact.location)

    # Clear out anything recorded by a previous scan of this file.
    state = CWorkingState(self.database)
    state.remove_c_file(reader.filename)

    new_artifact = Artifact(artifact.location,
                            artifact.filetype,
                            Analysed)

    index = clang.cindex.Index.create()
    translation_unit = index.parse(reader.filename,
                                   args=["-xc"])

    # Create include region line mappings
    self._locate_include_regions(translation_unit)

    # Now walk the actual nodes and find all relevant external symbols.
    # A set is used as it is only ever tested for membership.
    usr_includes = set()
    current_def = None
    for node in translation_unit.cursor.walk_preorder():
        if node.kind == clang.cindex.CursorKind.FUNCTION_DECL:
            if (node.is_definition()
                    and node.linkage == clang.cindex.LinkageKind.EXTERNAL):
                # This should catch function definitions which are exposed
                # to the rest of the application
                current_def = CSymbolID(node.spelling, artifact.location)
                state.add_c_symbol(current_def)
                new_artifact.add_definition(node.spelling)
            else:
                # Any other declarations should be coming in via headers,
                # we can use the injected pragmas to work out whether these
                # are coming from system headers or user headers
                if (self._check_for_include(node.location.line)
                        == "usr_include"):
                    usr_includes.add(node.spelling)

        elif node.kind == clang.cindex.CursorKind.CALL_EXPR:
            # When encountering a function call we should be able to
            # cross-reference it with a definition seen earlier; and
            # if it came from a user supplied header then we will
            # consider it a dependency within the project
            if node.spelling in usr_includes and current_def is not None:
                # TODO: Assumption that the most recent exposed
                # definition encountered above is the one which
                # should lodge this dependency - is that true?
                state.add_c_dependency(current_def, node.spelling)
                new_artifact.add_dependency(node.spelling)

    return [new_artifact]
# Checks that FortranAnalyser keeps track of nested scopes.
#
# The Fortran fixture holds a program "fred" (with an "if" block, a named
# "do" loop and a contained subroutine closed by a bare "end") and a module
# "barney" (with a derived type, an interface closed by a bare "end" and a
# contained function). After analysis the working state is expected to hold
# just the two top-level units, "barney" and "fred", each with an empty
# dependency list.
#
# NOTE(review): this view of the file has lost its line breaks, including
# those inside the dedent()-ed Fortran literal, so the code below is left
# untouched rather than re-flowed by guesswork. Restore the original layout
# from version control before running.
def test_analyser_scope(self, caplog, tmp_path): """ Tests that the analyser is able to track scope correctly. """ caplog.set_level(logging.DEBUG) test_file: Path = tmp_path / 'test.f90' test_file.write_text( dedent(''' program fred implicit none if (something) then named: do i=1, 10 end do named endif contains subroutine yabadabadoo() end end program module barney implicit none type betty_type integer :: property contains procedure inspect end type interface betty_type procedure betty_constructor end contains function inspect(this) class(betty_type), intent(in) :: this integer :: inspect inspect = this%property end function inspect end module ''')) database: SqliteStateDatabase = SqliteStateDatabase(tmp_path) test_unit = FortranAnalyser(FileTextReader(test_file), database) test_unit.run() working_state = FortranWorkingState(database) assert list(working_state) \ == [FortranInfo(FortranUnitID('barney', tmp_path/'test.f90'), []), FortranInfo(FortranUnitID('fred', tmp_path/'test.f90'), [])]
# Checks that each kind of program unit, and the modules it uses, is found.
#
# The Fortran fixture holds one program ("foo"), one module ("bar"), one
# function ("baz") and one subroutine ("qux"). Every unit uses
# "iso_fortran_env" and "ios_c_binding" plus one or two other modules. The
# working state is expected to list the four units with only those other
# modules as dependencies: cheese_mod, teapot_mod, beef_mod, and
# wibble_mod/wubble_mod respectively.
#
# NOTE(review): the fixture spells the second module "ios_c_binding"; the
# expected results omit it, so presumably it is the "intrinsic" keyword on
# that "use" line which causes it to be ignored - confirm against the
# analyser's use pattern.
#
# NOTE(review): this view of the file has lost its line breaks, including
# those inside the dedent()-ed Fortran literal, so the code below is left
# untouched rather than re-flowed by guesswork. Restore the original layout
# from version control before running.
def test_analyser_program_units(self, caplog, tmp_path): """ Tests that program units and the "uses" they contain are correctly identified. """ caplog.set_level(logging.DEBUG) test_file: Path = tmp_path / 'test.f90' test_file.write_text( dedent(''' program foo use iso_fortran_env, only : output use, intrinsic :: ios_c_binding use beef_mod implicit none end program foo module bar use iso_fortran_env, only : output use, intrinsic :: ios_c_binding use cheese_mod, only : bits_n_bobs implicit none end module bar function baz(first, second) use iso_fortran_env, only : output use, intrinsic :: ios_c_binding use teapot_mod implicit none end function baz subroutine qux() use iso_fortran_env, only : output use, intrinsic :: ios_c_binding use wibble_mod use wubble_mod, only: stuff_n_nonsense implicit none end subroutine qux ''')) database: SqliteStateDatabase = SqliteStateDatabase(tmp_path) test_unit = FortranAnalyser(FileTextReader(test_file), database) test_unit.run() working_state = FortranWorkingState(database) assert list(working_state) \ == [FortranInfo(FortranUnitID('bar', tmp_path/'test.f90'), ['cheese_mod']), FortranInfo(FortranUnitID('baz', tmp_path/'test.f90'), ['teapot_mod']), FortranInfo(FortranUnitID('foo', tmp_path/'test.f90'), ['beef_mod']), FortranInfo(FortranUnitID('qux', tmp_path/'test.f90'), ['wibble_mod', 'wubble_mod'])]
def run(self, artifacts: List[Artifact]) -> List[Artifact]:
    """
    Scans a single source file for include directives and records the
    user headers it pulls in.

    Exactly one artifact must be supplied. Every line of its file is
    matched against the include pattern; an include whose captured text
    starts with a quote character is treated as a user header and added
    as a dependency, resolved against the workspace.

    :param artifacts: A list holding the one artifact to analyse.
    :return: A single-element list with a new artifact at the same
             location and file type, in the HeadersAnalysed state.
    :raises TaskException: If the list does not hold exactly one artifact.
    """
    if len(artifacts) != 1:
        msg = ('Header Analyser expects only one Artifact, '
               f'but was given {len(artifacts)}')
        raise TaskException(msg)
    artifact = artifacts[0]

    new_artifact = Artifact(artifact.location,
                            artifact.filetype,
                            HeadersAnalysed)
    reader = FileTextReader(artifact.location)

    for line in reader.line_by_line():
        include_match: Optional[Match] = self._include_pattern.match(line)
        if not include_match:
            continue

        include: str = include_match.group(1)

        # Only quoted includes are recorded as dependencies.
        if not include.startswith(('"', "'")):
            continue

        include = include.strip('"').strip("'")
        new_artifact.add_dependency(Path(self._workspace / include))

    return [new_artifact]
# Checks that a "use" statement outside any program unit is rejected.
#
# The Fortran fixture places "use beef_mod" ahead of the opening of module
# "test_mod"; running FortranAnalyser over it is expected to raise
# TaskException.
#
# NOTE(review): this view of the file has lost its line breaks, including
# those inside the dedent()-ed Fortran literal, so the code below is left
# untouched rather than re-flowed by guesswork. Restore the original layout
# from version control before running.
def test_naked_use(self, tmp_path): """ Ensures that an exception is raised if a "use" is found outside a program unit. """ test_file: Path = tmp_path / 'test.f90' test_file.write_text( dedent(''' use beef_mod module test_mod end module test_mod ''')) database: SqliteStateDatabase = SqliteStateDatabase(tmp_path) test_unit = FortranAnalyser(FileTextReader(test_file), database) with pytest.raises(TaskException): test_unit.run()
def test_constructor(self, tmp_path: Path):
    """
    Checks that a FileTextReader reports the path it was built with as
    its filename.
    """
    directory = tmp_path / 'teapot'
    directory.mkdir()
    test_file = directory / 'cheese.boo'
    test_file.write_text('')

    assert FileTextReader(test_file).filename == test_file
def run(self, artifacts: List[Artifact]) -> List[Artifact]:
    """
    Analyses a single Fortran source file for the program units it
    defines and the modules they use.

    The file is read through a FortranNormaliser and each line is tried
    against a fixed sequence of patterns. A stack of (nature, name)
    pairs tracks the scopes currently open:

    * with nothing open, a program unit line registers the unit in the
      database and on the new artifact, and opens a scope;
    * a "use" of a non-intrinsic module is recorded as a dependency of
      the outermost open unit;
    * block, procedure, interface and type lines each open a scope;
    * an "end" line closes the innermost scope, and must agree with it
      in nature and name wherever the "end" line gives them.

    Any entries already held for the file are removed first.

    An "end" line met with no scope open now raises TaskException. It
    used to surface as a bare IndexError from popping an empty list.

    :param artifacts: A list holding the one artifact to analyse.
    :return: A single-element list with a new artifact at the same
             location and file type, in the Analysed state.
    :raises TaskException: If the list does not hold exactly one
                           artifact, if a non-intrinsic "use" appears
                           outside any program unit, or if an "end" line
                           has no open scope to close or does not match
                           the scope it closes.
    """
    logger = logging.getLogger(__name__)

    if len(artifacts) != 1:
        msg = ('Fortran Analyser expects only one Artifact, '
               f'but was given {len(artifacts)}')
        raise TaskException(msg)
    artifact = artifacts[0]

    reader = FileTextReader(artifact.location)
    new_artifact = Artifact(artifact.location,
                            artifact.filetype,
                            Analysed)

    # Clear out anything recorded by a previous scan of this file.
    state = FortranWorkingState(self.database)
    state.remove_fortran_file(reader.filename)

    normalised_source = FortranNormaliser(reader)
    scope: List[Tuple[str, str]] = []
    for line in normalised_source.line_by_line():
        logger.debug(scope)
        logger.debug('Considering: %s', line)

        # Program units are only looked for at the top level.
        if len(scope) == 0:
            unit_match: Optional[Match] \
                = self._program_unit_pattern.match(line)
            if unit_match:
                unit_type: str = unit_match.group(1).lower()
                unit_name: str = unit_match.group(2).lower()
                logger.debug('Found %s called "%s"',
                             unit_type, unit_name)
                unit_id = FortranUnitID(unit_name, reader.filename)
                state.add_fortran_program_unit(unit_id)
                new_artifact.add_definition(unit_name)
                scope.append((unit_type, unit_name))
                continue

        use_match: Optional[Match] \
            = self._use_pattern.match(line)
        if use_match:
            use_name: str = use_match.group(3).lower()
            if use_name in self._intrinsic_modules:
                logger.debug('Ignoring intrinsic module "%s"', use_name)
            else:
                if len(scope) == 0:
                    use_message \
                        = '"use" statement found outside program unit'
                    raise TaskException(use_message)
                logger.debug('Found usage of "%s"', use_name)
                # The dependency belongs to the outermost open unit.
                unit_id = FortranUnitID(scope[0][1], reader.filename)
                state.add_fortran_dependency(unit_id, use_name)
                new_artifact.add_dependency(use_name)
            continue

        block_match: Optional[Match] = self._scoping_pattern.match(line)
        if block_match:
            # Beware we want the value of a different group to the one we
            # check the presence of.
            #
            block_name: str = block_match.group(1) \
                and block_match.group(2).lower()
            block_nature: str = block_match.group(3).lower()
            logger.debug('Found %s called "%s"',
                         block_nature, block_name)
            scope.append((block_nature, block_name))
            continue

        proc_match: Optional[Match] \
            = self._procedure_pattern.match(line)
        if proc_match:
            proc_nature = proc_match.group(1).lower()
            proc_name = proc_match.group(2).lower()
            logger.debug('Found %s called "%s"',
                         proc_nature, proc_name)
            # Note: We append a tuple so double brackets.
            scope.append((proc_nature, proc_name))
            continue

        iface_match: Optional[Match] = self._interface_pattern.match(line)
        if iface_match:
            iface_name = iface_match.group(1) \
                and iface_match.group(1).lower()
            logger.debug('Found interface called "%s"', iface_name)
            scope.append(('interface', iface_name))
            continue

        type_match: Optional[Match] = self._type_pattern.match(line)
        if type_match:
            type_name = type_match.group(3).lower()
            logger.debug('Found type called "%s"', type_name)
            scope.append(('type', type_name))
            continue

        end_match: Optional[Match] = self._end_block_pattern.match(line)
        if end_match:
            # Either group may be absent, in which case it stays as-is
            # and the corresponding check below is skipped.
            end_nature: str = end_match.group(1) \
                and end_match.group(1).lower()
            end_name: str = end_match.group(2) \
                and end_match.group(2).lower()
            logger.debug('Found end of %s called %s',
                         end_nature, end_name)

            if not scope:
                # Report an unbalanced "end" as a task failure rather
                # than letting pop() raise IndexError.
                end_message = 'Found "end" statement without a ' \
                              'matching opening statement'
                raise TaskException(end_message)
            exp: Tuple[str, str] = scope.pop()

            if end_nature is not None:
                if end_nature != exp[0]:
                    end_message = 'Expected end of {exp} "{name}" ' \
                                  'but found {found}'
                    end_values = {'exp': exp[0],
                                  'name': exp[1],
                                  'found': end_nature}
                    raise TaskException(
                        end_message.format(**end_values))
            if end_name is not None:
                if end_name != exp[1]:
                    end_message = 'Expected end of {exp} "{name}" ' \
                                  'but found end of {found}'
                    end_values = {'exp': exp[0],
                                  'name': exp[1],
                                  'found': end_name}
                    raise TaskException(
                        end_message.format(**end_values))

    return [new_artifact]
def run(self, artifacts: List[Artifact]) -> List[Artifact]:
    """
    Analyses a single Fortran source file for the program units it
    defines, the modules they use and any symbols bound to C.

    The file is read through a FortranNormaliser and each line is tried
    against a fixed sequence of patterns. A stack of (nature, name)
    pairs tracks the scopes currently open:

    * with nothing open, a program unit line registers the unit in the
      database and on the new artifact, and opens a scope;
    * a "use" of a non-intrinsic module is recorded as a dependency of
      the outermost open unit;
    * block, procedure, interface and type lines each open a scope;
    * a procedure bound to C is a dependency when inside an interface
      scope, otherwise a C symbol definition;
    * any other line matching the C binding pattern is treated as a
      C-bound variable and recorded as a C symbol definition;
    * an "end" line closes the innermost scope, and must agree with it
      in nature and name wherever the "end" line gives them.

    Any Fortran and C entries already held for the file are removed
    first.

    An "end" line met with no scope open now raises TaskException. It
    used to surface as a bare IndexError from popping an empty list.

    :param artifacts: A list holding the one artifact to analyse.
    :return: A single-element list with a new artifact at the same
             location and file type, in the Analysed state.
    :raises TaskException: If the list does not hold exactly one
                           artifact, if a non-intrinsic "use" appears
                           outside any program unit, if a C-bound
                           variable's name cannot be found, or if an
                           "end" line has no open scope to close or does
                           not match the scope it closes.
    """
    logger = logging.getLogger(__name__)

    if len(artifacts) != 1:
        msg = ('Fortran Analyser expects only one Artifact, '
               f'but was given {len(artifacts)}')
        raise TaskException(msg)
    artifact = artifacts[0]

    reader = FileTextReader(artifact.location)
    new_artifact = Artifact(artifact.location,
                            artifact.filetype,
                            Analysed)

    # Clear out anything recorded by a previous scan of this file.
    state = FortranWorkingState(self.database)
    state.remove_fortran_file(reader.filename)
    logger.debug('Analysing: %s', reader.filename)

    # If this file defines any C symbol bindings it may also
    # end up with an entry in the C part of the database
    cstate = CWorkingState(self.database)
    cstate.remove_c_file(reader.filename)

    normalised_source = FortranNormaliser(reader)
    scope: List[Tuple[str, str]] = []
    for line in normalised_source.line_by_line():
        logger.debug(scope)
        logger.debug('Considering: %s', line)

        # Program units are only looked for at the top level.
        if len(scope) == 0:
            unit_match: Optional[Match] \
                = self._program_unit_pattern.match(line)
            if unit_match is not None:
                unit_type: str = unit_match.group(1).lower()
                unit_name: str = unit_match.group(2).lower()
                logger.debug('Found %s called "%s"',
                             unit_type, unit_name)
                unit_id = FortranUnitID(unit_name, reader.filename)
                state.add_fortran_program_unit(unit_id)
                new_artifact.add_definition(unit_name)
                scope.append((unit_type, unit_name))
                continue

        use_match: Optional[Match] \
            = self._use_pattern.match(line)
        if use_match is not None:
            use_name: str = use_match.group(3).lower()
            if use_name in self._intrinsic_modules:
                logger.debug('Ignoring intrinsic module "%s"', use_name)
            else:
                if len(scope) == 0:
                    use_message \
                        = '"use" statement found outside program unit'
                    raise TaskException(use_message)
                logger.debug('Found usage of "%s"', use_name)
                # The dependency belongs to the outermost open unit.
                unit_id = FortranUnitID(scope[0][1], reader.filename)
                state.add_fortran_dependency(unit_id, use_name)
                new_artifact.add_dependency(use_name)
            continue

        block_match: Optional[Match] = self._scoping_pattern.match(line)
        if block_match is not None:
            # Beware we want the value of a different group to the one we
            # check the presence of.
            #
            block_name: str = block_match.group(1) \
                and block_match.group(2).lower()
            block_nature: str = block_match.group(3).lower()
            logger.debug('Found %s called "%s"',
                         block_nature, block_name)
            scope.append((block_nature, block_name))
            continue

        proc_match: Optional[Match] \
            = self._procedure_pattern.match(line)
        if proc_match is not None:
            proc_nature = proc_match.group(1).lower()
            proc_name = proc_match.group(2).lower()
            logger.debug('Found %s called "%s"',
                         proc_nature, proc_name)
            scope.append((proc_nature, proc_name))

            # Check for the procedure being symbol-bound to C
            cbind_match: Optional[Match] \
                = self._cbind_pattern.match(line)
            if cbind_match is not None:
                cbind_name = cbind_match.group(2)
                # The name keyword on the bind statement is optional.
                # If it doesn't exist, the procedure name is used
                if cbind_name is None:
                    cbind_name = proc_name
                cbind_name = cbind_name.lower().strip("'\"")
                logger.debug('Bound to C symbol "%s"', cbind_name)

                # A bind within an interface block means this is
                # exposure of a C-defined function to Fortran,
                # otherwise it is going the other way (allowing C
                # code to call the Fortran procedure)
                if any(stype == "interface" for stype, _ in scope):
                    # TODO: This is sort of hijacking the mechanism used
                    # for Fortran module dependencies, only using the
                    # symbol name. Longer term we probably need a more
                    # elegant solution
                    logger.debug('In an interface block; so a dependency')
                    unit_id = FortranUnitID(scope[0][1], reader.filename)
                    state.add_fortran_dependency(unit_id, cbind_name)
                    new_artifact.add_dependency(cbind_name)
                else:
                    # Add to the C database
                    logger.debug('Not an interface block; so a definition')
                    symbol_id = CSymbolID(cbind_name, reader.filename)
                    cstate.add_c_symbol(symbol_id)
                    new_artifact.add_definition(cbind_name)
            # A procedure line is fully dealt with here, bound or not.
            continue

        cbind_match = self._cbind_pattern.match(line)
        if cbind_match is not None:
            # This should be a line binding from C to a variable definition
            # (procedure binds are dealt with above)
            cbind_name = cbind_match.group(2)

            # The name keyword on the bind statement is optional.
            # If it doesn't exist, the Fortran variable name is used
            if cbind_name is None:
                var_search = re.search(r'.*::\s*(\w+)', line)
                if var_search:
                    cbind_name = var_search.group(1)
                else:
                    cbind_message \
                        = 'failed to find variable name ' \
                          'on C bound variable'
                    raise TaskException(cbind_message)

            cbind_name = cbind_name.lower().strip("'\"")
            logger.debug('Found C bound variable called "%s"',
                         cbind_name)

            # Add to the C database
            symbol_id = CSymbolID(cbind_name, reader.filename)
            cstate.add_c_symbol(symbol_id)
            new_artifact.add_definition(cbind_name)
            # No "continue": the remaining patterns are still tried
            # against this line.

        iface_match: Optional[Match] = self._interface_pattern.match(line)
        if iface_match is not None:
            iface_name = iface_match.group(1) \
                and iface_match.group(1).lower()
            logger.debug('Found interface called "%s"', iface_name)
            scope.append(('interface', iface_name))
            continue

        type_match: Optional[Match] = self._type_pattern.match(line)
        if type_match is not None:
            type_name = type_match.group(3).lower()
            logger.debug('Found type called "%s"', type_name)
            scope.append(('type', type_name))
            continue

        end_match: Optional[Match] = self._end_block_pattern.match(line)
        if end_match is not None:
            # Either group may be absent, in which case it stays as-is
            # and the corresponding check below is skipped.
            end_nature: str = end_match.group(1) \
                and end_match.group(1).lower()
            end_name: str = end_match.group(2) \
                and end_match.group(2).lower()
            logger.debug('Found end of %s called %s',
                         end_nature, end_name)

            if not scope:
                # Report an unbalanced "end" as a task failure rather
                # than letting pop() raise IndexError.
                end_message = 'Found "end" statement without a ' \
                              'matching opening statement'
                raise TaskException(end_message)
            exp: Tuple[str, str] = scope.pop()

            if end_nature is not None:
                if end_nature != exp[0]:
                    end_message = 'Expected end of {exp} "{name}" ' \
                                  'but found {found}'
                    end_values = {'exp': exp[0],
                                  'name': exp[1],
                                  'found': end_nature}
                    raise TaskException(
                        end_message.format(**end_values))
            if end_name is not None:
                if end_name != exp[1]:
                    end_message = 'Expected end of {exp} "{name}" ' \
                                  'but found end of {found}'
                    end_values = {'exp': exp[0],
                                  'name': exp[1],
                                  'found': end_name}
                    raise TaskException(
                        end_message.format(**end_values))

    return [new_artifact]