def csv_lines(self):
    """Analysis csv rows: a fortran module depending on another fortran module
    and a c file, plus an 'mo commented' dependency."""
    analysed = [
        AnalysedFile(fpath='my_mod.f90', file_hash=123,
                     module_defs={'my_mod'}, symbol_defs={'my_mod'},
                     symbol_deps={'dep1_mod', 'dep2'},
                     mo_commented_file_deps={'mo_dep.c'}),
        AnalysedFile(fpath='dep1_mod.f90', file_hash=234,
                     module_defs={'dep1_mod'}, symbol_defs={'dep1_mod'}),
        AnalysedFile(fpath='dep2.c', file_hash=345, symbol_defs={'dep2'}),
    ]

    # header row first, then one csv row per analysed file
    header = ','.join(AnalysedFile.field_names())
    rows = [
        ','.join(af.to_str_dict()[field_name] for field_name in af.field_names())
        for af in analysed
    ]
    return [header] + rows
def analysed_files():
    """A chain of file dependencies: a.f90 -> b.f90 -> c.f90."""
    file_c = AnalysedFile(fpath=Path('c.f90'), file_hash=None)
    file_b = AnalysedFile(fpath=Path('b.f90'), file_hash=None,
                          file_deps={Path('c.f90')})
    file_a = AnalysedFile(fpath=Path('a.f90'), file_hash=None,
                          file_deps={Path('b.f90')})
    return file_a, file_b, file_c
def analysed_files(self):
    """Two c files, each defining two symbols named after the file stem."""
    return [
        AnalysedFile(fpath=Path(f'{stem}.c'),
                     symbol_defs=[f'{stem}_1', f'{stem}_2'],
                     file_hash=None)
        for stem in ('foo', 'bar')
    ]
def test_analysis_results():
    """
    Round-trip the analysis file through disk and check change detection.

    Using a physical analysis file, this tests:
      - new source file
      - changed source file
      - previously analysed source file, unchanged
      - source file no longer there
    """
    # source folder before change
    previous_file_hashes = {
        Path('change.f90'): 111,
        Path('no_change.f90'): 222,
        Path('remove.f90'): 333,
    }

    # source folder after change: remove.f90 is gone, change.f90 has a new
    # hash, new.f90 has appeared, no_change.f90 is untouched
    latest_file_hashes = {
        Path('change.f90'): 123,
        Path('no_change.f90'): 222,
        Path('new.f90'): 444,
    }

    previous_results = {
        AnalysedFile(fpath=path, file_hash=file_hash)
        for path, file_hash in previous_file_hashes.items()
    }

    with TemporaryDirectory() as tmpdir:
        analyser = Analyse(root_symbol=None)
        # simulate the effect of calling run, in which the superclass sets up the _config attribute (is this too ugly?)
        analyser._config = mock.Mock(project_workspace=Path(tmpdir))

        # create the initial analysis file
        with analyser._new_analysis_file(unchanged=previous_results):
            pass

        # check it loads correctly with no changes detected
        loaded_results = analyser._load_analysis_results(previous_file_hashes)
        changed, unchanged = analyser._what_needs_reanalysing(
            prev_results=loaded_results, latest_file_hashes=previous_file_hashes)
        assert not changed
        assert unchanged == previous_results

        # check we correctly identify new, changed, unchanged and removed files
        loaded_results = analyser._load_analysis_results(latest_file_hashes)
        changed, unchanged = analyser._what_needs_reanalysing(
            prev_results=loaded_results, latest_file_hashes=latest_file_hashes)
        # only the untouched file survives as a previous result...
        assert unchanged == {
            AnalysedFile(fpath=Path('no_change.f90'), file_hash=222)
        }
        # ...while the new and changed files need (re)analysis
        assert changed == {
            HashedFile(fpath=Path('change.f90'), file_hash=123),
            HashedFile(fpath=Path('new.f90'), file_hash=444)
        }
def src_tree():
    """A small source tree: root.f90 -> {a.f90, b.f90} -> c.f90,
    plus an unconnected foo.f90."""
    def node(name, deps=None):
        # deps=None means "don't pass file_deps at all", matching the
        # construction style used for files with no dependency info
        kwargs = {'fpath': Path(name), 'file_hash': None}
        if deps is not None:
            kwargs['file_deps'] = {Path(d) for d in deps}
        return Path(name), AnalysedFile(**kwargs)

    return dict([
        node('foo.f90'),
        node('root.f90', ['a.f90', 'b.f90']),
        node('a.f90', ['c.f90']),
        node('b.f90', ['c.f90']),
        node('c.f90', []),
    ])
def run(self, hashed_file: HashedFile):
    """Analyse one c file, recording the symbols it defines and depends on."""
    fpath, file_hash = hashed_file
    log_or_dot(logger, f"analysing {fpath}")

    analysed_file = AnalysedFile(fpath=fpath, file_hash=file_hash)

    # parse, then map out the include regions so symbols which come from
    # system headers can be skipped below
    translation_unit = clang.cindex.Index.create().parse(fpath, args=["-xc"])
    self._locate_include_regions(translation_unit)

    # walk the AST, recording declarations and references of user symbols
    declaration_kinds = {clang.cindex.CursorKind.FUNCTION_DECL,
                         clang.cindex.CursorKind.VAR_DECL}
    reference_kinds = {clang.cindex.CursorKind.CALL_EXPR,
                       clang.cindex.CursorKind.DECL_REF_EXPR}

    usr_symbols: List[str] = []
    for node in translation_unit.cursor.walk_preorder():
        if not node.spelling:
            continue
        # ignore sys include stuff
        if self._check_for_include(node.location.line) == "sys_include":
            continue
        logger.debug('Considering node: %s', node.spelling)

        if node.kind in declaration_kinds:
            self._process_symbol_declaration(analysed_file, node, usr_symbols)
        elif node.kind in reference_kinds:
            self._process_symbol_dependency(analysed_file, node, usr_symbols)

    return analysed_file
def _load_analysis_results(
        self, latest_file_hashes: Dict[Path, int]) -> Dict[Path, AnalysedFile]:
    """
    Load results from the previous analysis csv file, if it exists.

    The analysis file includes the hash of each file when we last analysed it.
    We discard previous results from files which are no longer present.

    :param latest_file_hashes: The files (and their hashes) currently in the
        source; entries for files not in here are dropped as stale.
    :return: Previously analysed files keyed by path. Empty when there is no
        analysis file from a previous run.
    """
    prev_results: Dict[Path, AnalysedFile] = dict()
    try:
        with open(self._config.project_workspace / "__analysis.csv", "rt") as csv_file:
            for row in csv.DictReader(csv_file):
                analysed_file = AnalysedFile.from_str_dict(row)

                # file no longer there?
                if analysed_file.fpath not in latest_file_hashes:
                    logger.info(f"file no longer present: {analysed_file.fpath}")
                    continue

                # ok, we have previously analysed this file
                prev_results[analysed_file.fpath] = analysed_file

        logger.info(f"loaded {len(prev_results)} previous analysis results")
    except FileNotFoundError:
        # first run: nothing to carry over
        logger.info("no previous analysis results")

    return prev_results
def test_vanilla(self):
    """Ensure the gcc command is formed correctly, including path-specific flags."""
    config = SimpleNamespace(
        project_workspace=Path('foo'), source_root=Path('foo/src'),
        multiprocessing=False, reuse_artefacts=False)

    path_flags = [
        AddFlags(match='foo/src/*', flags=['-I', 'foo/include', '-Dhello']),
    ]
    c_compiler = CompileC(compiler='gcc', common_flags=['-c'], path_flags=path_flags)

    source_file = Path('foo/src/foo.c')
    analysed_files = {source_file: AnalysedFile(fpath=source_file, file_hash=None)}

    with mock.patch('fab.steps.compile_c.run_command') as mock_run, \
            mock.patch('fab.steps.compile_c.send_metric'):
        c_compiler.run(
            artefact_store={BUILD_TREES: {None: analysed_files}},
            config=config)

    mock_run.assert_called_with([
        'gcc', '-c',
        '-I', 'foo/include', '-Dhello',
        'foo/src/foo.c', '-o', 'foo/src/foo.o',
    ])
def module_expected(module_fpath):
    """The expected analysis result for the test module source file."""
    expected_attrs = dict(
        fpath=module_fpath,
        file_hash=None,
        module_defs={'foo_mod'},
        symbol_defs={'external_sub', 'external_func', 'foo_mod'},
        module_deps={'bar_mod'},
        symbol_deps={'monty_func', 'bar_mod'},
        file_deps=set(),
        mo_commented_file_deps={'some_file.c'},
    )
    return AnalysedFile(**expected_attrs)
def test_nothing_changed(self, analyser, csv_lines, latest_file_hashes):
    """All previously analysed files are still present, so every row loads.

    A simple example of a fortran module depending on a fortran and a c file.
    """
    file_data = "\n".join(csv_lines)
    with mock.patch('fab.steps.analyse.open', mock_open(read_data=file_data)):
        results = analyser._load_analysis_results(
            latest_file_hashes=latest_file_hashes)

    expected_files = [
        AnalysedFile(fpath=Path('my_mod.f90'), file_hash=123,
                     module_defs={'my_mod'}, symbol_defs={'my_mod'},
                     symbol_deps={'dep1_mod', 'dep2'},
                     mo_commented_file_deps={'mo_dep.c'}),
        AnalysedFile(fpath=Path('dep1_mod.f90'), file_hash=234,
                     module_defs={'dep1_mod'}, symbol_defs={'dep1_mod'}),
        AnalysedFile(fpath=Path('dep2.c'), file_hash=345,
                     symbol_defs={'dep2'}),
    ]
    assert results == {af.fpath: af for af in expected_files}
def test_missing_file(self, analyser, csv_lines, latest_file_hashes):
    """A previously analysed file is no longer present, so its row is dropped."""
    # remove a file from the current source
    del latest_file_hashes[Path('dep2.c')]

    file_data = "\n".join(csv_lines)
    with mock.patch('fab.steps.analyse.open', mock_open(read_data=file_data)):
        results = analyser._load_analysis_results(
            latest_file_hashes=latest_file_hashes)

    # dep2.c must not appear in the loaded results
    expected_files = [
        AnalysedFile(fpath=Path('my_mod.f90'), file_hash=123,
                     module_defs={'my_mod'}, symbol_defs={'my_mod'},
                     symbol_deps={'dep1_mod', 'dep2'},
                     mo_commented_file_deps={'mo_dep.c'}),
        AnalysedFile(fpath=Path('dep1_mod.f90'), file_hash=234,
                     module_defs={'dep1_mod'}, symbol_defs={'dep1_mod'}),
    ]
    assert results == {af.fpath: af for af in expected_files}
def test_vanilla(self): analyser = Analyse(root_symbol=None) # we analysed the source folder and found these symbols symbols = { "root": Path("root.f90"), "root_dep": Path("root_dep.f90"), "util": Path("util.f90"), "util_dep": Path("util_dep.f90"), } # we extracted the build tree build_tree = { Path('root.f90'): AnalysedFile(fpath=Path(), file_hash=None), Path('root_dep.f90'): AnalysedFile(fpath=Path(), file_hash=None), } # we want to force this symbol into the build [because it's not used via modules] analyser.unreferenced_deps = ['util'] # the stuff to add to the build tree will be found in here all_analysed_files = { # root.f90 and root_util.f90 would also be in here but the test doesn't need them Path('util.f90'): AnalysedFile(fpath=Path('util.f90'), file_deps={Path('util_dep.f90')}, file_hash=None), Path('util_dep.f90'): AnalysedFile(fpath=Path('util_dep.f90'), file_hash=None), } analyser._add_unreferenced_deps(symbols=symbols, all_analysed_files=all_analysed_files, build_tree=build_tree) assert Path('util.f90') in build_tree assert Path('util_dep.f90') in build_tree
def test_simple_result(tmp_path):
    """Analyse the companion c source file and check the full result."""
    c_file = Path(__file__).parent / "test_c_analyser.c"
    result = CAnalyser().run(HashedFile(c_file, None))

    expected = AnalysedFile(
        fpath=c_file,
        file_hash=None,
        symbol_deps={'usr_var', 'usr_func'},
        symbol_defs={'func_decl', 'func_def', 'var_def', 'var_extern_def', 'main'},
        file_deps=set(),
        mo_commented_file_deps=set(),
    )
    assert result == expected
def _new_analysis_file(self, unchanged: Iterable[AnalysedFile]):
    """
    Create the analysis file from scratch, containing any content from its previous
    version which is still valid.

    The returned context is a csv.DictWriter.

    :param unchanged: Previously analysed files whose source has not changed;
        their rows are carried over into the new file.
    """
    with TimerLogger("starting analysis progress file"):
        analysis_progress_file = open(
            self._config.project_workspace / "__analysis.csv", "wt")
        analysis_dict_writer = csv.DictWriter(
            analysis_progress_file, fieldnames=AnalysedFile.field_names())
        analysis_dict_writer.writeheader()

        # re-write the progress so far
        unchanged_rows = (af.to_str_dict() for af in unchanged)
        analysis_dict_writer.writerows(unchanged_rows)
        analysis_progress_file.flush()

    # try/finally ensures the file is closed even if the caller's block raises;
    # previously the handle leaked on error.
    try:
        yield analysis_dict_writer
    finally:
        analysis_progress_file.close()
def run(self, hashed_file: HashedFile):
    """
    Analyse a single fortran file, recording the modules and symbols it
    defines and depends on.

    Note: errors are *returned* rather than raised, so callers can collect
    results from a batch of files and handle failures afterwards. An
    EmptySourceFile is returned for files which parse to an empty tree.
    """
    fpath, file_hash = hashed_file
    log_or_dot(logger, f"analysing {fpath}")

    # parse the file
    try:
        tree = self._parse_file(fpath=fpath)
    except Exception as err:
        return err
    if tree.content[0] is None:
        logger.debug(f" empty tree found when parsing {fpath}")
        return EmptySourceFile(fpath)

    analysed_file = AnalysedFile(fpath=fpath, file_hash=file_hash)

    # see what's in the tree
    try:
        for obj in iter_content(tree):
            obj_type = type(obj)

            # todo: ?replace these with function lookup dict[type, func]? - Or the new match statement, Python 3.10
            if obj_type == Use_Stmt:
                self._process_use_statement(analysed_file, obj)  # raises
            elif obj_type == Program_Stmt:
                analysed_file.add_symbol_def(str(obj.get_name()))
            elif obj_type == Module_Stmt:
                analysed_file.add_module_def(str(obj.get_name()))
            elif obj_type in (Subroutine_Stmt, Function_Stmt):
                self._process_subroutine_or_function(analysed_file, fpath, obj)
            # todo: we've not needed this so far, for jules or um...(?)
            elif obj_type == "variable binding not yet supported":
                # NOTE(review): a type object can never equal this string, so
                # this branch is currently unreachable — it reads as a
                # placeholder for future variable-binding support; confirm.
                return self._process_variable_binding(fpath)
            elif obj_type == Comment:
                self._process_comment(analysed_file, obj)
    except Exception as err:
        return err

    logger.debug(f" analysed {analysed_file.fpath}")
    return analysed_file
def um_atmos_safe_config(revision):
    """
    Build config for the UM atmos 'safe' build.

    :param revision: The UM revision, e.g. 'vn12.1'. The matching revision for
        the companion repos is derived by swapping the 'vn' prefix for 'um'.
    :return: A populated BuildConfig, with config.steps set.
    """
    um_revision = revision.replace('vn', 'um')

    config = BuildConfig(
        project_label=f'um atmos safe {revision}',
        # multiprocessing=False,
        reuse_artefacts=True,
    )

    # Locate the gcom library. UM 12.1 intended to be used with gcom 7.6
    gcom_build = os.getenv('GCOM_BUILD') or \
        os.path.expanduser(config.project_workspace / "../gcom-object-archive-vn7.6/build_output")
    logger.info(f"expecting gcom at {gcom_build}")

    config.steps = [
        # todo: these repo defs could make a good set of reusable variables

        # UM 12.1, 16th November 2021
        GrabFcm(src='fcm:um.xm_tr/src', dst='um', revision=revision),

        # JULES 6.2, for UM 12.1
        GrabFcm(src='fcm:jules.xm_tr/src', dst='jules', revision=um_revision),

        # SOCRATES 21.11, for UM 12.1
        GrabFcm(src='fcm:socrates.xm_tr/src', dst='socrates', revision=um_revision),

        # SHUMLIB, for UM 12.1
        GrabFcm(src='fcm:shumlib.xm_tr/', dst='shumlib', revision=um_revision),

        # CASIM, for UM 12.1
        GrabFcm(src='fcm:casim.xm_tr/src', dst='casim', revision=um_revision),

        MyCustomCodeFixes(name="my custom code fixes"),

        FindSourceFiles(path_filters=file_filtering),

        RootIncFiles(),

        CPragmaInjector(),

        c_preprocessor(
            source=CollectionGetter('pragmad_c'),
            path_flags=[
                # todo: this is a bit "codey" - can we safely give longer strings and split later?
                AddFlags(match="$source/um/*", flags=[
                    '-I$source/um/include/other',
                    '-I$source/shumlib/common/src',
                    '-I$source/shumlib/shum_thread_utils/src']),
                AddFlags(match="$source/shumlib/*", flags=[
                    '-I$source/shumlib/common/src',
                    '-I$source/shumlib/shum_thread_utils/src']),
                # todo: just 3 folders use this
                AddFlags("$source/um/*", ['-DC95_2A', '-I$source/shumlib/shum_byteswap/src']),
            ],
        ),

        # todo: explain fnmatch
        fortran_preprocessor(
            common_flags=['-P'],
            path_flags=[
                AddFlags("$source/jules/*", ['-DUM_JULES']),
                AddFlags("$source/um/*", ['-I$relative/include']),

                # coupling defines
                AddFlags("$source/um/control/timer/*", ['-DC97_3A']),
                AddFlags("$source/um/io_services/client/stash/*", ['-DC96_1C']),
            ],
        ),

        Analyse(
            root_symbol='um_main',

            # fparser2 fails to parse this file, but it does compile.
            special_measure_analysis_results={
                AnalysedFile(
                    fpath=Path(config.project_workspace / BUILD_OUTPUT / "casim/lookup.f90"),
                    file_hash=None,
                    symbol_defs={'lookup'},
                    symbol_deps={
                        'mphys_die', 'variable_precision', 'mphys_switches',
                        'mphys_parameters', 'special', 'passive_fields',
                        'casim_moments_mod', 'yomhook', 'parkind1'},
                    # was `{}` (an empty *dict*); these fields are sets
                    # everywhere else in this file
                    file_deps=set(),
                    mo_commented_file_deps=set()),
            }),

        CompileC(compiler='gcc', common_flags=['-c', '-std=c99']),

        CompileFortran(
            # compiler='mpifort',
            compiler='gfortran',
            common_flags=[
                '-fdefault-integer-8', '-fdefault-real-8', '-fdefault-double-8',
                '-c', '-J', '$output',  # .mod file output and include folder
                # '-O2'
            ],
            path_flags=[
                # mpl include - todo: just add this for everything?
                AddFlags("$output/um/*", ['-I' + gcom_build]),
                AddFlags("$output/jules/*", ['-I' + gcom_build]),

                # required for newer compilers
                # # todo: allow multiple filters per instance?
                # *[AddFlags(*i) for i in ALLOW_MISMATCH_FLAGS]
            ]),

        # this step just makes linker error messages more manageable
        ArchiveObjects(),

        LinkExe(
            linker='mpifort',
            flags=[
                '-lc', '-lgfortran',
                '-L', '~/.conda/envs/sci-fab/lib',
                '-L', gcom_build, '-l', 'gcom'
            ],
        )
    ]

    return config