class Source:
    """
    Representation of a collection with the data collected by a source code analysis. The collection contains
    information about functions, variable initializations, a functions call graph, macros.

    Functions and global variables are stored as two-level dictionaries {name: {file path: object}}, macros are
    stored as {name: object}.
    """

    # Pattern for simple expressions that contain an explicit function name such as 'myfunc', '& myfunc', '(myfunc)'
    # or '(& myfunc)'. A raw string is required: '\(', '\s' and '\w' are invalid escapes in an ordinary literal.
    _NAME_RE = re.compile(r"\(?\s*&?\s*(\w+)\s*\)?$")

    def __init__(self, logger, conf, abstract_task):
        """
        Setup initial attributes, connect to the build base and import the source code analysis data.

        :param logger: logging object.
        :param conf: Configuration properties dictionary (keys 'build base', 'source analysis' and
                     'main working directory' are read by this class).
        :param abstract_task: Abstract verification task dictionary (given by VTG).
        """
        self.logger = logger
        self._conf = conf
        self._source_functions = dict()
        self._source_vars = dict()
        self._macros = dict()
        self.__function_calls_cache = dict()

        # Initialize Clade client to make requests
        self._clade = Clade(self._conf['build base'])

        # Ask for dependencies for each CC
        cfiles, files_map = self._collect_file_dependencies(abstract_task)

        # Read file with source analysis
        self._import_code_analysis(cfiles, files_map)

    @property
    def source_functions(self):
        """
        Return a list of function names.

        :return: function names list.
        """
        return list(self._source_functions.keys())

    def get_source_function(self, name, path=None, declaration=None):
        """
        Provides the function by a given name from the collection.

        :param name: Function name (simple expressions like '& myfunc' are accepted, see refined_name()).
        :param path: File where the function should be declared or defined.
        :param declaration: Declaration object representing the function of interest.
        :return: Function object or None.
        :raise ValueError: Several different functions match and the path does not select one of them.
        """
        name = self.refined_name(name)
        if not name or name not in self._source_functions:
            return None

        by_path = self._source_functions[name]
        if path and path in by_path:
            return by_path[path]

        functions = self.get_source_functions(name, declaration=declaration)
        if len(functions) == 1:
            return functions[0]
        if len(functions) > 1:
            raise ValueError("There are several definitions of function {!r} in provided code you must specify "
                             "scope".format(name))
        return None

    def get_source_functions(self, name, declaration=None):
        """
        Provides all functions found by a given name from the collection.

        :param name: Function name.
        :param declaration: Declaration object representing the function of interest.
        :return: List with Function objects (without duplicates).
        """
        name = self.refined_name(name)
        result = []
        if name and name in self._source_functions:
            for func in self._source_functions[name].values():
                # The same object is usually registered for several files, so filter out duplicates
                if func not in result and (not declaration or declaration.compare(func.declaration)):
                    result.append(func)
        return result

    def set_source_function(self, new_obj, path):
        """
        Add or replace a Function object in the collection.

        :param new_obj: Function object.
        :param path: File where the function should be declared or defined.
        :return: None.
        """
        self._source_functions.setdefault(new_obj.name, dict())[path] = new_obj

    def remove_source_function(self, name):
        """
        Delete the function from the collection.

        :param name: Function name.
        :return: None.
        :raise KeyError: There is no function with such name.
        """
        del self._source_functions[name]

    @property
    def source_variables(self):
        """
        Return list of global variables.

        :return: Variable names list.
        """
        return list(self._source_vars.keys())

    def get_source_variable(self, name, path=None):
        """
        Provides a global variable by a given name and scope file from the collection.

        :param name: Variable name.
        :param path: File with the variable declaration or initialization.
        :return: Variable object or None (also when several different variables match).
        """
        name = self.refined_name(name)
        if not name or name not in self._source_vars:
            return None

        by_path = self._source_vars[name]
        if path and path in by_path:
            return by_path[path]

        variables = self.get_source_variables(name)
        if len(variables) == 1:
            return variables[0]
        return None

    def get_source_variables(self, name):
        """
        Provides all global variables by a given name from the collection.

        :param name: Variable name.
        :return: List with Variable objects (without duplicates).
        """
        name = self.refined_name(name)
        result = []
        if name and name in self._source_vars:
            for var in self._source_vars[name].values():
                if var not in result:
                    result.append(var)
        return result

    def set_source_variable(self, new_obj, path):
        """
        Add or replace an object in global variables collection.

        :param new_obj: Variable object.
        :param path: File with the variable declaration or initialization.
        :return: None.
        """
        self._source_vars.setdefault(new_obj.name, dict())[path] = new_obj

    def remove_source_variable(self, name):
        """
        Delete the global variable from the collection.

        :param name: Variable name.
        :return: None.
        :raise KeyError: There is no variable with such name.
        """
        del self._source_vars[name]

    def get_macro(self, name):
        """
        Provides a macro by a given name from the collection.

        :param name: Macro name.
        :return: Macro object or None.
        """
        return self._macros.get(name)

    def set_macro(self, new_obj):
        """
        Set or replace an object in macros collection.

        :param new_obj: Macro object.
        :return: None.
        """
        self._macros[new_obj.name] = new_obj

    def remove_macro(self, name):
        """
        Delete the macro from the collection.

        :param name: Macro name.
        :return: None.
        :raise KeyError: There is no macro with such name.
        """
        del self._macros[name]

    @staticmethod
    def refined_name(call):
        """
        Resolve function name from simple expressions which contains explicit function name like '& myfunc',
        '(myfunc)', '(& myfunc)' or 'myfunc'.

        :param call: An expression string.
        :return: Extracted function name string or None if the expression has a different form.
        """
        match = Source._NAME_RE.fullmatch(call)
        return match.group(1) if match else None

    def _import_code_analysis(self, cfiles, dependencies):
        """
        Read global variables, functions and macros to fill up the collection.

        :param cfiles: Set of C files of the program fragment.
        :param dependencies: Dictionary {included file: {C files that include this one}}. It is extended in place
                             with scopes of functions called from the C files.
        :return: None.
        """
        # Import typedefs if there are provided
        self.logger.info("Extract complete types definitions")
        typedef = self._clade.get_typedefs(set(dependencies.keys()).union(cfiles))
        if typedef:
            import_typedefs(typedef, dependencies)

        self._import_variables(cfiles)
        cg, fs = self._import_functions(cfiles, dependencies)
        self._import_calls(cg, fs, cfiles, dependencies)
        self._import_macros(cfiles)

    def _import_variables(self, cfiles):
        """Import global variables initialized in the given C files."""
        variables = self._clade.get_variables(cfiles)
        if not variables:
            return

        self.logger.info("Import global variables initializations")
        for path, vals in variables.items():
            for variable in vals:
                variable_name = extract_name(variable['declaration'])
                if not variable_name:
                    raise ValueError('Global variable without a name')
                var = Variable(variable_name, variable['declaration'])

                # Here we know, that if we met a variable in an another file then it is an another variable because
                # a program should contain a single global variable initialization
                self.set_source_variable(var, path)
                var.declaration_files.add(path)
                var.initialization_file = path
                var.static = is_static(variable['declaration'])

                if 'value' in variable:
                    var.value = variable['value']

    def _import_functions(self, cfiles, dependencies):
        """
        Import functions defined in the given C files and functions which are called or referenced from them.

        :return: Tuple (call graph, functions by file) as obtained from Clade.
        """
        # Functions which are used in variables initializations
        self.logger.info("Import source functions")
        vfunctions = self._clade.get_used_in_vars_functions()

        # Get functions defined in dependencies and in the main functions and have calls
        cg = self._clade.get_callgraph(set(dependencies.keys()))

        # Function scope definitions
        # todo: maybe this should be fixed in Clade
        # As we will not get definitions for library functions if there are in compiled parts we should add all scopes
        # that are given for all function called from outside of the code we analyze
        for scope in (s for s in cfiles if s in cg):
            for func in (f for f in cg[scope] if cg[scope][f].get('calls')):
                for dep in cg[scope][func].get('calls'):
                    dependencies.setdefault(dep, set())
                    dependencies[dep].add(scope)
        fs = self._clade.get_functions_by_file(set(dependencies.keys()).union(cfiles))

        # Add called functions
        for scope in cg:
            for func in cg[scope]:
                desc = cg[scope][func]
                if scope in cfiles:
                    # Definition of the function is in the code of interest
                    self._add_function(func, scope, fs, dependencies, cfiles)
                    # Add called functions
                    for def_scope, cf_desc in desc.get('calls', dict()).items():
                        if def_scope not in cfiles:
                            for called_func in (f for f in cf_desc if def_scope in fs and f in fs[def_scope]):
                                self._add_function(called_func, def_scope, fs, dependencies, cfiles)
                elif ('called_in' in desc and set(desc['called_in'].keys()).intersection(cfiles)) or \
                        func in vfunctions:
                    if scope in fs and func in fs[scope]:
                        # Function is called in the target code but defined in dependencies
                        self._add_function(func, scope, fs, dependencies, cfiles)
                    elif scope != 'unknown':
                        self.logger.warning(
                            "There is no information on declarations of function {!r} from {!r} scope"
                            .format(func, scope))

        # Add functions missed in the call graph
        for scope in (s for s in fs if s in cfiles):
            for func in fs[scope]:
                func_intf = self.get_source_function(func, scope)
                if not func_intf:
                    self._add_function(func, scope, fs, dependencies, cfiles)

        return cg, fs

    def _import_calls(self, cg, fs, cfiles, dependencies):
        """Register for each imported function its calls made from the given C files."""
        # self.source_functions is a copy of names, so functions may be safely added during the iteration
        for func in self.source_functions:
            for obj in self.get_source_functions(func):
                scopes = set(obj.declaration_files).union(set(obj.header_files))
                if not obj.definition_file:
                    # It is likely be this way
                    scopes.add('unknown')
                for scope in (s for s in scopes if cg.get(s, dict()).get(func)):
                    for cscope, desc in ((s, d) for s, d in cg[scope][func].get('called_in', {}).items()
                                         if s in cfiles):
                        for caller in desc:
                            for line in desc[caller]:
                                params = desc[caller][line].get('args')
                                caller_intf = self.get_source_function(caller, cscope)
                                obj.add_call(caller, cscope)

                                if params:
                                    # Here can be functions which are not defined or visible
                                    for _, passed_func in list(params):
                                        passed_obj = self.get_source_function(passed_func, cscope)
                                        if not passed_obj:
                                            passed_scope = self._search_function(passed_func, cscope, fs)
                                            if passed_scope:
                                                self._add_function(passed_func, passed_scope, fs, dependencies,
                                                                   cfiles)
                                            else:
                                                self.logger.warning(
                                                    "Cannot find function {!r} from scope {!r}"
                                                    .format(passed_func, cscope))
                                                # Ignore this call since model will not be correct without signature
                                                params = None
                                                break
                                    # NOTE(review): caller_intf is None if the caller was not imported - confirm
                                    # that it cannot happen for callers from the C files.
                                    caller_intf.call_in_function(obj, params)

    def _import_macros(self, cfiles):
        """Import expansions of macros listed in the optional 'macros white list' file."""
        macros_file = get_conf_property(self._conf['source analysis'], 'macros white list')
        if not macros_file:
            return

        macros_file = find_file_or_dir(self.logger, self._conf['main working directory'], macros_file)
        with open(macros_file, 'r', encoding='utf8') as fp:
            white_list = ujson.load(fp)
        if not white_list:
            return

        expansions = self._clade.get_macros_expansions(cfiles, white_list)
        for path, file_macros in expansions.items():
            for macro, desc in file_macros.items():
                obj = self.get_macro(macro)
                if not obj:
                    obj = Macro(macro)
                for call in desc.get('args', []):
                    obj.add_parameters(path, call)
                self.set_macro(obj)

    def _search_function(self, func_name, some_scope, fs):
        """
        Find a scope which provides a description of the function. The given scope is preferred, then 'unknown'
        scope, then any other scope.

        :param func_name: Function name.
        :param some_scope: Preferred scope (file).
        :param fs: Dictionary {scope: {function name: description}}.
        :return: Scope name or None.
        """
        # Be aware of this function - it is costly
        if some_scope in fs and func_name in fs[some_scope]:
            return some_scope
        if 'unknown' in fs and func_name in fs['unknown']:
            return 'unknown'
        return next((s for s in fs if func_name in fs[s]), None)

    def _add_function(self, func, scope, fs, deps, cfiles):
        """
        Create a Function object and register it for all C files where the function is defined or declared.

        :param func: Function name.
        :param scope: Scope (file) of the function definition or 'unknown'.
        :param fs: Dictionary {scope: {function name: description}}.
        :param deps: Dictionary {included file: {C files that include this one}}.
        :param cfiles: Set of C files of the program fragment.
        :return: None.
        """
        fs_desc = fs[scope][func]
        if scope == 'unknown':
            key = list(fs_desc['declarations'].keys())[0]
            signature = fs_desc['declarations'][key]['signature']
            func_intf = Function(func, signature)
            # Do not set definition file since it is out of scope of the target program fragment
        else:
            signature = fs_desc.get('signature')
            func_intf = Function(func, signature)
            func_intf.definition_file = scope

        # Set static
        func_intf.static = fs_desc.get('type') == "static"

        # Add declarations
        files = {func_intf.definition_file} if func_intf.definition_file else set()
        if fs_desc['declarations']:
            files.update({f for f in fs_desc['declarations'] if f != 'unknown' and f in deps})
        for file in files:
            if file not in cfiles and file not in func_intf.header_files:
                func_intf.header_files.append(file)
            for cfile in deps[file]:
                self.set_source_function(func_intf, cfile)
                func_intf.declaration_files.add(cfile)

    def _collect_file_dependencies(self, abstract_task):
        """
        Collect for each included header file or c file its "main" file to which it was included. This is required
        since we cannot write aspects and instrument files which have no CC command so we build this map.

        :param abstract_task: Abstract task dictionary.
        :return: Tuple (set of C files, collection dictionary {included file: {files that include this one}}).
        """
        collection = dict()
        c_files = set()

        # Read each CC description and import map of files to in files
        for group in abstract_task['grps']:
            for desc in group['Extra CCs']:
                cc_desc = self._clade.get_cmd(desc['CC'])
                c_file = cc_desc['in'][0]
                # Now read deps: collect for each file CC entry to which it is included
                for file in self._clade.get_cmd_deps(desc['CC']):
                    collection.setdefault(file, set()).add(c_file)
                c_files.add(c_file)

        return c_files, collection
class Job(klever.core.components.Component):
    """
    Component that decides a job or a sub-job: it prepares the build base, uploads original sources and launches
    the components listed in CORE_COMPONENTS.
    """

    CORE_COMPONENTS = ['PFG', 'VTG', 'VRP']

    def __init__(self, conf, logger, parent_id, callbacks, mqs, vals, id=None, work_dir=None, attrs=None,
                 separate_from_parent=True, include_child_resources=False, components_common_conf=None):
        """
        Initialize the component and remember the configuration shared by launched components.

        All arguments except the last one are passed to the base class as is.

        :param components_common_conf: Configuration dictionary passed to each launched component. It is updated in
                                       place. NOTE(review): it must be a dictionary when work_dir is given.
        """
        super(Job, self).__init__(conf, logger, parent_id, callbacks, mqs, vals, id, work_dir, attrs,
                                  separate_from_parent, include_child_resources)
        self.common_components_conf = components_common_conf

        if work_dir:
            self.common_components_conf['additional sources directory'] = os.path.join(
                os.path.realpath(work_dir), 'additional sources')

        self.clade = None
        self.components = []
        self.component_processes = []

    def decide_job_or_sub_job(self):
        """
        Prepare the build base and original sources, launch the components and wait for the coverage if necessary.

        :raise RuntimeError: Clade reports that the build base is not OK.
        """
        self.logger.info('Decide job/sub-job "{0}"'.format(self.id))

        # This is required to associate verification results with particular sub-jobs.
        # Skip leading "/" since this identifier is used in os.path.join() that returns absolute path otherwise.
        self.common_components_conf['sub-job identifier'] = self.id[1:]

        # Check and set build base here since many Core components need it.
        self.__set_build_base()
        self.clade = Clade(self.common_components_conf['build base'])
        if not self.clade.work_dir_ok():
            raise RuntimeError('Build base is not OK')

        self.__retrieve_working_src_trees()
        self.__get_original_sources_basic_info()
        self.__upload_original_sources()

        # Create directory where files will be cached and remember absolute path to it for components.
        os.mkdir('cache')
        self.common_components_conf['cache directory'] = os.path.realpath('cache')

        if self.common_components_conf['keep intermediate files']:
            self.logger.debug('Create components configuration file "conf.json"')
            with open('conf.json', 'w', encoding='utf8') as fp:
                json.dump(self.common_components_conf, fp, ensure_ascii=False, sort_keys=True, indent=4)

        self.__get_job_or_sub_job_components()
        self.callbacks = klever.core.components.get_component_callbacks(self.logger,
                                                                         [type(self)] + self.components)
        self.launch_sub_job_components()

        self.clean_dir = True
        self.logger.info("All components finished")
        if self.conf.get('collect total code coverage', None):
            self.logger.debug('Waiting for a collecting coverage')
            # A missing entry is treated as finished (default True), so we wait only for an explicit False value.
            sub_job_id = self.common_components_conf['sub-job identifier']
            while not self.vals['coverage_finished'].get(sub_job_id, True):
                time.sleep(1)
            self.logger.debug("Coverage collected")

    main = decide_job_or_sub_job

    def __set_build_base(self):
        """
        Find the build base, extract it if it is an archive and store its absolute path in the common configuration.

        :raise KeyError: There is no 'build base' configuration option.
        :raise FileNotFoundError: The build base was found neither directly nor in directory "build bases".
        :raise FileExistsError: The build base is not a directory or it does not contain file "meta.json".
        """
        if 'build base' not in self.common_components_conf:
            raise KeyError("Provide 'build base' configuration option to start verification")

        common_advice = 'please, fix "job.json" (attribute "build base")'
        common_advice += ' or/and deployment configuration file (attribute "Klever Build Bases")'

        # Try to find specified build base either in normal way or additionally in directory "build bases" that is
        # convenient to use when working with many build bases.
        try:
            build_base = klever.core.utils.find_file_or_dir(self.logger, os.path.curdir,
                                                            self.common_components_conf['build base'])
        except FileNotFoundError:
            try:
                build_base = klever.core.utils.find_file_or_dir(
                    self.logger, os.path.curdir,
                    os.path.join('build bases', self.common_components_conf['build base']))
            except FileNotFoundError:
                raise FileNotFoundError(
                    'Specified build base "{0}" does not exist, {1}'.format(
                        self.common_components_conf['build base'], common_advice)) from None

        # Extract build base from archive. There should not be any intermediate directories in archives.
        # NOTE(review): extractall() trusts member paths stored in archives - confirm that build base archives
        # always come from a trusted source.
        if os.path.isfile(build_base) and (tarfile.is_tarfile(build_base) or zipfile.is_zipfile(build_base)):
            if tarfile.is_tarfile(build_base):
                self.logger.debug('Build base "{0}" is provided in form of TAR archive'.format(build_base))
                with tarfile.open(build_base) as tar_fp:
                    tar_fp.extractall('build base')
            else:
                self.logger.debug('Build base "{0}" is provided in form of ZIP archive'.format(build_base))
                with zipfile.ZipFile(build_base) as zfp:
                    zfp.extractall('build base')

            # Directory contains extracted build base.
            extracted_from = ' extracted from "{0}"'.format(os.path.realpath(build_base))
            build_base = 'build base'
        else:
            extracted_from = ''

        # We need to specify absolute path to build base since it will be used in different Klever components. Besides,
        # this simplifies troubleshooting.
        build_base = os.path.realpath(build_base)

        # TODO: fix after https://github.com/17451k/clade/issues/108.
        if not os.path.isdir(build_base):
            # All three arguments must be referred by the message, otherwise the advice is silently dropped.
            raise FileExistsError('Build base "{0}"{1} is not a directory, {2}'
                                  .format(build_base, extracted_from, common_advice))

        if not os.path.isfile(os.path.join(build_base, 'meta.json')):
            raise FileExistsError(
                'Directory "{0}"{1} is not a build base since it does not contain file "meta.json", {2}'
                .format(build_base, extracted_from, common_advice))

        self.common_components_conf['build base'] = build_base

        self.logger.debug('Klever components will use build base "{0}"'
                          .format(self.common_components_conf['build base']))

    # Klever will try to cut off either working source trees (if specified) or at least build directory (otherwise)
    # from referred file names. Sometimes this is rather optional like for source files referred by error traces, but,
    # say, for program fragment identifiers this is strictly necessary, e.g. because of otherwise expert assessment will
    # not work as expected.
    def __retrieve_working_src_trees(self):
        """Store working source trees from the build base meta or the build directory if they are not specified."""
        clade_meta = self.clade.get_meta()
        if 'working source trees' in clade_meta:
            work_src_trees = clade_meta['working source trees']
        else:
            work_src_trees = [clade_meta['build_dir']]
        self.common_components_conf['working source trees'] = work_src_trees

    def __refer_original_sources(self, src_id):
        """Report the identifier of original sources for this job/sub-job."""
        klever.core.utils.report(
            self.logger,
            'patch',
            {
                'identifier': self.id,
                'original_sources': src_id
            },
            self.mqs['report files'],
            self.vals['report id'],
            self.conf['main working directory']
        )

    def __process_source_files(self):
        """Put all source file names to the queue followed by a terminator (None) for each worker."""
        for file_name in self.clade.src_info:
            self.mqs['file names'].put(file_name)

        for _ in range(self.workers_num):
            self.mqs['file names'].put(None)

    def __process_source_file(self):
        """Copy source files from the queue to directory "original sources" and get cross references for them."""
        while True:
            file_name = self.mqs['file names'].get()

            # Terminator: there are no more files for this worker.
            if not file_name:
                return

            src_file_name = klever.core.utils.make_relative_path(
                self.common_components_conf['working source trees'], file_name)

            # Only files for which a relative path was obtained are placed in "source files".
            if src_file_name != file_name:
                src_file_name = os.path.join('source files', src_file_name)

            new_file_name = os.path.join('original sources', src_file_name.lstrip(os.path.sep))
            os.makedirs(os.path.dirname(new_file_name), exist_ok=True)
            shutil.copy(self.clade.get_storage_path(file_name), new_file_name)

            cross_refs = CrossRefs(self.common_components_conf, self.logger, self.clade, file_name, new_file_name,
                                   self.common_components_conf['working source trees'], 'source files')
            cross_refs.get_cross_refs()

    def __get_original_sources_basic_info(self):
        """Dump for each source file its number of lines and sorted lines of function definitions."""
        self.logger.info('Get information on original sources for following visualization of uncovered source files')

        # For each source file we need to know the total number of lines and places where functions are defined.
        src_files_info = dict()
        for file_name, file_size in self.clade.src_info.items():
            src_file_name = klever.core.utils.make_relative_path(
                self.common_components_conf['working source trees'], file_name)

            # Skip non-source files.
            if src_file_name == file_name:
                continue

            src_file_name = os.path.join('source files', src_file_name)

            # Store source file function definition lines.
            func_def_lines = []
            funcs = self.clade.get_functions_by_file([file_name], False)
            if funcs:
                for func_info in list(funcs.values())[0].values():
                    func_def_lines.append(int(func_info['line']))

            # Store source file size and sorted function definition lines.
            src_files_info[src_file_name] = [file_size['loc'], sorted(func_def_lines)]

        # Dump obtain information (huge data!) to load it when reporting total code coverage if everything will be okay.
        with open('original sources basic information.json', 'w') as fp:
            klever.core.utils.json_dump(src_files_info, fp, self.conf['keep intermediate files'])

    def __upload_original_sources(self):
        """Prepare, compress and upload original sources unless they were uploaded already, then refer them."""
        # Use Clade UUID to distinguish various original sources. It is pretty well since this UUID is uuid.uuid4().
        src_id = self.clade.get_uuid()

        session = klever.core.session.Session(self.logger, self.conf['Klever Bridge'], self.conf['identifier'])

        if session.check_original_sources(src_id):
            self.logger.info('Original sources were uploaded already')
            self.__refer_original_sources(src_id)
            return

        self.logger.info(
            'Cut off working source trees or build directory from original source file names and convert index data')
        os.makedirs('original sources')
        self.mqs['file names'] = multiprocessing.Queue()
        self.workers_num = klever.core.utils.get_parallel_threads_num(self.logger, self.conf)
        # One producer of file names and workers_num consumers.
        subcomponents = [('PSFS', self.__process_source_files)]
        subcomponents.extend(('RSF', self.__process_source_file) for _ in range(self.workers_num))
        self.launch_subcomponents(False, *subcomponents)
        self.mqs['file names'].close()

        self.logger.info('Compress original sources')
        klever.core.utils.ArchiveFiles(['original sources']).make_archive('original sources.zip')

        self.logger.info('Upload original sources')
        try:
            session.upload_original_sources(src_id, 'original sources.zip')
        # Do not fail if there are already original sources. There may be complex data races because of checking and
        # uploading original sources archive are not atomic.
        except klever.core.session.BridgeError:
            if "original sources with this identifier already exists." not in list(session.error.values())[0]:
                raise

        self.__refer_original_sources(src_id)

        if not self.conf['keep intermediate files']:
            shutil.rmtree('original sources')
            os.remove('original sources.zip')

    def __get_job_or_sub_job_components(self):
        """Import classes of components listed in CORE_COMPONENTS from modules klever.core.<lowercase name>."""
        self.logger.info('Get components for sub-job "{0}"'.format(self.id))

        self.components = [
            getattr(importlib.import_module('.{0}'.format(component.lower()), 'klever.core'), component)
            for component in self.CORE_COMPONENTS
        ]

        self.logger.debug('Components to be launched: "{0}"'.format(
            ', '.join([component.__name__ for component in self.components])))

    def launch_sub_job_components(self):
        """Has callbacks"""
        self.logger.info('Launch components for sub-job "{0}"'.format(self.id))

        for component in self.components:
            p = component(self.common_components_conf, self.logger, self.id, self.callbacks, self.mqs,
                          self.vals, separate_from_parent=True)
            self.component_processes.append(p)

        klever.core.components.launch_workers(self.logger, self.component_processes)
class Job(klever.core.components.Component): CORE_COMPONENTS = [ 'PFG', 'VTG', 'VRP' ] def __init__(self, conf, logger, parent_id, callbacks, mqs, vals, id=None, work_dir=None, attrs=None, separate_from_parent=True, include_child_resources=False, components_common_conf=None): super(Job, self).__init__(conf, logger, parent_id, callbacks, mqs, vals, id, work_dir, attrs, separate_from_parent, include_child_resources) self.common_components_conf = components_common_conf if work_dir: self.common_components_conf['additional sources directory'] = os.path.join(os.path.realpath(work_dir), 'additional sources') self.clade = None self.components = [] self.component_processes = [] def decide_job_or_sub_job(self): self.logger.info('Decide job/sub-job "{0}"'.format(self.id)) # This is required to associate verification results with particular sub-jobs. # Skip leading "/" since this identifier is used in os.path.join() that returns absolute path otherwise. self.common_components_conf['sub-job identifier'] = self.id[1:] self.logger.info('Get specifications set') if 'specifications set' in self.common_components_conf: spec_set = self.common_components_conf['specifications set'] else: raise KeyError('Specify attribute "specifications set" within job.json') self.logger.debug('Specifications set is "{0}"'.format(spec_set)) # Check that specifications set is supported. with open(self.common_components_conf['specifications base'], encoding='utf-8') as fp: req_spec_base = json.load(fp) spec_set = self.common_components_conf['specifications set'] if spec_set not in req_spec_base['specification sets']: raise ValueError("Klever does not support specifications set {!r} yet, available options are: {}" .format(spec_set, ', '.join(req_spec_base['specification sets']))) # Check and set build base here since many Core components need it. 
self.__set_build_base() self.clade = Clade(self.common_components_conf['build base']) if not self.clade.work_dir_ok(): raise RuntimeError(f'Build base "{self.common_components_conf["build base"]}" is not OK') self.__retrieve_working_src_trees() self.__get_original_sources_basic_info() self.__upload_original_sources() # Create directory where files will be cached and remember absolute path to it for components. os.mkdir('cache') self.common_components_conf['cache directory'] = os.path.realpath('cache') if self.common_components_conf['keep intermediate files']: self.logger.debug('Create components configuration file "conf.json"') with open('conf.json', 'w', encoding='utf-8') as fp: json.dump(self.common_components_conf, fp, ensure_ascii=False, sort_keys=True, indent=4) self.__get_job_or_sub_job_components() self.callbacks = klever.core.components.get_component_callbacks(self.logger, [type(self)] + self.components) self.launch_sub_job_components() self.clean_dir = True self.logger.info("All components finished") if self.conf.get('collect total code coverage', None): self.logger.debug('Waiting for a collecting coverage') while not self.vals['coverage_finished'].get(self.common_components_conf['sub-job identifier'], True): time.sleep(1) self.logger.debug("Coverage collected") main = decide_job_or_sub_job def __set_build_base(self): if 'build base' not in self.common_components_conf: raise KeyError("Provide 'build base' configuration option to start verification") common_advice = 'please, fix "job.json" (attribute "build base")' common_advice += ' or/and deployment configuration file (attribute "Klever Build Bases")' # Try to find specified build base either in normal way or additionally in directory "build bases" that is # convenient to use when working with many build bases. 
try: build_base = klever.core.utils.find_file_or_dir(self.logger, self.common_components_conf['main working directory'], self.common_components_conf['build base']) except FileNotFoundError as e: self.logger.warning('Failed to find build base:\n{}'.format(traceback.format_exc().rstrip())) try: build_base = klever.core.utils.find_file_or_dir( self.logger, self.common_components_conf['main working directory'], os.path.join('build bases', self.common_components_conf['build base'])) except FileNotFoundError as e: self.logger.warning('Failed to find build base:\n{}'.format(traceback.format_exc().rstrip())) raise FileNotFoundError( 'Specified build base "{0}" does not exist, {1}'.format(self.common_components_conf['build base'], common_advice)) from None # Extract build base from archive. There should not be any intermediate directories in archives. if os.path.isfile(build_base) and (tarfile.is_tarfile(build_base) or zipfile.is_zipfile(build_base)): if tarfile.is_tarfile(build_base): self.logger.debug('Build base "{0}" is provided in form of TAR archive'.format(build_base)) with tarfile.open(build_base) as TarFile: TarFile.extractall('build base') else: self.logger.debug('Build base "{0}" is provided in form of ZIP archive'.format(build_base)) with zipfile.ZipFile(build_base) as zfp: zfp.extractall('build base') # Directory contains extracted build base. extracted_from = ' extracted from "{0}"'.format(os.path.realpath(build_base)) build_base = 'build base' else: extracted_from = '' # We need to specify absolute path to build base since it will be used in different Klever components. Besides, # this simplifies troubleshooting. build_base = os.path.realpath(build_base) # TODO: fix after https://github.com/17451k/clade/issues/108. 
if not os.path.isdir(build_base): raise FileExistsError('Build base "{0}" is not a directory, {1}' .format(build_base, extracted_from, common_advice)) if not os.path.isfile(os.path.join(build_base, 'meta.json')): raise FileExistsError( 'Directory "{0}"{1} is not a build base since it does not contain file "meta.json", {2}' .format(build_base, extracted_from, common_advice)) self.common_components_conf['build base'] = build_base self.logger.debug('Klever components will use build base "{0}"' .format(self.common_components_conf['build base'])) # Klever will try to cut off either working source trees (if specified) or maximum common paths of CC/CL input files # and LD/Link output files (otherwise) from referred file names. Sometimes this is rather optional like for source # files referred by error traces, but, say, for program fragment identifiers this is strictly necessary, e.g. # because of otherwise expert assessment will not work as expected. def __retrieve_working_src_trees(self): clade_meta = self.clade.get_meta() # Best of all if users specify working source trees in build bases manually themselves. It is a most accurate # approach. if 'working source trees' in clade_meta: work_src_trees = clade_meta['working source trees'] # Otherwise try to find out them automatically as described above. else: in_files = [] for cmd in self.clade.get_all_cmds_by_type("CC") + self.clade.get_all_cmds_by_type("CL"): if cmd['in']: for in_file in cmd['in']: # Sometimes some auxiliary stuff is built in addition to normal C source files that are most # likely located in a place we would like to get. 
if not in_file.startswith('/tmp') and in_file != '/dev/null': in_files.append(os.path.join(cmd['cwd'], in_file)) in_files_prefix = os.path.dirname(os.path.commonprefix(in_files)) self.logger.info('Common prefix of CC/CL input files is "{0}"'.format(in_files_prefix)) out_files = [] for cmd in self.clade.get_all_cmds_by_type("LD") + self.clade.get_all_cmds_by_type("Link"): if cmd['out']: for out_file in cmd['out']: # Like above. if not out_file.startswith('/tmp') and out_file != '/dev/null': out_files.append(os.path.join(cmd['cwd'], out_file)) out_files_prefix = os.path.dirname(os.path.commonprefix(out_files)) self.logger.info('Common prefix of LD/Link output files is "{0}"'.format(out_files_prefix)) # Meaningful paths look like "/dir...". meaningful_paths = [] for path in (in_files_prefix, out_files_prefix): if path and path != os.path.sep and path not in meaningful_paths: meaningful_paths.append(path) if meaningful_paths: work_src_trees = meaningful_paths # At least consider build directory as working source tree if the automatic procedure fails. else: self.logger.warning( 'Consider build directory "{0}" as working source tree.' 'This may be dangerous and we recommend to specify appropriate working source trees manually!' .format(clade_meta['build_dir'])) work_src_trees = [clade_meta['build_dir']] # Consider minimal path if it is common prefix for other ones. For instance, if we have "/dir1/dir2" and "/dir1" # then "/dir1" will become the only working source tree. 
if len(work_src_trees) > 1: min_work_src_tree = min(work_src_trees) if os.path.commonprefix(work_src_trees) == min_work_src_tree: work_src_trees = [min_work_src_tree] self.logger.info( 'Working source trees to be used are as follows:\n{0}' .format('\n'.join([' {0}'.format(t) for t in work_src_trees]))) self.common_components_conf['working source trees'] = work_src_trees def __refer_original_sources(self, src_id): klever.core.utils.report( self.logger, 'patch', { 'identifier': self.id, 'original_sources': src_id }, self.mqs['report files'], self.vals['report id'], self.conf['main working directory'] ) def __process_source_files(self): for file_name in self.clade.src_info: self.mqs['file names'].put(file_name) for i in range(self.workers_num): self.mqs['file names'].put(None) def __process_source_file(self): while True: file_name = self.mqs['file names'].get() if not file_name: return src_file_name = klever.core.utils.make_relative_path(self.common_components_conf['working source trees'], file_name) if src_file_name != file_name: src_file_name = os.path.join('source files', src_file_name) new_file_name = os.path.join('original sources', src_file_name.lstrip(os.path.sep)) os.makedirs(os.path.dirname(new_file_name), exist_ok=True) shutil.copy(self.clade.get_storage_path(file_name), new_file_name) cross_refs = CrossRefs(self.common_components_conf, self.logger, self.clade, file_name, new_file_name, self.common_components_conf['working source trees'], 'source files') cross_refs.get_cross_refs() def __get_original_sources_basic_info(self): self.logger.info('Get information on original sources for following visualization of uncovered source files') # For each source file we need to know the total number of lines and places where functions are defined. src_files_info = dict() for file_name, file_size in self.clade.src_info.items(): src_file_name = klever.core.utils.make_relative_path(self.common_components_conf['working source trees'], file_name) # Skip non-source files. 
if src_file_name == file_name: continue src_file_name = os.path.join('source files', src_file_name) src_files_info[src_file_name] = list() # Store source file size. src_files_info[src_file_name].append(file_size['loc']) # Store source file function definition lines. func_def_lines = list() funcs = self.clade.get_functions_by_file([file_name], False) if funcs: for func_name, func_info in list(funcs.values())[0].items(): func_def_lines.append(int(func_info['line'])) src_files_info[src_file_name].append(sorted(func_def_lines)) # Dump obtain information (huge data!) to load it when reporting total code coverage if everything will be okay. with open('original sources basic information.json', 'w') as fp: klever.core.utils.json_dump(src_files_info, fp, self.conf['keep intermediate files']) def __upload_original_sources(self): # Use Clade UUID to distinguish various original sources. It is pretty well since this UUID is uuid.uuid4(). src_id = self.clade.get_uuid() # In addition, take into account a meta content as we like to change it manually often. In this case it may be # necessary to re-index the build base. It is not clear if this is the case actually, so, do this in case of # any changes in meta. 
src_id += '-' + klever.core.utils.get_file_name_checksum(json.dumps(self.clade.get_meta()))[:12] session = klever.core.session.Session(self.logger, self.conf['Klever Bridge'], self.conf['identifier']) if session.check_original_sources(src_id): self.logger.info('Original sources were uploaded already') self.__refer_original_sources(src_id) return self.logger.info( 'Cut off working source trees or build directory from original source file names and convert index data') os.makedirs('original sources') self.mqs['file names'] = multiprocessing.Queue() self.workers_num = klever.core.utils.get_parallel_threads_num(self.logger, self.conf) subcomponents = [('PSFS', self.__process_source_files)] for i in range(self.workers_num): subcomponents.append(('PSF', self.__process_source_file)) self.launch_subcomponents(False, *subcomponents) self.mqs['file names'].close() self.logger.info('Compress original sources') klever.core.utils.ArchiveFiles(['original sources']).make_archive('original sources.zip') self.logger.info('Upload original sources') try: session.upload_original_sources(src_id, 'original sources.zip') # Do not fail if there are already original sources. There may be complex data races because of checking and # uploading original sources archive are not atomic. except klever.core.session.BridgeError: if "original sources with this identifier already exists." 
not in list(session.error.values())[0]: raise self.__refer_original_sources(src_id) if not self.conf['keep intermediate files']: shutil.rmtree('original sources') os.remove('original sources.zip') def __get_job_or_sub_job_components(self): self.logger.info('Get components for sub-job "{0}"'.format(self.id)) self.components = [getattr(importlib.import_module('.{0}'.format(component.lower()), 'klever.core'), component) for component in self.CORE_COMPONENTS] self.logger.debug('Components to be launched: "{0}"'.format( ', '.join([component.__name__ for component in self.components]))) def launch_sub_job_components(self): """Has callbacks""" self.logger.info('Launch components for sub-job "{0}"'.format(self.id)) for component in self.components: p = component(self.common_components_conf, self.logger, self.id, self.callbacks, self.mqs, self.vals, separate_from_parent=True) self.component_processes.append(p) klever.core.components.launch_workers(self.logger, self.component_processes)