def instantiate_processes(section,
                          local_bear_list,
                          global_bear_list,
                          job_count,
                          log_printer):
    """
    Instantiate the processes that will be responsible for running bears
    in a multiprocessing environment.

    :param section:          The section the bears belong to.
    :param local_bear_list:  List of local bears belonging to the section.
    :param global_bear_list: List of global bears belonging to the section.
    :param job_count:        Max number of processes to create.
    :param log_printer:      The log printer to warn to.
    :return:                 A tuple containing a list of processes,
                             and the arguments passed to each process which
                             are the same for each object.
    """
    filename_list = collect_files(
        glob_list(section.get('files', "")),
        log_printer,
        ignored_file_paths=glob_list(section.get('ignore', "")),
        limit_file_paths=glob_list(section.get('limit_files', "")))
    file_dict = get_file_dict(filename_list, log_printer)

    manager = multiprocessing.Manager()
    global_bear_queue = multiprocessing.Queue()
    filename_queue = multiprocessing.Queue()
    local_result_dict = manager.dict()
    global_result_dict = manager.dict()
    message_queue = multiprocessing.Queue()
    control_queue = multiprocessing.Queue()

    bear_runner_args = {"file_name_queue": filename_queue,
                        "local_bear_list": local_bear_list,
                        "global_bear_list": global_bear_list,
                        "global_bear_queue": global_bear_queue,
                        "file_dict": file_dict,
                        "local_result_dict": local_result_dict,
                        "global_result_dict": global_result_dict,
                        "message_queue": message_queue,
                        "control_queue": control_queue,
                        "timeout": 0.1}

    local_bear_list[:], global_bear_list[:] = instantiate_bears(
        section,
        local_bear_list,
        global_bear_list,
        file_dict,
        message_queue)

    fill_queue(filename_queue, file_dict.keys())
    fill_queue(global_bear_queue, range(len(global_bear_list)))

    return ([multiprocessing.Process(target=run, kwargs=bear_runner_args)
             for i in range(job_count)],
            bear_runner_args)
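# A minimal usage sketch (illustrative only; `section`, `local_bears`,
# `global_bears` and `log_printer` are assumed to come from
# gather_configuration, and result handling is left to the caller):
processes, arg_dict = instantiate_processes(section,
                                            local_bears,
                                            global_bears,
                                            job_count=4,
                                            log_printer=log_printer)
for process in processes:
    process.start()
# Workers pull file names and bear indices from the shared queues and
# report through arg_dict["control_queue"]; a driver (e.g. coala's
# process_queues) drains that queue before joining the workers.
for process in processes:
    process.join()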
def test_glob_list(self):
    abspath = glob_escape(os.path.abspath('.'))
    # Need to escape backslashes since we use list conversion
    self.uut = Setting('key', '., ' + abspath.replace('\\', '\\\\'),
                       origin=os.path.join('test (1)', 'somefile'))
    self.assertEqual(
        glob_list(self.uut),
        [glob_escape(os.path.abspath(os.path.join('test (1)', '.'))),
         abspath])

    self.uut = Setting('key', '.,' + abspath.replace('\\', '\\\\'),
                       origin=SourcePosition(
                           os.path.join('test (1)', 'somefile')))
    self.assertEqual(glob_list(self.uut),
                     [glob_escape(os.path.abspath(
                         os.path.join('test (1)', '.'))),
                      abspath])
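# Illustration (hypothetical values, behaviour inferred from the
# assertions above): glob_list splits the comma-separated Setting value
# into glob expressions and resolves each relative entry against the
# directory of the setting's origin, glob-escaping the directory part so
# that special characters like the parentheses in 'test (1)' stay literal.
setting = Setting('files', 'src/**, tests/**',
                  origin=os.path.join('project', '.coafile'))
patterns = glob_list(setting)
# roughly: [os.path.join(glob_escape(<abs path of 'project'>), 'src/**'),
#           os.path.join(glob_escape(<abs path of 'project'>), 'tests/**')]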
def bear_dirs(self):
    bear_dirs = glob_list(self.get("bear_dirs", ""))
    for bear_dir in bear_dirs:
        sys.path.append(bear_dir)
    bear_dirs = [os.path.join(bear_dir, "**") for bear_dir in bear_dirs]
    bear_dirs += [os.path.join(bear_dir, "**")
                  for bear_dir in
                  collect_registered_bears_dirs('coalabears')]
    return bear_dirs
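# Sketch of what bear_dirs yields (Section/Setting usage follows coala's
# test conventions; exact absolute paths depend on the section's origin):
section = Section('example')
section.append(Setting('bear_dirs', 'custom_bears'))
print(section.bear_dirs())
# -> ['<abs path of custom_bears>/**', '<registered coalabears dir>/**']
# These '**' globs are what bear collection later expands to discover
# bear modules; note the method also puts each dir on sys.path so the
# discovered modules are importable.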
def instantiate_processes(section,
                          local_bear_list,
                          global_bear_list,
                          job_count,
                          cache,
                          log_printer,
                          console_printer):
    """
    Instantiate the processes that will be responsible for running bears
    in a multiprocessing environment.

    :param section:          The section the bears belong to.
    :param local_bear_list:  List of local bears belonging to the section.
    :param global_bear_list: List of global bears belonging to the section.
    :param job_count:        Max number of processes to create.
    :param cache:            An instance of ``misc.Caching.FileCache`` to
                             use as a file cache buffer.
    :param log_printer:      The log printer to warn to.
    :param console_printer:  Object to print messages on the console.
    :return:                 A tuple containing a list of processes,
                             and the arguments passed to each process which
                             are the same for each object.
    """
    filename_list = collect_files(
        glob_list(section.get('files', '')),
        log_printer,
        ignored_file_paths=glob_list(section.get('ignore', '')),
        limit_file_paths=glob_list(section.get('limit_files', '')))

    # This stores all matched files irrespective of whether coala is run
    # only on changed files or not. Global bears require all the files.
    complete_filename_list = filename_list

    # Start tracking all the files.
    if cache:
        cache.track_files(set(complete_filename_list))

    changed_files = cache.get_uncached_files(
        set(filename_list)) if cache else filename_list

    if cache:
        # If caching is enabled then the local bears should process only
        # the changed files.
        log_printer.debug("coala is run only on changed files, bears' log "
                          'messages from previous runs may not appear. You '
                          'may use the `--flush-cache` flag to see them.')

    filename_list = changed_files

    # Note: the complete file dict is given as the file dict to bears and
    # the whole project is accessible to every bear. However, local bears
    # are run only for the changed files if caching is enabled.
    complete_file_dict = get_file_dict(complete_filename_list, log_printer)
    file_dict = {filename: complete_file_dict[filename]
                 for filename in filename_list
                 if filename in complete_file_dict}

    manager = multiprocessing.Manager()
    global_bear_queue = multiprocessing.Queue()
    filename_queue = multiprocessing.Queue()
    local_result_dict = manager.dict()
    global_result_dict = manager.dict()
    message_queue = multiprocessing.Queue()
    control_queue = multiprocessing.Queue()

    bear_runner_args = {'file_name_queue': filename_queue,
                        'local_bear_list': local_bear_list,
                        'global_bear_list': global_bear_list,
                        'global_bear_queue': global_bear_queue,
                        'file_dict': file_dict,
                        'local_result_dict': local_result_dict,
                        'global_result_dict': global_result_dict,
                        'message_queue': message_queue,
                        'control_queue': control_queue,
                        'timeout': 0.1}

    local_bear_list[:], global_bear_list[:] = instantiate_bears(
        section,
        local_bear_list,
        global_bear_list,
        complete_file_dict,
        message_queue,
        console_printer=console_printer)

    fill_queue(filename_queue, file_dict.keys())
    fill_queue(global_bear_queue, range(len(global_bear_list)))

    return ([multiprocessing.Process(target=run, kwargs=bear_runner_args)
             for i in range(job_count)],
            bear_runner_args)
def instantiate_processes(section,
                          local_bear_list,
                          global_bear_list,
                          job_count,
                          cache,
                          log_printer,
                          console_printer,
                          debug=False,
                          use_raw_files=False):
    """
    Instantiate the processes that will be responsible for running bears
    in a multiprocessing environment.

    :param section:          The section the bears belong to.
    :param local_bear_list:  List of local bears belonging to the section.
    :param global_bear_list: List of global bears belonging to the section.
    :param job_count:        Max number of processes to create.
    :param cache:            An instance of ``misc.Caching.FileCache`` to
                             use as a file cache buffer.
    :param log_printer:      The log printer to warn to.
    :param console_printer:  Object to print messages on the console.
    :param debug:            Bypass multiprocessing and activate debug mode
                             for bears, not catching any exceptions on
                             running them.
    :param use_raw_files:    Allow the usage of raw files (non text files).
    :return:                 A tuple containing a list of processes,
                             and the arguments passed to each process which
                             are the same for each object.
    """
    filename_list = collect_files(
        glob_list(section.get('files', '')),
        log_printer,
        ignored_file_paths=glob_list(section.get('ignore', '')),
        limit_file_paths=glob_list(section.get('limit_files', '')),
        section_name=section.name)

    # This stores all matched files irrespective of whether coala is run
    # only on changed files or not. Global bears require all the files.
    complete_filename_list = filename_list

    complete_file_dict = get_file_dict(complete_filename_list,
                                       log_printer,
                                       use_raw_files)

    if debug:
        from . import DebugProcessing as processing
    else:
        import multiprocessing as processing

    manager = processing.Manager()
    global_bear_queue = processing.Queue()
    filename_queue = processing.Queue()
    local_result_dict = manager.dict()
    global_result_dict = manager.dict()
    message_queue = processing.Queue()
    control_queue = processing.Queue()

    loaded_local_bears_count = len(local_bear_list)
    local_bear_list[:], global_bear_list[:] = instantiate_bears(
        section,
        local_bear_list,
        global_bear_list,
        complete_file_dict,
        message_queue,
        console_printer=console_printer,
        debug=debug)
    loaded_valid_local_bears_count = len(local_bear_list)

    # Start tracking all the files, but only if every local bear loaded
    # successfully and no raw files are involved.
    if cache and (loaded_valid_local_bears_count ==
                  loaded_local_bears_count and not use_raw_files):
        cache.track_files(set(complete_filename_list))

    changed_files = cache.get_uncached_files(
        set(filename_list)) if cache else filename_list

    if cache:
        # If caching is enabled then the local bears should process only
        # the changed files.
        log_printer.debug("coala is run only on changed files, bears' log "
                          'messages from previous runs may not appear. You '
                          'may use the `--flush-cache` flag to see them.')

    filename_list = changed_files

    # Note: the complete file dict is given as the file dict to bears and
    # the whole project is accessible to every bear. However, local bears
    # are run only for the changed files if caching is enabled.
    file_dict = {filename: complete_file_dict[filename]
                 for filename in filename_list
                 if filename in complete_file_dict}

    bear_runner_args = {'file_name_queue': filename_queue,
                        'local_bear_list': local_bear_list,
                        'global_bear_list': global_bear_list,
                        'global_bear_queue': global_bear_queue,
                        'file_dict': file_dict,
                        'local_result_dict': local_result_dict,
                        'global_result_dict': global_result_dict,
                        'message_queue': message_queue,
                        'control_queue': control_queue,
                        'timeout': 0.1,
                        'debug': debug}

    fill_queue(filename_queue, file_dict.keys())
    fill_queue(global_bear_queue, range(len(global_bear_list)))

    return ([processing.Process(target=run, kwargs=bear_runner_args)
             for i in range(job_count)],
            bear_runner_args)
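# Caching interplay sketch (the FileCache constructor arguments are
# assumed; see coalib.misc.Caching.FileCache): with a cache, only files
# changed since the last run reach the local-bear queue, while the
# complete file dict handed to the bears still exposes every matched file.
cache = FileCache(log_printer, 'example-project', flush_cache=False)
processes, arg_dict = instantiate_processes(
    section, local_bears, global_bears, job_count=2, cache=cache,
    log_printer=log_printer, console_printer=console_printer)
# arg_dict['file_dict'] now holds only the uncached (changed) files;
# cache.write() persists the tracked timestamps after a successful run.
cache.write()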
def instantiate_processes(section,
                          local_bear_list,
                          global_bear_list,
                          job_count,
                          cache,
                          log_printer):
    """
    Instantiate the processes that will be responsible for running bears
    in a multiprocessing environment.

    :param section:          The section the bears belong to.
    :param local_bear_list:  List of local bears belonging to the section.
    :param global_bear_list: List of global bears belonging to the section.
    :param job_count:        Max number of processes to create.
    :param cache:            An instance of ``misc.Caching.FileCache`` to
                             use as a file cache buffer.
    :param log_printer:      The log printer to warn to.
    :return:                 A tuple containing a list of processes,
                             and the arguments passed to each process which
                             are the same for each object.
    """
    filename_list = collect_files(
        glob_list(section.get('files', "")),
        log_printer,
        ignored_file_paths=glob_list(section.get('ignore', "")),
        limit_file_paths=glob_list(section.get('limit_files', "")))

    # This stores all matched files irrespective of whether coala is run
    # only on changed files or not. Global bears require all the files.
    complete_filename_list = filename_list

    # Start tracking all the files.
    if cache:
        cache.track_files(set(complete_filename_list))

    changed_files = cache.get_uncached_files(
        set(filename_list)) if cache else filename_list

    if cache:
        # If caching is enabled then the local bears should process only
        # the changed files.
        log_printer.debug(
            "coala is run only on changed files, bears' log "
            "messages from previous runs may not appear. You may "
            "use the `--flush-cache` flag to see them.")

    filename_list = changed_files

    # Note: the complete file dict is given as the file dict to bears and
    # the whole project is accessible to every bear. However, local bears
    # are run only for the changed files if caching is enabled.
    complete_file_dict = get_file_dict(complete_filename_list, log_printer)
    file_dict = {filename: complete_file_dict[filename]
                 for filename in filename_list
                 if filename in complete_file_dict}

    manager = multiprocessing.Manager()
    global_bear_queue = multiprocessing.Queue()
    filename_queue = multiprocessing.Queue()
    local_result_dict = manager.dict()
    global_result_dict = manager.dict()
    message_queue = multiprocessing.Queue()
    control_queue = multiprocessing.Queue()

    bear_runner_args = {"file_name_queue": filename_queue,
                        "local_bear_list": local_bear_list,
                        "global_bear_list": global_bear_list,
                        "global_bear_queue": global_bear_queue,
                        "file_dict": file_dict,
                        "local_result_dict": local_result_dict,
                        "global_result_dict": global_result_dict,
                        "message_queue": message_queue,
                        "control_queue": control_queue,
                        "timeout": 0.1}

    local_bear_list[:], global_bear_list[:] = instantiate_bears(
        section,
        local_bear_list,
        global_bear_list,
        complete_file_dict,
        message_queue)

    fill_queue(filename_queue, file_dict.keys())
    fill_queue(global_bear_queue, range(len(global_bear_list)))

    return ([multiprocessing.Process(target=run, kwargs=bear_runner_args)
             for i in range(job_count)],
            bear_runner_args)
def Analyze(self):
    """
    This method analyzes the document and sends back the result.

    :return: The output is a structure which has 3 items:
             - The exitcode from the analysis.
             - List of logs from the analysis.
             - List of information about each section that contains:
               - The name of the section.
               - Boolean which is true if all bears in the section
                 executed successfully.
               - List of results where each result is a string
                 dictionary which contains:
                 id, origin, message, file, line_nr, severity
    """
    retval = []
    if self.path == "" or self.config_file == "":
        return retval

    args = ["--config=" + self.config_file]

    log_printer = ListLogPrinter()
    exitcode = 0

    try:
        yielded_results = False
        (sections,
         local_bears,
         global_bears,
         targets) = gather_configuration(fail_acquire_settings,
                                         log_printer,
                                         arg_list=args)

        for section_name in sections:
            section = sections[section_name]

            if not section.is_enabled(targets):
                continue

            if any(fnmatch(self.path, file_pattern)
                   for file_pattern in glob_list(section["files"])):
                section["files"].value = self.path

                section_result = execute_section(
                    section=section,
                    global_bear_list=global_bears[section_name],
                    local_bear_list=local_bears[section_name],
                    print_results=lambda *args: True,
                    log_printer=log_printer)
                yielded_results = yielded_results or section_result[0]

                retval.append(
                    DbusDocument.results_to_dbus_struct(section_result,
                                                        section_name))

        if yielded_results:
            exitcode = 1
    except BaseException as exception:  # pylint: disable=broad-except
        exitcode = exitcode or get_exitcode(exception, log_printer)

    logs = [log.to_string_dict() for log in log_printer.logs]

    return (exitcode, logs, retval)
def instantiate_processes(section,
                          local_bear_list,
                          global_bear_list,
                          job_count,
                          cache,
                          log_printer,
                          console_printer,
                          debug=False,
                          use_raw_files=False,
                          debug_bears=False):
    """
    Instantiate the processes that will be responsible for running bears
    in a multiprocessing environment.

    :param section:          The section the bears belong to.
    :param local_bear_list:  List of local bears belonging to the section.
    :param global_bear_list: List of global bears belonging to the section.
    :param job_count:        Max number of processes to create.
    :param cache:            An instance of ``misc.Caching.FileCache`` to
                             use as a file cache buffer.
    :param log_printer:      The log printer to warn to.
    :param console_printer:  Object to print messages on the console.
    :param debug:            Bypass multiprocessing and activate debug mode
                             for bears, not catching any exceptions on
                             running them.
    :param use_raw_files:    Allow the usage of raw files (non text files).
    :param debug_bears:      Like ``debug``, use the sequential
                             ``DebugProcessing`` implementation to run
                             bears.
    :return:                 A tuple containing a list of processes,
                             and the arguments passed to each process which
                             are the same for each object.
    """
    filename_list = collect_files(
        glob_list(section.get('files', '')),
        None,
        ignored_file_paths=glob_list(section.get('ignore', '')),
        limit_file_paths=glob_list(section.get('limit_files', '')),
        section_name=section.name)

    # This stores all matched files irrespective of whether coala is run
    # only on changed files or not. Global bears require all the files.
    complete_filename_list = filename_list

    file_dict_generator = get_file_dict
    if cache is not None and isinstance(cache, FileDictGenerator):
        file_dict_generator = cache.get_file_dict
    complete_file_dict = file_dict_generator(complete_filename_list,
                                             allow_raw_files=use_raw_files)

    logging.debug('Files that will be checked:\n' +
                  '\n'.join(complete_file_dict.keys()))

    if debug or debug_bears:
        from . import DebugProcessing as processing
    else:
        import multiprocessing as processing

    manager = processing.Manager()
    global_bear_queue = processing.Queue()
    filename_queue = processing.Queue()
    local_result_dict = manager.dict()
    global_result_dict = manager.dict()
    message_queue = processing.Queue()
    control_queue = processing.Queue()

    loaded_local_bears_count = len(local_bear_list)
    local_bear_list[:], global_bear_list[:] = instantiate_bears(
        section,
        local_bear_list,
        global_bear_list,
        complete_file_dict,
        message_queue,
        console_printer=console_printer,
        debug=debug)
    loaded_valid_local_bears_count = len(local_bear_list)

    # Start tracking all the files, but only if every local bear loaded
    # successfully and no raw files are involved.
    if cache and (loaded_valid_local_bears_count ==
                  loaded_local_bears_count and not use_raw_files):
        cache.track_files(set(complete_filename_list))

    changed_files = cache.get_uncached_files(
        set(filename_list)) if cache else filename_list

    if cache:
        # If caching is enabled then the local bears should process only
        # the changed files.
        logging.debug("coala is run only on changed files, bears' log "
                      'messages from previous runs may not appear. You may '
                      'use the `--flush-cache` flag to see them.')

    filename_list = changed_files

    # Note: the complete file dict is given as the file dict to bears and
    # the whole project is accessible to every bear. However, local bears
    # are run only for the changed files if caching is enabled.
    file_dict = {filename: complete_file_dict[filename]
                 for filename in filename_list
                 if filename in complete_file_dict}

    bear_runner_args = {'file_name_queue': filename_queue,
                        'local_bear_list': local_bear_list,
                        'global_bear_list': global_bear_list,
                        'global_bear_queue': global_bear_queue,
                        'file_dict': file_dict,
                        'local_result_dict': local_result_dict,
                        'global_result_dict': global_result_dict,
                        'message_queue': message_queue,
                        'control_queue': control_queue,
                        'timeout': 0.1,
                        'debug': debug}

    fill_queue(filename_queue, file_dict.keys())
    fill_queue(global_bear_queue, range(len(global_bear_list)))

    return ([processing.Process(target=run, kwargs=bear_runner_args)
             for i in range(job_count)],
            bear_runner_args)
def Analyze(self):
    """
    This method analyzes the document and sends back the result.

    :return: The output is a structure which has 3 items:
             - The exitcode from the analysis.
             - List of logs from the analysis.
             - List of information about each section that contains:
               - The name of the section.
               - Boolean which is true if all bears in the section
                 executed successfully.
               - List of results where each result is a string
                 dictionary which contains:
                 id, origin, message, file, line_nr, severity
    """
    retval = []
    if self.path == "" or self.config_file == "":
        return retval

    args = ["--config=" + self.config_file]

    log_printer = ListLogPrinter()
    exitcode = 0

    try:
        yielded_results = False
        (sections,
         local_bears,
         global_bears,
         targets) = gather_configuration(fail_acquire_settings,
                                         log_printer,
                                         arg_list=args)

        for section_name in sections:
            section = sections[section_name]

            if not section.is_enabled(targets):
                continue

            if any(fnmatch(self.path, file_pattern)
                   for file_pattern in glob_list(section["files"])):
                section["files"].value = self.path

                # TODO: Integrate with caching
                section_result = execute_section(
                    section=section,
                    global_bear_list=global_bears[section_name],
                    local_bear_list=local_bears[section_name],
                    print_results=lambda *args: True,
                    cache=None,
                    log_printer=log_printer)
                yielded_results = yielded_results or section_result[0]

                retval.append(
                    DbusDocument.results_to_dbus_struct(section_result,
                                                        section_name))

        if yielded_results:
            exitcode = 1
    except BaseException as exception:  # pylint: disable=broad-except
        exitcode = exitcode or get_exitcode(exception, log_printer)

    logs = [log.to_string_dict() for log in log_printer.logs]

    return (exitcode, logs, retval)
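# Hypothetical client-side sketch (constructor and attribute names are
# assumed from the method above; real callers reach this through the
# D-Bus service interface):
doc = DbusDocument(doc_id=1, path='/home/user/project/source.py')
doc.config_file = '/home/user/project/.coafile'
exitcode, logs, section_infos = doc.Analyze()
# exitcode 1 means some section yielded results; each section_infos entry
# carries the section name, a success flag and the result dictionaries.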