def fetch_typos_info(string_constants: List[str], vocabulary_path: str = None, db_path: str = None) -> List[TypoInfo]:
    """Collect typo information for the given string constants.

    The constants are split into chunks of ``DEFAULT_WORDS_CHUNK_SIZE`` items
    and every chunk is passed through a chain of backends. Each backend is
    called as ``backend(words, backend_config)`` and returns a triple
    ``(sure_correct, sure_with_typo_info, unknown)``; only the ``unknown``
    part is handed over to the next backend in the chain.

    Args:
        string_constants: Words/strings to check.
        vocabulary_path: Forwarded to the backends as
            ``backend_config['vocabulary_path']``.
        db_path: Forwarded to the backends as ``backend_config['db_path']``.

    Returns:
        The accumulated ``sure_with_typo_info`` items reported by all
        backends for all chunks.
    """
    typos_info: List[TypoInfo] = []
    # Order matters: every backend only sees what the previous ones could
    # not classify.
    backends = [
        process_with_vocabulary,
        process_with_db_with_cache,
        process_with_ya_speller,
    ]
    backend_config: BackendsConfig = {
        'vocabulary_path': vocabulary_path,
        'db_path': db_path,
        'speller_chunk_size': DEFAULT_WORDS_CHUNK_SIZE,
    }
    for words_chunk in chunks(string_constants, backend_config['speller_chunk_size']):
        remaining_words = words_chunk
        for words_processor in backends:
            if not remaining_words:
                # Nothing left to classify: do not bother the remaining
                # backends with an empty request.
                break
            _, sure_with_typo_info, unknown = words_processor(remaining_words, backend_config)
            typos_info += sure_with_typo_info
            # The next processor must receive only the words whose
            # correctness is still unknown.
            remaining_words = unknown
    return typos_info
def extract_all_constants_from_path(
    path: str,
    exclude: List[str],
    process_dots: bool,
    processes_amount: int,
    verbosity: int = 0,
) -> List[str]:
    """Extract the unique string constants found under ``path``.

    Files are grouped by extension, each group is split into chunks and the
    chunks are processed in parallel by ``extract_all_constants_from_files``
    using the extractors registered for that extension.

    Args:
        path: A directory (searched via ``get_all_filepathes_recursively``)
            or a single file.
        exclude: Forwarded to ``get_all_filepathes_recursively``.
        process_dots: When false, file paths that contain ``'/.'`` or start
            with ``'.'`` are skipped.
        processes_amount: Number of worker processes in the pool; also used
            to size the chunks of files handed to the workers.
        verbosity: Forwarded to ``extract_all_constants_from_files``.

    Returns:
        De-duplicated list of extracted strings (order is not defined, since
        the result goes through a ``set``).
    """
    extractors = [
        (extract_from_python_src, ['py', 'pyi']),
        (extract_from_markdown, ['md']),
        (extract_from_html, ['html']),
        (extract_from_js, ['js', 'ts', 'tsx']),
        (extract_from_po, ['po']),
    ]

    extension_to_extractor_mapping: DefaultDict[str, List[Callable]] = collections.defaultdict(list)
    for extractor, extensions in extractors:
        for extension in extensions:
            extension_to_extractor_mapping[extension].append(extractor)

    # The kind of ``path`` does not depend on the extension being processed.
    is_directory = os.path.isdir(path)

    string_constants: List[str] = []
    for extension, extension_extractors in extension_to_extractor_mapping.items():
        if is_directory:
            all_files = get_all_filepathes_recursively(path, exclude, extension)
        else:
            # Match a real file extension: a bare ``endswith(extension)``
            # would treat e.g. a file named ``happy`` as a ``py`` file.
            all_files = [path] if path.endswith('.' + extension) else []

        if not process_dots:
            # NOTE(review): ``startswith('.')`` also drops relative paths
            # such as ``./src/a.py`` or ``../a.py`` - confirm this is intended.
            all_files = [
                f for f in all_files
                if '/.' not in f and not f.startswith('.')
            ]
        if not all_files:
            continue

        chunk_size = math.ceil(len(all_files) / processes_amount)
        worker = functools.partial(
            extract_all_constants_from_files,
            extractors=extension_extractors,
            verbosity=verbosity,
        )
        # ``map`` blocks until every result is ready, so it is safe to let
        # the context manager tear the pool down right after it; this
        # releases the worker processes instead of leaking one pool per
        # extension.
        with multiprocessing.Pool(processes_amount) as pool:
            new_strings = pool.map(worker, chunks(all_files, chunk_size))
        string_constants += flat(new_strings)

    return list(set(string_constants))
def test_calculate_age_works_fine(test_value, expected_result):
    """Check that ``chunks(*test_value)`` produces ``expected_result``.

    ``test_value`` holds the positional arguments for ``chunks``; its output
    is materialized into a list before the comparison.
    """
    # NOTE(review): the test name mentions "calculate_age" although the
    # function under test is ``chunks`` - consider renaming.
    produced_chunks = list(chunks(*test_value))
    assert produced_chunks == expected_result