def build_docs_for_packages(
    current_packages: List[str], docs_only: bool, spellcheck_only: bool, for_production: bool, verbose: bool
) -> Tuple[Dict[str, List[DocBuildError]], Dict[str, List[SpellingError]]]:
    """Sequentially builds docs and/or runs spellcheck for each package.

    Returns a pair of dicts mapping package name to the build errors and
    spelling errors collected for it.
    """
    all_build_errors: Dict[str, List[DocBuildError]] = defaultdict(list)
    all_spelling_errors: Dict[str, List[SpellingError]] = defaultdict(list)
    total = len(current_packages)
    for package_no, package_name in enumerate(current_packages, start=1):
        # Banner separating the per-package sections of the output.
        print("#" * 20, f"[{package_no}/{total}] {package_name}", "#" * 20)
        builder = AirflowDocsBuilder(
            package_name=package_name, for_production=for_production, verbose=verbose
        )
        builder.clean_files()
        if not docs_only:
            with with_group(f"Check spelling: {package_name}"):
                found_spelling = builder.check_spelling()
            if found_spelling:
                all_spelling_errors[package_name].extend(found_spelling)
        if not spellcheck_only:
            with with_group(f"Building docs: {package_name}"):
                found_build = builder.build_sphinx_docs()
            if found_build:
                all_build_errors[package_name].extend(found_build)
    return all_build_errors, all_spelling_errors
def run_spell_check_in_parallel(
    all_spelling_errors: Dict[str, List[SpellingError]],
    for_production: bool,
    current_packages: List[str],
    verbose: bool,
    pool,
):
    """Runs the spell check for every package on the given pool and merges the errors."""
    with with_group("Scheduling spell checking of documentation"):
        for package_name in current_packages:
            console.print(f"[blue]{package_name:60}:[/] Scheduling spellchecking")
        # One specification per package, fed to the worker pool below.
        specifications: List[BuildSpecification] = [
            BuildSpecification(package_name=name, for_production=for_production, verbose=verbose)
            for name in current_packages
        ]
    with with_group("Running spell checking of documentation"):
        console.print()
        results = pool.map(perform_spell_check_for_single_package, specifications)
    for spell_result in results:
        if spell_result.errors:
            all_spelling_errors[spell_result.package_name].extend(spell_result.errors)
        print_spelling_output(spell_result)
def run_docs_build_in_parallel(
    all_build_errors: Dict[str, List[DocBuildError]],
    for_production: bool,
    current_packages: List[str],
    verbose: bool,
    pool,
):
    """Runs the docs build for every package on the given pool and merges the errors."""
    with with_group("Scheduling documentation to build"):
        specifications: List[BuildSpecification] = []
        for pkg in current_packages:
            console.print(f"[blue]{pkg:60}:[/] Scheduling documentation to build")
            specifications.append(
                BuildSpecification(package_name=pkg, for_production=for_production, verbose=verbose)
            )
    with with_group("Running docs building"):
        console.print()
        results = pool.map(perform_docs_build_for_single_package, specifications)
    for build_result in results:
        if build_result.errors:
            all_build_errors[build_result.package_name].extend(build_result.errors)
        print_build_output(build_result)
def print_build_output(result: BuildDocsResult):
    """Replays the captured log of one docs-build job inside a collapsible output group."""
    pkg = result.package_name
    separator = f"[blue]{pkg:60}: " + "#" * 80
    with with_group(f"{TEXT_RED}Output for documentation build {pkg}{TEXT_RESET}"):
        console.print()
        console.print(separator)
        # Prefix every captured log line with the package name for grep-ability.
        with open(result.log_file_name) as log_file:
            for log_line in log_file.read().splitlines():
                console.print(f"{pkg:60} {log_line}")
        console.print(separator)
def print_spelling_output(result: SpellCheckResult):
    """Replays the captured log of one spell-check job inside a collapsible output group."""
    pkg = result.package_name
    separator = f"[blue]{pkg:60}: " + "#" * 80
    with with_group(f"{TEXT_RED}Output for spelling check: {result.package_name}{TEXT_RESET}"):
        console.print()
        console.print(separator)
        # Prefix every captured log line with the package name for grep-ability.
        with open(result.log_file_name) as log_file:
            for log_line in log_file.read().splitlines():
                console.print(f"{pkg:60} {log_line}")
        console.print(separator)
        console.print()
def build_docs_for_packages(
    current_packages: List[str],
    docs_only: bool,
    spellcheck_only: bool,
    for_production: bool,
    jobs: int,
    verbose: bool,
) -> Tuple[Dict[str, List[DocBuildError]], Dict[str, List[SpellingError]]]:
    """Builds documentation for all packages and combines errors.

    :param current_packages: package names to build documentation for
    :param docs_only: skip the spellcheck phase when True
    :param spellcheck_only: skip the docs-build phase when True
    :param for_production: whether the docs are built for a production release
    :param jobs: number of parallel jobs; values > 1 select the parallel runner
    :param verbose: forwarded to the per-package builders
    :return: (build errors by package, spelling errors by package)
    """
    all_build_errors: Dict[str, List[DocBuildError]] = defaultdict(list)
    all_spelling_errors: Dict[str, List[SpellingError]] = defaultdict(list)
    with with_group("Cleaning documentation files"):
        for package_name in current_packages:
            console.print(f"[blue]{package_name:60}:[/] Cleaning files")
            builder = AirflowDocsBuilder(package_name=package_name, for_production=for_production)
            builder.clean_files()
    if jobs > 1:
        # Outside CI the parallel dockerized build relies on an already-built image,
        # so warn the user that local changes need an image rebuild to take effect.
        # (Typos in the original messages fixed: "BUILD BEFORE" -> "BUILT BEFORE",
        # "you've build" -> "you've built", "runnning" -> "running".)
        if os.getenv('CI', '') == '':
            console.print(
                "[yellow] PARALLEL DOCKERIZED EXECUTION REQUIRES IMAGE TO BE BUILT BEFORE !!!![/]"
            )
            console.print(
                "[yellow] Make sure that you've built the image before running docs build.[/]"
            )
            console.print(
                "[yellow] otherwise local changes you've done will not be used during the check[/]"
            )
            console.print()
        run_in_parallel(
            all_build_errors,
            all_spelling_errors,
            current_packages,
            docs_only,
            for_production,
            jobs,
            spellcheck_only,
            verbose,
        )
    else:
        run_sequentially(
            all_build_errors,
            all_spelling_errors,
            current_packages,
            docs_only,
            for_production,
            spellcheck_only,
            verbose,
        )
    return all_build_errors, all_spelling_errors
def check_spelling(self):
    """Runs the sphinx ``spelling`` builder for this package and returns the SpellingErrors found."""
    errors = []
    with TemporaryDirectory() as tmp_dir, with_group(
            f"Check spelling: {self.package_name}"):
        build_cmd = [
            "sphinx-build",
            "-W",  # turn warnings into errors
            "-T",  # show full traceback on exception
            "-b",  # builder to use
            "spelling",
            "-c",
            DOCS_DIR,
            "-d",  # path for the cached environment and doctree files
            self._doctree_dir,
            self._src_dir,  # path to documentation source files
            tmp_dir,
        ]
        print("Executing cmd: ", " ".join([shlex.quote(c) for c in build_cmd]))
        env = os.environ.copy()
        env['AIRFLOW_PACKAGE_NAME'] = self.package_name
        if self.for_production:
            env['AIRFLOW_FOR_PRODUCTION'] = 'true'
        completed_proc = run(  # pylint: disable=subprocess-run-check
            build_cmd, cwd=self._src_dir, env=env
        )
        if completed_proc.returncode != 0:
            errors.append(
                SpellingError(
                    file_path=None,
                    line_no=None,
                    spelling=None,
                    suggestion=None,
                    context_line=None,
                    message=(
                        f"Sphinx spellcheck returned non-zero exit status: {completed_proc.returncode}."
                    ),
                )
            )
        # The spelling builder writes one ``*.spelling`` report per offending file;
        # collect them all and parse into structured errors.
        report_chunks = []
        for report_path in glob(f"{tmp_dir}/**/*.spelling", recursive=True):
            with open(report_path) as report_file:
                report_chunks.append(report_file.read())
        errors.extend(parse_spelling_warnings("".join(report_chunks), self._src_dir))
    return errors
def build_sphinx_docs(self) -> List[DocBuildError]:
    """Runs the sphinx ``html`` builder for this package and returns the DocBuildErrors found."""
    errors: List[DocBuildError] = []
    with NamedTemporaryFile() as warning_file, with_group(
            f"Building docs: {self.package_name}"):
        build_cmd = [
            "sphinx-build",
            "-T",  # show full traceback on exception
            "--color",  # do emit colored output
            "-b",  # builder to use
            "html",
            "-d",  # path for the cached environment and doctree files
            self._doctree_dir,
            "-c",
            DOCS_DIR,
            "-w",  # write warnings (and errors) to given file
            warning_file.name,
            self._src_dir,  # path to documentation source files
            self._build_dir,  # path to output directory
        ]
        print("Executing cmd: ", " ".join([shlex.quote(c) for c in build_cmd]))
        env = os.environ.copy()
        env['AIRFLOW_PACKAGE_NAME'] = self.package_name
        if self.for_production:
            env['AIRFLOW_FOR_PRODUCTION'] = 'true'
        completed_proc = run(  # pylint: disable=subprocess-run-check
            build_cmd, cwd=self._src_dir, env=env
        )
        if completed_proc.returncode != 0:
            errors.append(
                DocBuildError(
                    file_path=None,
                    line_no=None,
                    message=f"Sphinx returned non-zero exit status: {completed_proc.returncode}.",
                )
            )
        # Sphinx wrote the warnings into the temp file; rewind and read them back.
        warning_file.seek(0)
        warning_text = warning_file.read().decode()
        # Strip 7-bit C1 ANSI escape sequences so the parser sees plain text.
        warning_text = re.sub(r"\x1B[@-_][0-?]*[ -/]*[@-~]", "", warning_text)
        errors.extend(parse_sphinx_warnings(warning_text, self._src_dir))
    return errors
def build_docs_for_packages(
    current_packages: List[str],
    docs_only: bool,
    spellcheck_only: bool,
    for_production: bool,
    jobs: int,
    verbose: bool,
) -> Tuple[Dict[str, List[DocBuildError]], Dict[str, List[SpellingError]]]:
    """Cleans every package, then builds docs (in parallel when jobs > 1) and aggregates errors."""
    build_errors: Dict[str, List[DocBuildError]] = defaultdict(list)
    spelling_errors: Dict[str, List[SpellingError]] = defaultdict(list)
    with with_group("Cleaning documentation files"):
        for pkg in current_packages:
            console.print(f"[blue]{pkg:60}:[/] Cleaning files")
            AirflowDocsBuilder(package_name=pkg, for_production=for_production).clean_files()
    # Dispatch to the parallel or sequential runner; both mutate the error dicts in place.
    if jobs > 1:
        run_in_parallel(
            build_errors,
            spelling_errors,
            current_packages,
            docs_only,
            for_production,
            jobs,
            spellcheck_only,
            verbose,
        )
    else:
        run_sequentially(
            build_errors,
            spelling_errors,
            current_packages,
            docs_only,
            for_production,
            spellcheck_only,
            verbose,
        )
    return build_errors, spelling_errors
def main():
    """Main code: builds docs for the selected packages, retries eligible failures once,
    runs lint checks and exits via print_build_errors_and_exit."""
    args = _get_parser().parse_args()
    available_packages = get_available_packages()
    docs_only = args.docs_only
    spellcheck_only = args.spellcheck_only
    disable_checks = args.disable_checks
    package_filters = args.package_filter
    for_production = args.for_production
    # Promote new flags only for full (unfiltered) builds.
    if not package_filters:
        _promote_new_flags()
    with with_group("Available packages"):
        for pkg in available_packages:
            print(f" - {pkg}")
    if package_filters:
        print("Current package filters: ", package_filters)
    current_packages = process_package_filters(available_packages, package_filters)
    with with_group(
        f"Documentation will be built for {len(current_packages)} package(s)"
    ):
        for pkg_no, pkg in enumerate(current_packages, start=1):
            print(f"{pkg_no}. {pkg}")
    with with_group("Fetching inventories"):
        fetch_inventories()
    # The None key is reserved for general (package-independent) errors below.
    all_build_errors: Dict[Optional[str], List[DocBuildError]] = {}
    all_spelling_errors: Dict[Optional[str], List[SpellingError]] = {}
    package_build_errors, package_spelling_errors = build_docs_for_packages(
        current_packages=current_packages,
        docs_only=docs_only,
        spellcheck_only=spellcheck_only,
        for_production=for_production,
        verbose=args.verbose,
    )
    if package_build_errors:
        all_build_errors.update(package_build_errors)
    if package_spelling_errors:
        all_spelling_errors.update(package_spelling_errors)
    # Packages with at least one error message matching ERRORS_ELIGIBLE_TO_REBUILD
    # get a single rebuild attempt.
    to_retry_packages = [
        package_name
        for package_name, errors in package_build_errors.items()
        if any(
            any((m in e.message) for m in ERRORS_ELIGIBLE_TO_REBUILD) for e in errors
        )
    ]
    if to_retry_packages:
        # Drop stale errors first so the retry results replace them.
        for package_name in to_retry_packages:
            if package_name in all_build_errors:
                del all_build_errors[package_name]
            if package_name in all_spelling_errors:
                del all_spelling_errors[package_name]
        package_build_errors, package_spelling_errors = build_docs_for_packages(
            current_packages=to_retry_packages,
            docs_only=docs_only,
            spellcheck_only=spellcheck_only,
            for_production=for_production,
            verbose=args.verbose,
        )
        if package_build_errors:
            all_build_errors.update(package_build_errors)
        if package_spelling_errors:
            all_spelling_errors.update(package_spelling_errors)
    if not disable_checks:
        general_errors = lint_checks.run_all_check()
        if general_errors:
            all_build_errors[None] = general_errors
    dev_index_generator.generate_index(f"{DOCS_DIR}/_build/index.html")
    if not package_filters:
        _promote_new_flags()
    print_build_errors_and_exit(
        all_build_errors,
        all_spelling_errors,
    )
def main():
    """Main code: partitions packages by missing inventory, builds priority packages first,
    then the rest, retries eligible failures once, runs lint checks and exits."""
    args = _get_parser().parse_args()
    available_packages = get_available_packages()
    docs_only = args.docs_only
    spellcheck_only = args.spellcheck_only
    disable_checks = args.disable_checks
    package_filters = args.package_filter
    for_production = args.for_production
    with with_group("Available packages"):
        for pkg in sorted(available_packages):
            console.print(f" - {pkg}")
    if package_filters:
        console.print("Current package filters: ", package_filters)
    current_packages = process_package_filters(available_packages, package_filters)
    with with_group("Fetching inventories"):
        # Inventories that could not be retrieved should be built first. This may mean this is a
        # new package.
        packages_without_inventories = fetch_inventories()
    normal_packages, priority_packages = partition(
        lambda d: d in packages_without_inventories, current_packages
    )
    # partition returns iterators; materialize both once.
    normal_packages, priority_packages = list(normal_packages), list(priority_packages)
    # jobs == 0 means "use all CPUs".
    jobs = args.jobs if args.jobs != 0 else os.cpu_count()
    with with_group(
        f"Documentation will be built for {len(current_packages)} package(s) with {jobs} parallel jobs"
    ):
        for pkg_no, pkg in enumerate(current_packages, start=1):
            console.print(f"{pkg_no}. {pkg}")
    # The None key is reserved for general (package-independent) errors below.
    all_build_errors: Dict[Optional[str], List[DocBuildError]] = {}
    all_spelling_errors: Dict[Optional[str], List[SpellingError]] = {}
    if priority_packages:
        # Build priority packages
        package_build_errors, package_spelling_errors = build_docs_for_packages(
            current_packages=priority_packages,
            docs_only=docs_only,
            spellcheck_only=spellcheck_only,
            for_production=for_production,
            jobs=jobs,
            verbose=args.verbose,
        )
        if package_build_errors:
            all_build_errors.update(package_build_errors)
        if package_spelling_errors:
            all_spelling_errors.update(package_spelling_errors)
    # Build normal packages
    # If only one inventory is missing, the remaining packages are correct. If we are missing
    # two or more inventories, it is better to try to build for all packages as the previous packages
    # may have failed as well.
    package_build_errors, package_spelling_errors = build_docs_for_packages(
        current_packages=current_packages if len(priority_packages) > 1 else normal_packages,
        docs_only=docs_only,
        spellcheck_only=spellcheck_only,
        for_production=for_production,
        jobs=jobs,
        verbose=args.verbose,
    )
    if package_build_errors:
        all_build_errors.update(package_build_errors)
    if package_spelling_errors:
        all_spelling_errors.update(package_spelling_errors)
    # Build documentation for some packages again if it can help them.
    to_retry_packages = [
        package_name
        for package_name, errors in package_build_errors.items()
        if any(
            any((m in e.message) for m in ERRORS_ELIGIBLE_TO_REBUILD) for e in errors
        )
    ]
    if to_retry_packages:
        # Drop stale errors first so the retry results replace them.
        for package_name in to_retry_packages:
            if package_name in all_build_errors:
                del all_build_errors[package_name]
            if package_name in all_spelling_errors:
                del all_spelling_errors[package_name]
        package_build_errors, package_spelling_errors = build_docs_for_packages(
            current_packages=to_retry_packages,
            docs_only=docs_only,
            spellcheck_only=spellcheck_only,
            for_production=for_production,
            jobs=jobs,
            verbose=args.verbose,
        )
        if package_build_errors:
            all_build_errors.update(package_build_errors)
        if package_spelling_errors:
            all_spelling_errors.update(package_spelling_errors)
    if not disable_checks:
        general_errors = lint_checks.run_all_check()
        if general_errors:
            all_build_errors[None] = general_errors
    dev_index_generator.generate_index(f"{DOCS_DIR}/_build/index.html")
    if not package_filters:
        _promote_new_flags()
    # Clean up the temporary provider __init__ file if it was created earlier.
    if os.path.exists(PROVIDER_INIT_FILE):
        os.remove(PROVIDER_INIT_FILE)
    print_build_errors_and_exit(
        all_build_errors,
        all_spelling_errors,
    )
def main():
    """Main code: filters packages via fnmatch patterns, builds their docs, retries eligible
    failures once, runs individual lint checks and exits with a summary."""
    args = _get_parser().parse_args()
    available_packages = get_available_packages()
    with with_group("Available packages"):
        for pkg in available_packages:
            print(f" - {pkg}")
    docs_only = args.docs_only
    spellcheck_only = args.spellcheck_only
    disable_checks = args.disable_checks
    package_filters = args.package_filter
    for_production = args.for_production
    print("Current package filters: ", package_filters)
    # Apply glob-style filters when provided; otherwise build everything.
    current_packages = ([
        p for p in available_packages if any(
            fnmatch.fnmatch(p, f) for f in package_filters)
    ] if package_filters else available_packages)
    with with_group(
        f"Documentation will be built for {len(current_packages)} package(s)"
    ):
        for pkg in current_packages:
            print(f" - {pkg}")
    # The None key is reserved for general (package-independent) errors below.
    all_build_errors: Dict[Optional[str], List[DocBuildError]] = {}
    all_spelling_errors: Dict[Optional[str], List[SpellingError]] = {}
    package_build_errors, package_spelling_errors = build_docs_for_packages(
        current_packages=current_packages,
        docs_only=docs_only,
        spellcheck_only=spellcheck_only,
        for_production=for_production,
    )
    if package_build_errors:
        all_build_errors.update(package_build_errors)
    if package_spelling_errors:
        all_spelling_errors.update(package_spelling_errors)
    # Packages with at least one error message matching ERRORS_ELIGIBLE_TO_REBUILD
    # get a single rebuild attempt.
    to_retry_packages = [
        package_name
        for package_name, errors in package_build_errors.items()
        if any(
            any((m in e.message) for m in ERRORS_ELIGIBLE_TO_REBUILD) for e in errors
        )
    ]
    if to_retry_packages:
        # Drop stale errors first so the retry results replace them.
        for package_name in to_retry_packages:
            if package_name in all_build_errors:
                del all_build_errors[package_name]
            if package_name in all_spelling_errors:
                del all_spelling_errors[package_name]
        package_build_errors, package_spelling_errors = build_docs_for_packages(
            current_packages=to_retry_packages,
            docs_only=docs_only,
            spellcheck_only=spellcheck_only,
            for_production=for_production,
        )
        if package_build_errors:
            all_build_errors.update(package_build_errors)
        if package_spelling_errors:
            all_spelling_errors.update(package_spelling_errors)
    if not disable_checks:
        general_errors = []
        general_errors.extend(
            lint_checks.check_guide_links_in_operator_descriptions())
        general_errors.extend(lint_checks.check_enforce_code_block())
        general_errors.extend(
            lint_checks.check_exampleinclude_for_example_dags())
        if general_errors:
            all_build_errors[None] = general_errors
    dev_index_generator.generate_index(f"{DOCS_DIR}/_build/index.html")
    print_build_errors_and_exit(
        "The documentation has errors.",
        all_build_errors,
        all_spelling_errors,
    )
def main():
    """Main code: sorts packages so those with missing inventories build first, builds docs,
    retries eligible failures once, runs lint checks and exits with a summary."""
    args = _get_parser().parse_args()
    available_packages = get_available_packages()
    docs_only = args.docs_only
    spellcheck_only = args.spellcheck_only
    disable_checks = args.disable_checks
    package_filters = args.package_filter
    for_production = args.for_production
    with with_group("Available packages"):
        for pkg in sorted(available_packages):
            console.print(f" - {pkg}")
    if package_filters:
        console.print("Current package filters: ", package_filters)
    current_packages = process_package_filters(available_packages, package_filters)
    with with_group("Fetching inventories"):
        # Inventories that could not be retrieved should be retrieved first. This may mean this is a
        # new package.
        priority_packages = fetch_inventories()
    # Stable sort moves priority packages to the front, keeping relative order otherwise.
    current_packages = sorted(current_packages, key=lambda d: -1 if d in priority_packages else 1)
    # jobs == 0 means "use all CPUs".
    jobs = args.jobs if args.jobs != 0 else os.cpu_count()
    with with_group(
        f"Documentation will be built for {len(current_packages)} package(s) with {jobs} parallel jobs"
    ):
        for pkg_no, pkg in enumerate(current_packages, start=1):
            console.print(f"{pkg_no}. {pkg}")
    # The None key is reserved for general (package-independent) errors below.
    all_build_errors: Dict[Optional[str], List[DocBuildError]] = {}
    all_spelling_errors: Dict[Optional[str], List[SpellingError]] = {}
    package_build_errors, package_spelling_errors = build_docs_for_packages(
        current_packages=current_packages,
        docs_only=docs_only,
        spellcheck_only=spellcheck_only,
        for_production=for_production,
        jobs=jobs,
        verbose=args.verbose,
    )
    if package_build_errors:
        all_build_errors.update(package_build_errors)
    if package_spelling_errors:
        all_spelling_errors.update(package_spelling_errors)
    # Packages with at least one error message matching ERRORS_ELIGIBLE_TO_REBUILD
    # get a single rebuild attempt.
    to_retry_packages = [
        package_name
        for package_name, errors in package_build_errors.items()
        if any(
            any((m in e.message) for m in ERRORS_ELIGIBLE_TO_REBUILD) for e in errors
        )
    ]
    if to_retry_packages:
        # Drop stale errors first so the retry results replace them.
        for package_name in to_retry_packages:
            if package_name in all_build_errors:
                del all_build_errors[package_name]
            if package_name in all_spelling_errors:
                del all_spelling_errors[package_name]
        package_build_errors, package_spelling_errors = build_docs_for_packages(
            current_packages=to_retry_packages,
            docs_only=docs_only,
            spellcheck_only=spellcheck_only,
            for_production=for_production,
            jobs=jobs,
            verbose=args.verbose,
        )
        if package_build_errors:
            all_build_errors.update(package_build_errors)
        if package_spelling_errors:
            all_spelling_errors.update(package_spelling_errors)
    if not disable_checks:
        general_errors = lint_checks.run_all_check()
        if general_errors:
            all_build_errors[None] = general_errors
    dev_index_generator.generate_index(f"{DOCS_DIR}/_build/index.html")
    if not package_filters:
        _promote_new_flags()
    print_build_errors_and_exit(
        all_build_errors,
        all_spelling_errors,
    )