def cpplint(projroot: Path, full: bool) -> None:
    """Run lint-checking on all code deemed lint-able.

    Args:
      projroot: project root directory.
      full: if True, discard the cache and re-lint everything.

    Raises:
      Exception: if any path contains a space or a file fails linting.
    """
    from concurrent.futures import ThreadPoolExecutor
    from multiprocessing import cpu_count
    from efrotools import get_config
    from efro.terminal import Clr
    os.chdir(projroot)
    filenames = get_code_filenames(projroot)
    if any(' ' in name for name in filenames):
        raise Exception('found space in path; unexpected')

    # Check the config for a list of ones to ignore.
    code_blacklist: List[str] = get_config(projroot).get(
        'cpplint_blacklist', [])

    # Just pretend blacklisted ones don't exist.
    filenames = [f for f in filenames if f not in code_blacklist]
    filenames = [f for f in filenames if not f.endswith('.mm')]

    cachepath = Path(projroot, 'config/.cache-lintcode')
    if full and cachepath.exists():
        cachepath.unlink()
    cache = FileCache(cachepath)

    # Clear out entries and hashes for files that have changed/etc.
    cache.update(filenames, '')
    dirtyfiles = cache.get_dirty_files()

    if dirtyfiles:
        print(f'{Clr.BLU}CppLint checking'
              f' {len(dirtyfiles)} file(s)...{Clr.RST}')

    def lint_file(filename: str) -> None:
        result = subprocess.call(['cpplint', '--root=src', filename])
        if result != 0:
            # Report which file failed (was previously a placeholder
            # f-string with no substitution).
            raise Exception(f'Linting failed for {filename}')

    with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
        # Converting this to a list will propagate any errors.
        list(executor.map(lint_file, dirtyfiles))
    if dirtyfiles:
        cache.mark_clean(filenames)
        cache.write()
    print(
        f'{Clr.GRN}CppLint: all {len(filenames)} files are passing.{Clr.RST}',
        flush=True)
def pylint(projroot: Path, full: bool, fast: bool) -> None:
    """Run lint-checking on all scripts deemed lint-able.

    Uses a file-cache plus recursive dependency tracking so only files
    that are dirty (or depend on dirty files) get re-linted.

    Args:
      projroot: project root dir (must contain .pylintrc).
      full: if True, discard the cache and re-lint everything.
      fast: if True, use the separate 'fast' cache file.
    """
    from efrotools import get_files_hash
    pylintrc = Path(projroot, '.pylintrc')
    if not os.path.isfile(pylintrc):
        raise Exception('pylintrc not found where expected')
    filenames = get_script_filenames(projroot)

    if any(' ' in name for name in filenames):
        raise Exception('found space in path; unexpected')
    script_blacklist: List[str] = []
    filenames = [f for f in filenames if f not in script_blacklist]

    cachebasename = '.cache-lintscriptsfast' if fast else '.cache-lintscripts'
    cachepath = Path(projroot, 'config', cachebasename)
    if full and cachepath.exists():
        cachepath.unlink()
    cache = FileCache(cachepath)

    # Clear out entries and hashes for files that have changed/etc.
    # (folding in the pylintrc hash dirties everything on config changes)
    cache.update(filenames, get_files_hash([pylintrc]))

    # Do a recursive dependency check and mark all files who are
    # either dirty or have a dependency that is dirty.
    filestates: Dict[str, bool] = {}
    for fname in filenames:
        _dirty_dep_check(fname, filestates, cache, fast, 0)

    dirtyfiles = [k for k, v in filestates.items() if v]

    # Let's sort by modification time, so ones we're actively trying
    # to fix get linted first and we see remaining errors faster.
    dirtyfiles.sort(reverse=True, key=lambda f: os.stat(f).st_mtime)

    if dirtyfiles:
        print(f'Pylint checking {len(dirtyfiles)} file(s)...', flush=True)
        try:
            _run_script_lint(projroot, pylintrc, cache, dirtyfiles, filenames)
        except Exception:
            # Note: even if we fail here, we still want to
            # update our disk cache (since some lints may have passed).
            print('Pylint failed.', flush=True)

            # Tracebacks here are usually noise, but can be handy when
            # debugging; opt in via env-var (off by default, so behavior
            # is unchanged).
            if os.environ.get('EFROTOOLS_LINT_DEBUG'):
                import traceback
                traceback.print_exc()
            cache.write()
            sys.exit(255)
    print(f'Pylint: all {len(filenames)} files are passing.', flush=True)
    cache.write()
def pylint(projroot: Path, full: bool, fast: bool) -> None:
    """Run Pylint on all scripts in our project (with smart dep tracking).

    Args:
      projroot: project root dir (must contain .pylintrc).
      full: if True, discard the cache and re-lint everything.
      fast: if True, use the separate 'fast' cache file.
    """
    from efrotools import get_files_hash
    from efro.terminal import Clr
    pylintrc = Path(projroot, '.pylintrc')
    if not os.path.isfile(pylintrc):
        raise Exception('pylintrc not found where expected')
    filenames = get_script_filenames(projroot)
    if any(' ' in name for name in filenames):
        raise Exception('found space in path; unexpected')
    script_blacklist: List[str] = []
    filenames = [f for f in filenames if f not in script_blacklist]
    cachebasename = '.cache-lintscriptsfast' if fast else '.cache-lintscripts'
    cachepath = Path(projroot, 'config', cachebasename)
    if full and cachepath.exists():
        cachepath.unlink()
    cache = FileCache(cachepath)

    # Clear out entries and hashes for files that have changed/etc.
    cache.update(filenames, get_files_hash([pylintrc]))

    # Do a recursive dependency check and mark all files who are
    # either dirty or have a dependency that is dirty.
    filestates: Dict[str, bool] = {}
    for fname in filenames:
        _dirty_dep_check(fname, filestates, cache, fast, 0)
    dirtyfiles = [k for k, v in filestates.items() if v]

    # Let's sort by modification time, so ones we're actively trying
    # to fix get linted first and we see remaining errors faster.
    dirtyfiles.sort(reverse=True, key=lambda f: os.stat(f).st_mtime)
    try:
        if dirtyfiles:
            print(
                f'{Clr.BLU}Pylint checking'
                f' {len(dirtyfiles)} file(s)...{Clr.RST}',
                flush=True)
            _run_pylint(projroot, pylintrc, cache, dirtyfiles, filenames)
    finally:
        # Always persist the cache exactly once: cache.update() may have
        # pruned stale entries even when nothing was dirty, and on a lint
        # failure some files may still have passed. (Previously the cache
        # was written twice on the success path.)
        cache.write()
    print(f'{Clr.GRN}Pylint: all {len(filenames)} files are passing.{Clr.RST}',
          flush=True)
def cpplint(projroot: Path, full: bool) -> None:
    """Run lint-checking on all code deemed lint-able.

    Patches a private copy of the installed cpplint module (extra C
    headers, relaxed header/NOLINT checks) and runs that copy over all
    dirty files in parallel.

    Args:
      projroot: project root directory.
      full: if True, discard the cache and re-lint everything.
    """
    # pylint: disable=too-many-locals
    import tempfile
    from concurrent.futures import ThreadPoolExecutor
    from multiprocessing import cpu_count
    from efrotools import getconfig, PYVER
    from efro.terminal import Clr
    from efro.error import CleanError
    os.chdir(projroot)
    filenames = get_code_filenames(projroot)
    for fpath in filenames:
        if ' ' in fpath:
            raise Exception(f'Found space in path {fpath}; unexpected.')

    # Check the config for a list of ones to ignore.
    code_blacklist: List[str] = getconfig(projroot).get(
        'cpplint_blacklist', [])

    # Just pretend blacklisted ones don't exist.
    filenames = [f for f in filenames if f not in code_blacklist]
    filenames = [f for f in filenames if not f.endswith('.mm')]

    cachepath = Path(projroot, 'config/.cache-lintcode')
    if full and cachepath.exists():
        cachepath.unlink()
    cache = FileCache(cachepath)

    # Clear out entries and hashes for files that have changed/etc.
    cache.update(filenames, '')
    dirtyfiles = cache.get_dirty_files()
    if dirtyfiles:
        print(f'{Clr.BLU}CppLint checking'
              f' {len(dirtyfiles)} file(s)...{Clr.RST}')

    # We want to do a few custom modifications to the cpplint module...
    try:
        import cpplint as cpplintmodule
    except Exception as exc:
        # Chain the cause so the underlying import problem stays visible.
        raise CleanError('Unable to import cpplint') from exc
    with open(cpplintmodule.__file__, encoding='utf-8') as infile:
        codelines = infile.read().splitlines()
    cheadersline = codelines.index('_C_HEADERS = frozenset([')

    # Extra headers we consider as valid C system headers.
    # (de-duplicated; 'android/log.h' was listed twice)
    c_headers = [
        'malloc.h', 'tchar.h', 'jni.h', 'android/log.h', 'EGL/egl.h',
        'libgen.h', 'linux/netlink.h', 'linux/rtnetlink.h',
        'android/bitmap.h', 'uuid/uuid.h', 'cxxabi.h', 'direct.h',
        'shellapi.h', 'rpc.h', 'io.h'
    ]
    codelines.insert(cheadersline + 1, ''.join(f"'{h}'," for h in c_headers))

    # Skip unapproved C++ headers check (it flags <mutex>, <thread>, etc.)
    headercheckline = codelines.index(
        " if include and include.group(1) in ('cfenv',")
    codelines[headercheckline] = (
        " if False and include and include.group(1) in ('cfenv',")

    # Don't complain about unknown NOLINT categories.
    # (we use them for clang-tidy)
    unknownlintline = codelines.index(
        ' elif category not in _LEGACY_ERROR_CATEGORIES:')
    codelines[unknownlintline] = ' elif False:'

    def lint_file(filename: str) -> None:
        # NOTE(review): 'env' is assigned below before the executor runs;
        # this closure must only be called after that assignment.
        result = subprocess.call(
            [f'python{PYVER}', '-m', 'cpplint', '--root=src', filename],
            env=env)
        if result != 0:
            # Report which file failed (was previously '(unknown)').
            raise CleanError(
                f'{Clr.RED}Cpplint failed for {filename}.{Clr.RST}')

    with tempfile.TemporaryDirectory() as tmpdir:
        # Write our replacement module, make it discoverable, then run.
        with open(tmpdir + '/cpplint.py', 'w', encoding='utf-8') as outfile:
            outfile.write('\n'.join(codelines))
        env = os.environ.copy()
        env['PYTHONPATH'] = tmpdir
        with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
            # Converting this to a list will propagate any errors.
            list(executor.map(lint_file, dirtyfiles))
    if dirtyfiles:
        cache.mark_clean(filenames)
        cache.write()
    print(
        f'{Clr.GRN}CppLint: all {len(filenames)} files are passing.{Clr.RST}',
        flush=True)
def _apply_pylint_run_to_cache(projroot: Path, run: Any, dirtyfiles: List[str],
                               allfiles: List[str], cache: FileCache) -> int:
    """Digest a completed pylint run into our file-cache.

    Records per-file dependency lists on the cache entries, marks each
    dirty file as clean (hash stored) or errored (hash removed) based on
    the run's per-module stats, and returns the number of errored files.

    'run' is expected to be a pylint Run object (its 'linter.stats' dict
    is consulted for 'by_msg', 'dependencies' and 'by_module').
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    from astroid import modutils
    from efrotools import getconfig
    from efro.error import CleanError

    # First off, build a map of dirtyfiles to module names
    # (and the corresponding reverse map).
    paths_to_names: Dict[str, str] = {}
    names_to_paths: Dict[str, str] = {}
    for fname in allfiles:
        try:
            mpath = modutils.modpath_from_file(fname)
            mpath = _filter_module_name('.'.join(mpath))
            paths_to_names[fname] = mpath
        except ImportError:
            # This probably means its a tool or something not in our
            # standard path. In this case just use its base name.
            # (seems to be what pylint does)
            dummyname = os.path.splitext(os.path.basename(fname))[0]
            paths_to_names[fname] = dummyname
    for key, val in paths_to_names.items():
        names_to_paths[val] = key

    # If there's any cyclic-import errors, just mark all deps as dirty;
    # don't want to add the logic to figure out which ones the cycles cover
    # since they all seems to appear as errors for the last file in the list.
    cycles: int = run.linter.stats.get('by_msg', {}).get('cyclic-import', 0)
    have_dep_cycles: bool = cycles > 0
    if have_dep_cycles:
        print(f'Found {cycles} cycle-errors; keeping all dirty files dirty.')

    # Update dependencies for what we just ran.
    # A run leaves us with a map of modules to a list of the modules that
    # imports them. We want the opposite though: for each of our modules
    # we want a list of the modules it imports.
    reversedeps = {}
    # Make sure these are all proper module names; no foo.bar.__init__ stuff.
    for key, val in run.linter.stats['dependencies'].items():
        sval = [_filter_module_name(m) for m in val]
        reversedeps[_filter_module_name(key)] = sval
    deps: Dict[str, Set[str]] = {}
    untracked_deps = set()
    for mname, mallimportedby in reversedeps.items():
        for mimportedby in mallimportedby:
            # Only track deps on modules we actually manage; anything else
            # is noted as untracked and validated below.
            if mname in names_to_paths:
                deps.setdefault(mimportedby, set()).add(mname)
            else:
                untracked_deps.add(mname)
    ignored_untracked_deps: List[str] = getconfig(projroot).get(
        'pylint_ignored_untracked_deps', [])

    # Add a few that this package itself triggers.
    ignored_untracked_deps += ['pylint.lint', 'astroid.modutils', 'astroid']

    # Ignore some specific untracked deps; complain about any others.
    untracked_deps = set(dep for dep in untracked_deps
                         if dep not in ignored_untracked_deps)
    if untracked_deps:
        raise CleanError(
            f'Pylint found untracked dependencies: {untracked_deps}.'
            ' If these are external to your project, add them to'
            ' "pylint_ignored_untracked_deps" in the project config.')

    # Finally add the dependency lists to our entries (operate on
    # everything in the run; it may not be mentioned in deps).
    no_deps_modules = set()
    for fname in dirtyfiles:
        fmod = paths_to_names[fname]
        if fmod not in deps:
            # Since this code is a bit flaky, lets always announce when we
            # come up empty and keep a whitelist of expected values to ignore.
            no_deps_modules.add(fmod)
            depsval: List[str] = []
        else:
            # Our deps here are module names; store paths.
            depsval = [names_to_paths[dep] for dep in deps[fmod]]
        cache.entries[fname]['deps'] = depsval

    # Let's print a list of modules with no detected deps so we can make sure
    # this is behaving.
    if no_deps_modules:
        # Debug output; permanently disabled via the bool(False) guard.
        if bool(False):
            print('NOTE: no dependencies found for:',
                  ', '.join(no_deps_modules))

    # Ok, now go through all dirtyfiles involved in this run.
    # Mark them as either errored or clean depending on whether there's
    # error info for them in the run stats.
    # Once again need to convert any foo.bar.__init__ to foo.bar.
    stats_by_module: Dict[str, Any] = {
        _filter_module_name(key): val
        for key, val in run.linter.stats['by_module'].items()
    }
    errcount = 0
    for fname in dirtyfiles:
        mname2 = paths_to_names.get(fname)
        if mname2 is None:
            raise Exception('unable to get module name for "' + fname + '"')
        counts = stats_by_module.get(mname2)
        # 'statement' count seems to be new and always non-zero; ignore it
        if counts is not None:
            counts = {c: v for c, v in counts.items() if c != 'statement'}
        if (counts is not None and any(counts.values())) or have_dep_cycles:
            # Errored: drop the stored hash so the file stays dirty.
            # print('GOT FAIL FOR', fname, counts)
            if 'hash' in cache.entries[fname]:
                del cache.entries[fname]['hash']
            errcount += 1
        else:
            # Clean: store the current hash so the file is skipped next run.
            # print('MARKING FILE CLEAN', mname2, fname)
            cache.entries[fname]['hash'] = (cache.curhashes[fname])
    return errcount
def formatcode(projroot: Path, full: bool) -> None:
    """Run clang-format on all of our source code (multithreaded).

    Args:
      projroot: project root directory.
      full: if True, discard the cache and re-format everything.
    """
    import time
    import concurrent.futures
    from multiprocessing import cpu_count
    from efrotools import get_files_hash
    os.chdir(projroot)
    cachepath = Path(projroot, 'config/.cache-formatcode')
    if full and cachepath.exists():
        cachepath.unlink()
    cache = FileCache(cachepath)
    cfconfig = Path(projroot, '.clang-format')
    filenames = get_code_filenames(projroot)

    # Folding the config hash in dirties everything on config changes.
    confighash = get_files_hash([cfconfig])
    cache.update(filenames, confighash)
    dirtyfiles = cache.get_dirty_files()

    def format_file(filename: str) -> Dict[str, Any]:
        start_time = time.time()

        # Note: seems os.system does not unlock the gil;
        # make sure to use subprocess.
        result = subprocess.call(['clang-format', '-i', filename])
        if result != 0:
            # Report which file failed (was previously '(unknown)').
            raise Exception(f'Formatting failed for {filename}')
        duration = time.time() - start_time
        print(f'Formatted {filename} in {duration:.2f} seconds.')
        sys.stdout.flush()
        return {'f': filename, 't': duration}

    with concurrent.futures.ThreadPoolExecutor(
            max_workers=cpu_count()) as executor:
        # Converting this to a list will propagate any errors.
        list(executor.map(format_file, dirtyfiles))
    if dirtyfiles:
        # Since we changed files, need to update hashes again.
        cache.update(filenames, confighash)
        cache.mark_clean(filenames)
        cache.write()
    print(f'Formatting is up to date for {len(filenames)} code files.',
          flush=True)
def formatscripts(projroot: Path, full: bool) -> None:
    """Runs yapf on all our scripts (multithreaded).

    Args:
      projroot: project root directory.
      full: if True, discard the cache and re-format everything.
    """
    import time
    from concurrent.futures import ThreadPoolExecutor
    from multiprocessing import cpu_count
    from efrotools import get_files_hash, PYVER
    os.chdir(projroot)
    cachepath = Path(projroot, 'config/.cache-formatscripts')
    if full and cachepath.exists():
        cachepath.unlink()
    cache = FileCache(cachepath)
    yapfconfig = Path(projroot, '.style.yapf')
    filenames = get_script_filenames(projroot)

    # Folding the config hash in dirties everything on config changes.
    confighash = get_files_hash([yapfconfig])
    cache.update(filenames, confighash)
    dirtyfiles = cache.get_dirty_files()

    def format_file(filename: str) -> None:
        start_time = time.time()
        result = subprocess.call(
            [f'python{PYVER}', '-m', 'yapf', '--in-place', filename])
        if result != 0:
            # Report which file failed (was previously '(unknown)').
            raise Exception(f'Formatting failed for {filename}')
        duration = time.time() - start_time
        print(f'Formatted {filename} in {duration:.2f} seconds.')
        sys.stdout.flush()

    with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
        # Convert the futures to a list to propagate any errors even
        # though there are no return values we use.
        list(executor.map(format_file, dirtyfiles))
    if dirtyfiles:
        # Since we changed files, need to update hashes again.
        cache.update(filenames, confighash)
        cache.mark_clean(filenames)
        cache.write()
    print(f'Formatting is up to date for {len(filenames)} script files.',
          flush=True)
def check_cpplint(projroot: Path, full: bool) -> None:
    """Run cpplint on all our applicable code.

    Args:
      projroot: project root directory.
      full: if True, discard the cache and re-lint everything.
    """
    # pylint: disable=too-many-locals
    from concurrent.futures import ThreadPoolExecutor
    from multiprocessing import cpu_count
    from efrotools import getconfig, PYVER
    from efro.terminal import Clr
    from efro.error import CleanError
    os.chdir(projroot)
    filenames = get_code_filenames(projroot)
    for fpath in filenames:
        if ' ' in fpath:
            raise Exception(f'Found space in path {fpath}; unexpected.')

    # Check the config for a list of ones to ignore.
    code_blacklist: list[str] = getconfig(projroot).get(
        'cpplint_blacklist', [])

    # Just pretend blacklisted ones don't exist.
    filenames = [f for f in filenames if f not in code_blacklist]
    filenames = [f for f in filenames if not f.endswith('.mm')]

    cachepath = Path(projroot, '.cache/check_cpplint')
    if full and cachepath.exists():
        cachepath.unlink()
    cache = FileCache(cachepath)

    # Clear out entries and hashes for files that have changed/etc.
    cache.update(filenames, '')
    dirtyfiles = cache.get_dirty_files()
    if dirtyfiles:
        print(f'{Clr.BLU}CppLint checking'
              f' {len(dirtyfiles)} file(s)...{Clr.RST}')

    # cpplint checks we deliberately leave off.
    disabled_filters: list[str] = [
        'build/include_what_you_use',
        'build/c++11',
        'readability/nolint',
        'legal/copyright',
    ]
    filterstr = ','.join(f'-{x}' for x in disabled_filters)

    def lint_file(filename: str) -> None:
        result = subprocess.call([
            f'python{PYVER}', '-m', 'cpplint', '--root=src',
            f'--filter={filterstr}', filename
        ])
        if result != 0:
            # Report which file failed (was previously '(unknown)').
            raise CleanError(
                f'{Clr.RED}Cpplint failed for {filename}.{Clr.RST}')

    with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
        # Converting this to a list will propagate any errors.
        list(executor.map(lint_file, dirtyfiles))
    if dirtyfiles:
        cache.mark_clean(filenames)
        cache.write()
    print(
        f'{Clr.GRN}CppLint: all {len(filenames)} files are passing.{Clr.RST}',
        flush=True)
def _apply_pylint_run_to_cache(projroot: Path, run: Any, dirtyfiles: list[str],
                               allfiles: list[str],
                               cache: FileCache) -> int:
    """Digest a completed pylint run into our file-cache.

    Records per-file dependency lists on the cache entries, marks each
    dirty file as clean (hash stored) or errored (hash removed) based on
    the run's per-module stats, and returns the number of errored files.

    'run' is expected to be a pylint Run object whose 'linter.stats' is
    the attribute-style LinterStats object (by_msg / dependencies /
    by_module attributes, newer pylint API).
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    from astroid import modutils
    from efrotools import getconfig
    from efro.error import CleanError

    # First off, build a map of dirtyfiles to module names
    # (and the corresponding reverse map).
    paths_to_names: dict[str, str] = {}
    names_to_paths: dict[str, str] = {}
    for fname in allfiles:
        try:
            mpath = modutils.modpath_from_file(fname)
            mpath = _filter_module_name('.'.join(mpath))
            paths_to_names[fname] = mpath
        except ImportError:
            # This probably means its a tool or something not in our
            # standard path. In this case just use its base name.
            # (seems to be what pylint does)
            dummyname = os.path.splitext(os.path.basename(fname))[0]
            paths_to_names[fname] = dummyname
    for key, val in paths_to_names.items():
        names_to_paths[val] = key

    # If there's any cyclic-import errors, just mark all deps as dirty;
    # don't want to add the logic to figure out which ones the cycles cover
    # since they all seems to appear as errors for the last file in the list.
    cycles: int = run.linter.stats.by_msg.get('cyclic-import', 0)
    have_dep_cycles: bool = cycles > 0
    if have_dep_cycles:
        print(f'Found {cycles} cycle-errors; keeping all dirty files dirty.')

    # Update dependencies for what we just ran.
    # A run leaves us with a map of modules to a list of the modules that
    # imports them. We want the opposite though: for each of our modules
    # we want a list of the modules it imports.
    reversedeps = {}
    # Make sure these are all proper module names; no foo.bar.__init__ stuff.
    for key, val in run.linter.stats.dependencies.items():
        sval = [_filter_module_name(m) for m in val]
        reversedeps[_filter_module_name(key)] = sval
    deps: dict[str, set[str]] = {}
    untracked_deps = set()
    for mname, mallimportedby in reversedeps.items():
        for mimportedby in mallimportedby:
            # Only track deps on modules we actually manage; anything else
            # is noted as untracked and validated below.
            if mname in names_to_paths:
                deps.setdefault(mimportedby, set()).add(mname)
            else:
                untracked_deps.add(mname)
    ignored_untracked_deps: set[str] = set(
        getconfig(projroot).get('pylint_ignored_untracked_deps', []))

    # Add a few that this package itself triggers.
    ignored_untracked_deps |= {'pylint.lint', 'astroid.modutils', 'astroid'}

    # EW; as of Python 3.9, suddenly I'm seeing system modules showing up
    # here where I wasn't before. I wonder what changed. Anyway, explicitly
    # suppressing them here but should come up with a more robust system
    # as I feel this will get annoying fast.
    ignored_untracked_deps |= {
        're', 'importlib', 'os', 'xml.dom', 'weakref', 'random',
        'collections.abc', 'textwrap', 'webbrowser', 'signal', 'pathlib',
        'zlib', 'json', 'pydoc', 'base64', 'functools', 'asyncio', 'xml',
        '__future__', 'traceback', 'typing', 'urllib.parse',
        'ctypes.wintypes', 'code', 'urllib.error', 'threading',
        'xml.etree.ElementTree', 'pickle', 'dataclasses', 'enum',
        'py_compile', 'urllib.request', 'math', 'multiprocessing', 'socket',
        'getpass', 'hashlib', 'ctypes', 'inspect', 'rlcompleter',
        'http.client', 'readline', 'platform', 'datetime', 'copy',
        'concurrent.futures', 'ast', 'subprocess', 'numbers', 'logging',
        'xml.dom.minidom', 'uuid', 'types', 'tempfile', 'shutil', 'shlex',
        'stat', 'wave', 'html', 'binascii'
    }

    # Ignore some specific untracked deps; complain about any others.
    # (the 'bametainternal' prefix is also skipped wholesale here)
    untracked_deps = set(dep for dep in untracked_deps
                         if dep not in ignored_untracked_deps
                         and not dep.startswith('bametainternal'))
    if untracked_deps:
        raise CleanError(
            f'Pylint found untracked dependencies: {untracked_deps}.'
            ' If these are external to your project, add them to'
            ' "pylint_ignored_untracked_deps" in the project config.')

    # Finally add the dependency lists to our entries (operate on
    # everything in the run; it may not be mentioned in deps).
    no_deps_modules = set()
    for fname in dirtyfiles:
        fmod = paths_to_names[fname]
        if fmod not in deps:
            # Since this code is a bit flaky, lets always announce when we
            # come up empty and keep a whitelist of expected values to ignore.
            no_deps_modules.add(fmod)
            depsval: list[str] = []
        else:
            # Our deps here are module names; store paths.
            depsval = [names_to_paths[dep] for dep in deps[fmod]]
        cache.entries[fname]['deps'] = depsval

    # Let's print a list of modules with no detected deps so we can make sure
    # this is behaving.
    if no_deps_modules:
        # Debug output; permanently disabled via the bool(False) guard.
        if bool(False):
            print('NOTE: no dependencies found for:',
                  ', '.join(no_deps_modules))

    # Ok, now go through all dirtyfiles involved in this run.
    # Mark them as either errored or clean depending on whether there's
    # error info for them in the run stats.
    # Once again need to convert any foo.bar.__init__ to foo.bar.
    stats_by_module: dict[str, Any] = {
        _filter_module_name(key): val
        for key, val in run.linter.stats.by_module.items()
    }
    errcount = 0
    for fname in dirtyfiles:
        mname2 = paths_to_names.get(fname)
        if mname2 is None:
            raise Exception('unable to get module name for "' + fname + '"')
        counts = stats_by_module.get(mname2)
        # 'statement' count seems to be new and always non-zero; ignore it
        if counts is not None:
            counts = {c: v for c, v in counts.items() if c != 'statement'}
        if (counts is not None and any(counts.values())) or have_dep_cycles:
            # Errored: drop the stored hash so the file stays dirty.
            # print('GOT FAIL FOR', fname, counts)
            if 'hash' in cache.entries[fname]:
                del cache.entries[fname]['hash']
            errcount += 1
        else:
            # Clean: store the current hash so the file is skipped next run.
            # print('MARKING FILE CLEAN', mname2, fname)
            cache.entries[fname]['hash'] = (cache.curhashes[fname])
    return errcount
def formatscripts(projroot: Path, full: bool) -> None:
    """Runs yapf on all our scripts (multithreaded).

    Args:
      projroot: project root directory.
      full: if True, discard the cache and re-format everything.
    """
    import time
    from concurrent.futures import ThreadPoolExecutor
    from efrotools import get_files_hash
    from multiprocessing import cpu_count
    os.chdir(projroot)
    cachepath = Path(projroot, 'config/.cache-formatscripts')
    if full and cachepath.exists():
        cachepath.unlink()
    cache = FileCache(cachepath)
    yapfconfig = Path(projroot, '.style.yapf')
    filenames = get_script_filenames(projroot)

    # Folding the config hash in dirties everything on config changes.
    confighash = get_files_hash([yapfconfig])
    cache.update(filenames, confighash)
    dirtyfiles = cache.get_dirty_files()

    def format_file(filename: str) -> None:
        start_time = time.time()
        result = subprocess.call(['yapf', '--in-place', filename])
        if result != 0:
            # Report which file failed (was previously '(unknown)').
            raise Exception(f'Formatting failed for {filename}')
        duration = time.time() - start_time
        print(f'Formatted {filename} in {duration:.2f} seconds.')
        sys.stdout.flush()

    # NOTE: using fewer workers than we have logical procs for now;
    # we're bottlenecked by one or two long running instances
    # so it actually helps to lighten the load around them.
    # may want to revisit later when we have everything chopped up
    # better
    with ThreadPoolExecutor(max_workers=cpu_count() // 2) as executor:
        # Convert the futures to a list to propagate any errors even
        # though there are no return values we use.
        list(executor.map(format_file, dirtyfiles))
    if dirtyfiles:
        # Since we changed files, need to update hashes again.
        cache.update(filenames, confighash)
        cache.mark_clean(filenames)
        cache.write()
    print(f'Formatting is up to date for {len(filenames)} script files.',
          flush=True)