def __init__(self):
    """Constructor for IncludeOrder task.

    Builds the classification tables used to sort #include directives.
    There are 5 header groups:
      0. Related headers
      1. C system headers (includes standard library headers)
      2. C++ system headers (includes standard library headers)
      3. Other library headers
      4. Project headers

    See comments below for how headers are classified.
    """
    Task.__init__(self)

    # Header type 0: Related headers
    #   Base name of include matches base name of current file

    # Header type 1: C standard library headers
    self.c_std = [
        "assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", "float.h",
        "inttypes.h", "iso646.h", "limits.h", "locale.h", "math.h",
        "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", "stdatomic.h",
        "stdbool.h", "stddef.h", "stdint.h", "stdio.h", "stdlib.h",
        "stdnoreturn.h", "string.h", "tgmath.h", "threads.h", "time.h",
        "uchar.h", "wchar.h", "wctype.h"
    ]

    # Header type 1: C system headers (lowercase name ending in ".h" within
    # angle brackets). Raw string avoids invalid escape sequence warnings
    # (SyntaxWarning as of Python 3.12).
    self.c_sys_regex = regex.compile(r"<[a-z][A-Za-z0-9/_-]*\.h>")

    # Header type 2: C++ standard library headers
    self.cpp_std = [
        "cstdlib", "csignal", "csetjmp", "cstdarg", "typeinfo", "typeindex",
        "type_traits", "bitset", "functional", "utility", "ctime", "chrono",
        "cstddef", "initializer_list", "tuple", "new", "memory",
        "scoped_allocator", "climits", "cfloat", "cstdint", "cinttypes",
        "limits", "exception", "stdexcept", "cassert", "system_error",
        "cerrno", "cctype", "cwctype", "cstring", "cwchar", "cuchar",
        "string", "array", "vector", "deque", "list", "forward_list", "set",
        "map", "unordered_set", "unordered_map", "stack", "queue",
        "algorithm", "iterator", "cmath", "complex", "valarray", "random",
        "numeric", "ratio", "cfenv", "iosfwd", "ios", "istream", "ostream",
        "iostream", "fstream", "sstream", "strstream", "iomanip",
        "streambuf", "cstdio", "locale", "clocale", "codecvt", "regex",
        "atomic", "thread", "mutex", "shared_mutex", "future",
        "condition_variable", "ciso646", "ccomplex", "ctgmath", "cstdalign",
        "cstdbool", "any", "filesystem", "optional", "string_view",
        "variant"
    ]

    # Header type 3: Other library headers
    #   They use angle brackets (open_bracket group is angle bracket)
    #
    # Header type 4: Project headers
    #   They use double quotes (all other headers)

    # Matches a whole #include line, optionally commented out. Raw strings
    # avoid invalid escape sequence warnings for "\#", "\s", etc.
    self.header_regex = regex.compile(r"(?P<comment>//\s*)?"
                                      r"\#include\s*"
                                      r"(?P<header>"
                                      r"(?P<open_bracket><|\")"
                                      r"(?P<name>[^>\"]*)"
                                      r"(?P<close_bracket>>|\"))"
                                      r"(?P<postfix>.*)$")
def __init__(self, current_year):
    """Constructor for LicenseUpdate task.

    Keyword arguments:
    current_year -- year string used as the end of the copyright range
    """
    Task.__init__(self)

    # Stored as a string (not int) because it is concatenated into the
    # license header text
    self.__current_year = current_year
def __init__(self, clang_version):
    """Constructor for ClangFormat task.

    Keyword arguments:
    clang_version -- version number of clang-format appended to executable
                     name
    """
    Task.__init__(self)

    # An empty version string selects the unsuffixed executable; otherwise
    # the version is appended after a dash (e.g. "clang-format-12").
    self.exec_name = ("clang-format" if clang_version == "" else
                      "clang-format-" + clang_version)
def __try_regex(self, lines, license_template):
    """Try finding license with regex of license template.

    Keyword arguments:
    lines -- lines of file
    license_template -- license_template string

    Returns:
    Tuple of whether license was found, year, and file contents after
    license.
    """
    linesep = Task.get_linesep(lines)

    # Convert the license template to a regex. Raw strings are used for the
    # replacement text to avoid invalid escape sequence warnings
    # (SyntaxWarning as of Python 3.12).
    license_rgxstr = "^" + linesep.join(license_template)
    license_rgxstr = license_rgxstr.replace("*", r"\*").replace(
        ".", r"\.").replace("(", r"\(").replace(")", r"\)").replace(
            "{year}", r"(?P<year>[0-9]+)(-[0-9]+)?").replace(
                "{padding}", "[ ]*")
    license_rgx = regex.compile(license_rgxstr, regex.M)

    # Default to current year in case the template has no {year} token
    year = self.__current_year

    # Compare license
    match = license_rgx.search(lines)
    if match:
        try:
            year = match.group("year")
        except IndexError:
            # Template contained no {year} token; keep the default
            pass

        # If comment at beginning of file is non-empty license, update it
        return (True, year, linesep + lines[match.end():].lstrip())
    else:
        return (False, year, lines)
def run_pipeline(self, config_file, name, lines):
    """Flag "using" declarations in the global namespace.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents (unchanged), False, and whether the lint check
    passed.
    """
    linesep = Task.get_linesep(lines)
    format_succeeded = True

    # Tokenize file as brace opens, brace closes, and "using" declarations.
    # "using" declarations are scoped, so content inside any bracket pair is
    # considered outside the global namespace. Raw string avoids invalid
    # escape sequence warnings for "\{" and "\}".
    token_regex = re.compile(r"\{|\}|using .*;")

    brace_count = 0
    for match in token_regex.finditer(lines):
        token = match.group()

        if token == "{":
            brace_count += 1
        elif token == "}":
            brace_count -= 1
        elif token.startswith("using"):
            # Only declarations at brace depth 0 are in the global namespace
            if brace_count == 0:
                linenum = lines.count(linesep, 0, match.start()) + 1
                # A NOLINT marker on the offending line suppresses the
                # warning
                if "NOLINT" not in lines.splitlines()[linenum - 1]:
                    format_succeeded = False
                    print(name + ": " + str(linenum) + ": '" + token +
                          "' in global namespace")

    return (lines, False, format_succeeded)
def make_include_guard(self, config_file, name):
    """Returns properly formatted include guard based on repository root and
    file name.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    """
    repo_root_name_override = config_file.group("repoRootNameOverride")

    repo_root = Task.get_repo_root()
    guard_root = os.path.relpath(name, repo_root)
    if not repo_root_name_override:
        guard_path = os.path.basename(repo_root) + os.sep
    else:
        # Config may override the repository name used as the guard prefix
        guard_path = repo_root_name_override[0] + os.sep
    include_roots = config_file.group("includeGuardRoots")
    if include_roots:
        # Strip the longest matching include root so guards are relative to
        # the include directory rather than the repository root
        prefix = ""
        for include_root in include_roots:
            if guard_root.startswith(
                    include_root) and len(include_root) > len(prefix):
                prefix = include_root
        guard_path += guard_root[len(prefix):]
        # lstrip("_") removes a leading underscore that appears when the
        # include root lacks a trailing separator
        return (regex.sub("[^a-zA-Z0-9]", "_",
                          guard_path).upper() + "_").lstrip("_")

    # No include guard roots matched, so append full name
    guard_path += guard_root
    return regex.sub("[^a-zA-Z0-9]", "_", guard_path).upper() + "_"
def run_pipeline(self, config_file, name, lines):
    """Ensure the file ends with exactly one trailing line separator.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success.
    """
    normalized = lines.rstrip() + Task.get_linesep(lines)
    changed = normalized != lines
    if changed:
        return (normalized, True, True)
    return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Update the license header using the file's Git modification year.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success.
    """
    linesep = Task.get_linesep(lines)

    license_template = Config.read_file(
        os.path.dirname(os.path.abspath(name)), ".styleguide-license")

    # Get year when file was most recently modified in Git history
    #
    # Committer date is used instead of author date (the one shown by "git
    # log") because the year the file was last modified in the history
    # should be used. Author dates can be older than this or even out of
    # order in the log.
    cmd = ["git", "log", "-n", "1", "--format=%ci", "--", name]
    # %ci output starts with "YYYY-..." so the first 4 characters are the
    # year
    last_year = subprocess.run(cmd,
                               stdout=subprocess.PIPE).stdout.decode()[:4]

    # If file hasn't been committed yet, use current calendar year as end of
    # copyright year range
    if last_year == "":
        last_year = str(date.today().year)

    # Prefer the stricter regex match; fall back to a string search
    success, first_year, appendix = self.__try_regex(
        lines, last_year, license_template)
    if not success:
        success, first_year, appendix = self.__try_string_search(
            lines, last_year, license_template)

    output = ""

    # Determine copyright range and trailing padding
    if first_year != last_year:
        year_range = first_year + "-" + last_year
    else:
        year_range = first_year

    for line in license_template:
        # Insert copyright year range
        line = line.replace("{year}", year_range)

        # Insert padding which expands to the 80th column. If there is more
        # than one padding token, the line may contain fewer than 80
        # characters due to rounding during the padding width calculation.
        PADDING_TOKEN = "{padding}"
        padding_count = line.count(PADDING_TOKEN)
        if padding_count:
            padding = 80 - len(line) + len(PADDING_TOKEN) * padding_count
            padding_width = int(padding / padding_count)
            line = line.replace(PADDING_TOKEN, " " * padding_width)

        output += line + linesep

    # Copy rest of original file into new one
    output += appendix

    return (output, lines != output, True)
def run_pipeline(self, config_file, name, lines): self.override_regexes = [] # Compile include sorting override regexes for group in [ "includeRelated", "includeCSys", "includeCppSys", "includeOtherLibs", "includeProject" ]: regex_str = config_file.regex(group) self.override_regexes.append(regex.compile(regex_str)) self.linesep = Task.get_linesep(lines) file_name = os.path.basename(name) lines_list = lines.splitlines() # Write lines from beginning of file to headers i = 0 while i < len(lines_list) and ("#ifdef" not in lines_list[i] and "#include" not in lines_list[i]): i += 1 output_list = lines_list[0:i] suboutput, inc_present, idx, valid_headers = self.header_sort( config_file, lines_list, file_name, i, len(lines_list), 0) i = idx # If header failed to classify, return failure if not valid_headers: return (lines, False, False) if suboutput: output_list.extend(suboutput) # Remove extra empty lines from end of includes while len(output_list) > 0 and output_list[-1].rstrip() == "": del output_list[-1] # Remove last newline # Remove possible extra newline from #endif if len(output_list) > 0: output_list[-1] = output_list[-1].rstrip() output_list.append("") # Write rest of file output_list.extend(lines_list[i:]) output = self.linesep.join(output_list).rstrip() + self.linesep if output != lines: return (output, True, True) else: return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Strip trailing whitespace from every line of the file.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success.
    """
    linesep = Task.get_linesep(lines)

    file_changed = False
    output = ""
    for line in lines.splitlines():
        # rstrip() the line directly; the original full-slice copy
        # (line[0:len(line)]) was a no-op
        processed_line = line.rstrip()

        # A length difference means trailing whitespace was removed
        if not file_changed and len(line) != len(processed_line):
            file_changed = True
        output += processed_line + linesep

    return (output, file_changed, True)
def run_pipeline(self, config_file, name, lines):
    """Rewrite the file's include guard (or accept "#pragma once").

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success
    (False if no include guard or "#pragma once" was found).
    """
    linesep = Task.get_linesep(lines)

    lines_list = lines.split(linesep)
    # output_list aliases lines_list; elements are rewritten in place
    output_list = lines_list

    state = State.FINDING_IFNDEF
    # Raw strings avoid invalid "\w" escape warnings
    ifndef_regex = re.compile(r"#ifndef \w+", re.ASCII)
    define_regex = re.compile(r"#define \w+", re.ASCII)

    if_preproc_count = 0
    for i in range(len(lines_list)):
        if state == State.FINDING_IFNDEF:
            # Bounds check on i + 1 prevents an IndexError when "#ifndef"
            # is on the last line of the file
            if lines_list[i].lstrip().startswith("#ifndef ") and \
                    i + 1 < len(lines_list) and \
                    lines_list[i + 1].lstrip().startswith("#define "):
                state = State.FINDING_ENDIF

                guard = self.make_include_guard(config_file, name)
                output_list[i] = ifndef_regex.sub("#ifndef " + guard,
                                                  lines_list[i])
                output_list[i + 1] = define_regex.sub(
                    "#define " + guard, lines_list[i + 1])
                if_preproc_count += 1
            elif lines_list[i].lstrip().startswith("#pragma once"):
                state = State.DONE
        elif state == State.FINDING_ENDIF:
            # Track nested #if/#endif pairs so only the guard's #endif is
            # rewritten
            if "#if" in lines_list[i]:
                if_preproc_count += 1
            elif "#endif" in lines_list[i]:
                if_preproc_count -= 1
                if if_preproc_count == 0:
                    state = State.DONE
                    output_list[i] = "#endif // " + guard
                else:
                    output_list[i] = lines_list[i]
            else:
                output_list[i] = lines_list[i]
        else:
            output_list[i] = lines_list[i]

    # if include guard not found
    if state == State.FINDING_IFNDEF:
        print("Error: " + name +
              ": doesn't contain include guard or '#pragma once'")
        return (lines, False, False)

    output = linesep.join(output_list).rstrip() + linesep
    if output != lines:
        return (output, True, True)
    else:
        return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Warn about "using namespace std;" and std subnamespace imports.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents (unchanged), False, and True (warning only,
    never fails).
    """
    linesep = Task.get_linesep(lines)

    # Find instances of "using namespace std;" or subnamespaces of "std",
    # but not std::literals or std::chrono_literals. Raw string avoids
    # invalid "\s" escape warnings; the local is named std_regex so it
    # doesn't shadow the third-party regex module used elsewhere.
    std_regex = re.compile(
        r"using\s+namespace\s+std(;|::(?!(chrono_)?literals|placeholders))")
    for match in std_regex.finditer(lines):
        linenum = lines.count(linesep, 0, match.start()) + 1
        print(
            "Warning: " + name + ": " + str(linenum) + ": avoid \"using namespace std;\" in production software. While it is used in introductory C++, it pollutes the global namespace with standard library symbols."
        )

    return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Remove extra blank lines that follow a class declaration's opening
    brace, while ignoring matches inside comments.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success.
    """
    linesep = Task.get_linesep(lines)

    file_changed = False
    output = ""

    pos = 0
    # Match two or more line separators after a class opening; the "extra"
    # group captures the separators beyond the first
    token_str = r"/\*|\*/|//|" + linesep + r"|class\s[\w\d\s]*{" + \
        linesep + r"(?P<extra>(" + linesep + r")+)"
    token_regex = regex.compile(token_str)

    in_multicomment = False
    in_comment = False
    for match in token_regex.finditer(lines):
        token = match.group()

        if token == "/*":
            in_multicomment = True
        elif token == "*/":
            in_multicomment = False
            in_comment = False
        elif token == "//":
            in_comment = True
        elif token == linesep:
            # A line separator ends a // comment
            in_comment = False
        elif not in_multicomment and not in_comment:
            # Otherwise, the token is a class

            # Removes extra line separators
            output += lines[pos:match.span("extra")[0]]
            pos = match.span()[1]
            file_changed = True

    # Write rest of file if it wasn't all processed
    if pos < len(lines):
        output += lines[pos:]

    if file_changed:
        return (output, file_changed, True)
    else:
        return (lines, file_changed, True)
def run_pipeline(self, config_file, name, lines):
    """Append "// <declaration>" comments to the closing braces of extern
    and namespace blocks.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success.
    """
    linesep = Task.get_linesep(lines)

    output = ""

    # Raw strings avoid invalid escape sequence warnings for "\s", "\w",
    # "\}" (SyntaxWarning as of Python 3.12); regex behavior is unchanged.
    brace_prefix = r"(?P<prefix>(extern|namespace)\s+[\w\"]*)"
    brace_postfix = r"\s*/(/|\*)[^\r\n]*"
    brace_regex = re.compile(
        r"(" + brace_prefix + r"\s*)?{|"  # "{" with optional prefix
        r"\}(" + brace_postfix + r")?")  # "}" with optional comment postfix

    name_stack = []
    brace_count = 0
    extract_location = 0
    for match in brace_regex.finditer(lines):
        token = match.group()

        if match.group("prefix"):
            # Remember which brace depth this named block starts at
            brace_count += 1
            name_stack.append(
                (brace_count, match.group("prefix").rstrip()))
        elif "{" in token:
            brace_count += 1
        elif token.startswith("}"):
            output += lines[extract_location:match.start()]
            # Only annotate if this closing brace matches the top of the
            # name stack
            if len(name_stack) > 0 and \
                    name_stack[len(name_stack) - 1][0] == brace_count:
                output += "} // " + name_stack.pop()[1]
            else:
                output += lines[match.start():match.end()]
            extract_location = match.end()
            brace_count -= 1

    # If input has unprocessed lines, write them to output
    if extract_location < len(lines):
        output += lines[extract_location:]

    if output != lines:
        return (output, True, True)
    else:
        return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Insert or update the license header at the top of the file.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success.
    """
    linesep = Task.get_linesep(lines)

    license_template = Config.read_file(
        os.path.dirname(os.path.abspath(name)), ".styleguide-license")

    # Prefer the stricter regex match; fall back to a string search
    success, year, appendix = self.__try_regex(lines, license_template)
    if not success:
        success, year, appendix = self.__try_string_search(
            lines, license_template)

    output = ""

    # Determine copyright range and trailing padding
    if year != self.__current_year:
        year = year + "-" + self.__current_year

    for line in license_template:
        # Insert copyright year range
        line = line.replace("{year}", year)

        # Insert padding which expands to the 80th column. If there is more
        # than one padding token, the line may contain fewer than 80
        # characters due to rounding during the padding width calculation.
        PADDING_TOKEN = "{padding}"
        padding_count = line.count(PADDING_TOKEN)
        if padding_count:
            padding = 80 - len(line) + len(PADDING_TOKEN) * padding_count
            padding_width = int(padding / padding_count)
            line = line.replace(PADDING_TOKEN, " " * padding_width)

        output += line + linesep

    # Copy rest of original file into new one
    output += appendix

    return (output, lines != output, True)
def make_include_guard(self, config_file, name):
    """Returns properly formatted include guard based on repository root and
    file name.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    """

    def to_guard(path):
        # Uppercase the path with every non-alphanumeric character mapped
        # to an underscore, plus a trailing underscore
        return re.sub("[^a-zA-Z0-9]", "_", path).upper() + "_"

    repo_root = Task.get_repo_root()
    rel_name = os.path.relpath(name, repo_root)

    guard_path = os.path.basename(repo_root) + "/"
    include_roots = config_file.group("includeGuardRoots")
    if include_roots:
        for include_root in include_roots:
            # Strip the first matching include root from the name
            if rel_name.startswith(include_root):
                return to_guard(guard_path + rel_name[len(include_root):])

    # No include guard roots matched, so append full name
    return to_guard(guard_path + rel_name)
def __init__(self):
    """Constructor for Stdlib task.

    Builds a table of C standard library headers and the function/type
    names they declare, used to normalize std:: prefixes.
    """
    Task.__init__(self)

    self.headers = []
    # assert is a macro, so it's omitted to avoid prefixing with std::
    self.headers.append(Header("assert"))
    # "isalum" was a typo: C's <ctype.h> declares isalnum
    self.headers.append(
        Header(
            "ctype", {
                "isalnum", "isalpha", "isblank", "iscntrl", "isdigit",
                "isgraph", "islower", "isprint", "ispunct", "isspace",
                "isupper", "isxdigit", "tolower", "toupper"
            }))
    self.headers.append(Header("errno"))
    self.headers.append(Header("float"))
    self.headers.append(Header("limits"))
    self.headers.append(
        Header(
            "math", {
                "cos", "acos", "cosh", "acosh", "sin", "asin", "asinh",
                "tan", "atan", "atan2", "atanh", "exp", "frexp", "ldexp",
                "log", "log10", "ilogb", "log1p", "log2", "logb", "modf",
                "exp2", "expm1", "scalbl", "scalbln", "pow", "sqrt",
                "cbrt", "hypot", "erf", "erfc", "tgamma", "lgamma",
                "ceil", "floor", "fmod", "trunc", "round", "lround",
                "llround", "rint", "lrint", "llrint", "nearbyint",
                "remainder", "remquo", "copysign", "nan", "nextafter",
                "nexttoward", "fdim", "fmax", "fmin", "fma", "fpclassify",
                "abs", "fabs", "signbit", "isfinite", "isinf", "isnan",
                "isnormal", "isgreater", "isgreaterequal", "isless",
                "islessequal", "islessgreater", "isunordered"
            }))
    self.headers.append(
        Header("setjmp", {"longjmp", "setjmp"}, ["jmp_buf"]))
    self.headers.append(
        Header("signal", {"signal", "raise"}, ["sig_atomic_t"], False))
    self.headers.append(Header("stdarg", {"va_list"}))
    self.headers.append(
        Header("stddef", type_regexes=["(ptrdiff|max_align|nullptr)_t"]))
    # size_t isn't actually defined in stdint, but it fits best here for
    # removing the std:: prefix
    self.headers.append(
        Header(
            "stdint",
            type_regexes=[
                "((u?int((_fast|_least)?(8|16|32|64)|max|ptr)|size)_t)"
            ],
            add_prefix=False))
    self.headers.append(
        Header(
            "stdio", {
                "remove", "rename", "rewind", "tmpfile", "tmpnam",
                "fclose", "fflush", "fopen", "freopen", "fgetc", "fgets",
                "fputc", "fputs", "fread", "fwrite", "fgetpos", "fseek",
                "fsetpos", "ftell", "feof", "ferror", "setbuf", "setvbuf",
                "fprintf", "snprintf", "sprintf", "vfprintf", "vprintf",
                "vsnprintf", "vsprintf", "printf", "fscanf", "sscanf",
                "vfscanf", "vscanf", "vsscanf", "scanf", "getchar",
                "gets", "putc", "putchar", "puts", "getc", "ungetc",
                "clearerr", "perror"
            }, ["FILE", "fpos_t"]))
    self.headers.append(
        Header(
            "stdlib", {
                "atof", "atoi", "atol", "atoll", "strtof", "strtol",
                "strtod", "strtold", "strtoll", "strtoul", "strtoull",
                "rand", "srand", "free", "calloc", "malloc", "realloc",
                "abort", "at_quick_exit", "quick_exit", "atexit", "exit",
                "getenv", "system", "_Exit", "bsearch", "qsort", "llabs",
                "labs", "abs", "lldiv", "ldiv", "div", "mblen", "btowc",
                "wctomb", "wcstombs", "mbstowcs"
            }, ["(l|ll)?div_t"]))
    # "strrspn" was a typo: C's <string.h> declares strspn
    self.headers.append(
        Header(
            "string", {
                "memcpy", "memcmp", "memchr", "memmove", "memset",
                "strcpy", "strncpy", "strcat", "strncat", "strcmp",
                "strncmp", "strcoll", "strchr", "strrchr", "strstr",
                "strxfrm", "strcspn", "strspn", "strpbrk", "strtok",
                "strerror", "strlen"
            }))
    self.headers.append(
        Header(
            "time", {
                "clock", "asctime", "ctime", "difftime", "gmtime",
                "localtime", "mktime", "strftime", "time"
            }, ["(clock|time)_t"]))
def run_pipeline(self, config_file, name, lines):
    """Insert or update the license header at the top of the file.

    If a comment at the beginning of the file contains "Copyright (c)", it
    is treated as an existing license and replaced; otherwise a license
    header is inserted before the file contents.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success.
    """
    linesep = Task.get_linesep(lines)

    license_template = Config.read_file(
        os.path.dirname(os.path.abspath(name)), ".styleguide-license")

    # Strip newlines at top of file
    stripped_lines = lines.lstrip().split(linesep)

    # If a comment at the beginning of the file is considered a license, it
    # is replaced with an updated license. Otherwise, a license header is
    # inserted before it.
    first_comment_is_license = False
    license_end = 0

    # Regex for tokenizing on comment boundaries. Raw string avoids the
    # invalid "\*" escape warning.
    token_regex = re.compile(r"^/\*|\*/|^//")

    in_multiline_comment = False
    for line in stripped_lines:
        # If part of comment contains "Copyright (c)", comment is license.
        if "Copyright (c)" in line:
            first_comment_is_license = True

        line_has_comment = False
        for match in token_regex.finditer(line):
            # If any comment token was matched, the line has a comment
            line_has_comment = True

            token = match.group()

            if token == "/*":
                in_multiline_comment = True
            elif token == "*/":
                in_multiline_comment = False
        if not in_multiline_comment and not line_has_comment:
            # First non-comment line ends the leading comment block
            break
        else:
            license_end += 1

    # If comment at beginning of file is non-empty license, update it
    if first_comment_is_license and license_end > 0:
        file_parts = \
            [linesep.join(stripped_lines[0:license_end]),
             linesep + linesep.join(stripped_lines[license_end:]).lstrip()]
    else:
        file_parts = ["", linesep + lines.lstrip()]

    # Default year when none is found is current one
    year = self.__current_year

    # Raw string avoids invalid "\(", "\s" escape warnings
    year_regex = re.compile(r"Copyright \(c\).*\s(20..)")
    modify_copyright = False
    for line in file_parts[0].split(linesep):
        match = year_regex.search(line)
        # If license contains copyright pattern, extract the first year
        if match:
            year = match.group(1)
            modify_copyright = True
            break

    output = ""

    # Determine copyright range and trailing padding
    if modify_copyright and year != self.__current_year:
        year = year + "-" + self.__current_year

    for line in license_template:
        # Insert copyright year range
        line = line.replace("{year}", year)

        # Insert padding which expands to the 80th column. If there is more
        # than one padding token, the line may contain fewer than 80
        # characters due to rounding during the padding width calculation.
        PADDING_TOKEN = "{padding}"
        padding_count = line.count(PADDING_TOKEN)
        if padding_count:
            padding = 80 - len(line) + len(PADDING_TOKEN) * padding_count
            padding_width = int(padding / padding_count)
            line = line.replace(PADDING_TOKEN, " " * padding_width)

        output += line + linesep

    # Copy rest of original file into new one
    if len(file_parts) > 1:
        output += file_parts[1]

    return (output, lines != output, True)
def main():
    """Entry point: discover files, filter them, and run all formatting and
    lint tasks over the code base."""
    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        description=
        "Runs all formatting tasks on the code base. This should be invoked from a directory within the project."
    )
    parser.add_argument(
        "-v",
        dest="verbose1",
        action="store_true",
        help="verbosity level 1 (prints names of processed files)")
    parser.add_argument(
        "-vv",
        dest="verbose2",
        action="store_true",
        help=
        "verbosity level 2 (prints names of processed files and tasks run on them)"
    )
    parser.add_argument(
        "-j",
        dest="jobs",
        type=int,
        default=mp.cpu_count(),
        help="number of jobs to run (default is number of cores)")
    parser.add_argument(
        "-y",
        dest="year",
        type=int,
        default=date.today().year,
        help=
        "year to use when updating license headers (default is current year)")
    parser.add_argument(
        "-clang",
        dest="clang_version",
        type=str,
        default="",
        help=
        "version suffix for clang-format (invokes \"clang-format-CLANG_VERSION\" or \"clang-format\" if no suffix provided)"
    )
    parser.add_argument(
        "-f",
        dest="file",
        type=str,
        default="",
        nargs="+",
        help=
        "file or directory names (can be path relative to python invocation directory or absolute path)"
    )
    args = parser.parse_args()

    # All discovered files are relative to Git repo root directory, so find
    # the root.
    root_path = Task.get_repo_root()
    if root_path == "":
        print("Error: not invoked within a Git repository", file=sys.stderr)
        sys.exit(1)

    # If no files explicitly specified
    if not args.file:
        # Delete temporary files from previous incomplete run
        files = [
            os.path.join(dp, f)
            for dp, dn, fn in os.walk(root_path) for f in fn
            if f.endswith(".tmp")
        ]
        for f in files:
            os.remove(f)

        # Recursively create list of files in given directory
        files = [
            os.path.join(dp, f) for dp, dn, fn in os.walk(root_path)
            for f in fn
        ]

        if not files:
            print("Error: no files found to format", file=sys.stderr)
            sys.exit(1)
    else:
        files = []
        for name in args.file:
            # If a directory was specified, recursively expand it
            if os.path.isdir(name):
                files.extend([
                    os.path.join(dp, f) for dp, dn, fn in os.walk(name)
                    for f in fn
                ])
            else:
                files.append(name)

    # Convert relative paths of files to absolute paths
    files = [os.path.abspath(name) for name in files]

    # Don't run tasks on Git metadata
    files = [name for name in files if os.sep + ".git" + os.sep not in name]

    # Don't check for changes in or run tasks on ignored files
    files = filter_ignored_files(files)

    # Create list of all changed files
    changed_file_list = []
    output_list = subprocess.run(
        ["git", "diff", "--name-only", "master"],
        stdout=subprocess.PIPE).stdout.split()
    for line in output_list:
        changed_file_list.append(root_path + os.sep +
                                 line.strip().decode("ascii"))

    # Don't run tasks on modifiable or generated files
    work = []
    for name in files:
        config_file = Config(os.path.dirname(name), ".styleguide")

        if config_file.is_modifiable_file(name):
            continue
        if config_file.is_generated_file(name):
            # Emit warning if a generated file was edited
            if name in changed_file_list:
                print("Warning: generated file '" + name + "' modified")
            continue

        work.append(name)
    files = work

    # If there are no files left, do nothing
    if len(files) == 0:
        sys.exit(0)

    # Prepare file batches for batch tasks
    chunksize = math.ceil(len(files) / args.jobs)
    file_batches = [
        files[i:i + chunksize] for i in range(0, len(files), chunksize)
    ]

    # IncludeOrder is run after Stdlib so any C std headers changed to C++
    # or vice versa are sorted properly. ClangFormat is run after the other
    # tasks so it can clean up their formatting.
    task_pipeline = [
        BraceComment(),
        CIdentList(),
        IncludeGuard(),
        LicenseUpdate(str(args.year)),
        JavaClass(),
        Newline(),
        Stdlib(),
        IncludeOrder(),
        UsingDeclaration(),
        UsingNamespaceStd(),
        Whitespace()
    ]
    run_pipeline(task_pipeline, args, files)

    task_pipeline = [ClangFormat(args.clang_version)]
    run_batch(task_pipeline, args, file_batches)

    # These tasks fix clang-format formatting
    task_pipeline = [Jni()]
    run_pipeline(task_pipeline, args, files)

    # Lint is run last since previous tasks can affect its output.
    task_pipeline = [PyFormat(), Lint()]
    run_batch(task_pipeline, args, file_batches)
def run_pipeline(self, config_file, name, lines):
    """Append "// <declaration>" comments to the closing braces of extern
    and namespace blocks, while ignoring braces inside comments, strings,
    and character literals.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success.
    """
    linesep = Task.get_linesep(lines)

    output = ""

    brace_prefix = "(?P<prefix>(extern|namespace)\s+[\w\"]*)"
    brace_postfix = "[ \t]*/(/|\*)[^\r\n]*"
    # Tokenizes comment delimiters, escaped characters, quotes, line
    # separators, and braces so brace counting can skip comments/strings
    brace_regex = regex.compile(
        r"/\*|\*/|//|\\\\|\\\"|\"|\\'|'|" + linesep + "|" + \
        "(" + brace_prefix + "\s*)?{|"  # "{" with optional prefix
        "}(" + brace_postfix + ")?")  # "}" with optional comment postfix

    name_stack = []
    brace_count = 0
    extract_location = 0
    in_multicomment = False
    in_singlecomment = False
    in_string = False
    in_char = False
    for match in brace_regex.finditer(lines):
        token = match.group()

        if token == "/*":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = True
        elif token == "*/":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = False
        elif token == "//":
            if not in_multicomment and not in_string and not in_char:
                in_singlecomment = True
        elif in_singlecomment and linesep in token:
            # Ignore token if it's in a singleline comment. Only check it
            # for newlines to end the comment.
            in_singlecomment = False
        elif in_multicomment or in_singlecomment:
            # Tokens processed after this branch are ignored if they are in
            # comments
            continue
        elif token == "\\\"":
            continue
        elif token == "\"":
            if not in_char:
                in_string = not in_string
        elif token == "\\'":
            continue
        elif token == "'":
            if not in_string:
                in_char = not in_char
        elif in_string or in_char:
            # Tokens processed after this branch are ignored if they are in
            # double or single quotes
            continue
        elif match.group("prefix"):
            # Named block: remember the brace depth it starts at
            brace_count += 1
            name_stack.append(
                (brace_count, match.group("prefix").rstrip()))
        elif "{" in token:
            brace_count += 1
        elif token.startswith("}"):
            output += lines[extract_location:match.start()]
            # Only annotate if this closing brace matches the top of the
            # name stack
            if len(name_stack) > 0 and \
                    name_stack[len(name_stack) - 1][0] == brace_count:
                output += "} // " + name_stack.pop()[1]
            else:
                output += lines[match.start():match.end()]
            extract_location = match.end()
            brace_count -= 1

    # If input has unprocessed lines, write them to output
    if extract_location < len(lines):
        output += lines[extract_location:]

    if output != lines:
        return (output, True, True)
    else:
        return (lines, False, True)
def run_pipeline(self, config_file, name, lines):
    """Convert "()" parameter lists to "(void)" in code compiled with C
    linkage, tracking extern "C"/"C++" blocks, comments, strings, and
    preprocessor branches.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents, whether the file was changed, and success
    (False if brace tracking underflowed or didn't balance).
    """
    linesep = Task.get_linesep(lines)

    file_changed = False
    output = ""

    pos = 0

    # C files use C linkage by default
    is_c = config_file.is_c_file(name)

    # Tokenize as extern "C" or extern "C++" with optional {, open brace,
    # close brace, or () followed by { to disambiguate function calls.
    # extern is first to try matching a brace to it before classifying the
    # brace as generic.
    #
    # Valid function prototypes and definitions have return type, spaces,
    # function name, optional spaces, then braces. They are followed by ;
    # or {.
    #
    # "def\\s+\w+" matches preprocessor directives "#ifdef" and "#ifndef"
    # so their contents aren't used as a return type.
    preproc_str = "#else|#endif|"
    comment_str = "/\*|\*/|//|" + linesep + "|"
    string_str = r"\\\\|\\\"|\"|"
    char_str = r"\\'|'|"
    extern_str = "(?P<ext_decl>extern \"C(\+\+)?\")\s+(?P<ext_brace>\{)?|"
    braces_str = "\{|\}|;|def\s+\w+|\w+\s+\w+\s*(?P<paren>\(\))"
    postfix_str = "(?=\s*(;|\{))"
    token_regex = regex.compile(preproc_str + comment_str + string_str +
                                char_str + extern_str + braces_str +
                                postfix_str)

    EXTRA_POP_OFFSET = 2

    # If value is greater than pop offset, the value needs to be restored
    # in addition to an extra stack pop being performed. The pop offset is
    # removed before assigning to is_c.
    #
    # is_c + pop offset == 2: C lang restore that needs extra brace pop
    # is_c + pop offset == 3: C++ lang restore that needs extra brace pop
    extern_brace_indices = [is_c]

    in_preproc_else = False
    in_multicomment = False
    in_singlecomment = False
    in_string = False
    in_char = False
    for match in token_regex.finditer(lines):
        token = match.group()

        # Skip #else to #endif in case they have braces in them. This
        # assumes preprocessor directives are only used for conditional
        # compilation for different platforms and have the same amount of
        # braces in both branches. Nested preprocessor directives are also
        # not handled.
        if token == "#else":
            in_preproc_else = True
        elif token == "#endif":
            in_preproc_else = False
        if in_preproc_else:
            continue

        if token == "/*":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = True
        elif token == "*/":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = False
        elif token == "//":
            if not in_multicomment and not in_string and not in_char:
                in_singlecomment = True
        elif in_singlecomment and linesep in token:
            # Ignore token if it's in a singleline comment. Only check it
            # for newlines to end the comment.
            in_singlecomment = False
        elif in_multicomment or in_singlecomment:
            # Tokens processed after this branch are ignored if they are in
            # comments
            continue
        elif token == "\\\"":
            continue
        elif token == "\"":
            if not in_char:
                in_string = not in_string
        elif token == "\\'":
            continue
        elif token == "'":
            if not in_string:
                in_char = not in_char
        elif in_string or in_char:
            # Tokens processed after this branch are ignored if they are in
            # double or single quotes
            continue
        elif token == "{":
            extern_brace_indices.append(is_c)
        elif token == "}":
            is_c = extern_brace_indices.pop()

            if len(extern_brace_indices) == 0:
                # More "}" than "{" — the file's braces are unbalanced
                self.__print_failure(name)
                return (lines, False, False)

            # If the next stack frame is from an extern without braces, pop
            # it.
            if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
                is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
                extern_brace_indices.pop()
        elif token == ";":
            if len(extern_brace_indices) == 0:
                self.__print_failure(name)
                return (lines, False, False)

            # If the next stack frame is from an extern without braces, pop
            # it.
            if extern_brace_indices[-1] >= EXTRA_POP_OFFSET:
                is_c = extern_brace_indices[-1] - EXTRA_POP_OFFSET
                extern_brace_indices.pop()
        elif token.startswith("extern"):
            # Back up language setting first
            if match.group("ext_brace"):
                extern_brace_indices.append(is_c)
            else:
                # Handling an extern without braces changing the language
                # type is done by treating it as a pseudo-brace that gets
                # popped as well when the next "}" or ";" is encountered.
                # The "extra pop" offset is used as a flag on the top stack
                # value that is checked whenever a pop is performed.
                extern_brace_indices.append(is_c + EXTRA_POP_OFFSET)

            # Change language based on extern declaration
            if match.group("ext_decl") == "extern \"C\"":
                is_c = True
            else:
                is_c = False
        elif match.group(
                "paren") and "return " not in match.group() and is_c:
            # Replaces () with (void)
            output += lines[pos:match.span("paren")[0]] + "(void)"
            pos = match.span("paren")[0] + len("()")
            file_changed = True

    # Write rest of file if it wasn't all processed
    if pos < len(lines):
        output += lines[pos:]

    # Invariant: extern_brace_indices has one entry
    success = len(extern_brace_indices) == 1
    if not success:
        self.__print_failure(name)

    if file_changed:
        return (output, file_changed, success)
    else:
        return (lines, file_changed, success)
def run_pipeline(self, config_file, name, lines):
    """Flag "using" declarations in the global namespace, ignoring matches
    inside comments, strings, and character literals.

    Keyword arguments:
    config_file -- Config object
    name -- file name string
    lines -- file contents string

    Returns:
    Tuple of file contents (unchanged), False, and whether the lint check
    passed.
    """
    linesep = Task.get_linesep(lines)
    format_succeeded = True

    # Tokenize file as brace opens, brace closes, and "using" declarations.
    # "using" declarations are scoped, so content inside any bracket pair
    # is considered outside the global namespace.
    token_regex = regex.compile(r"/\*|\*/|//|\\\\|\\\"|\"|\\'|'|" +
                                linesep + r"|\{|\}|using\s[^;]*;")

    brace_count = 0
    in_multicomment = False
    in_singlecomment = False
    in_string = False
    in_char = False
    for match in token_regex.finditer(lines):
        token = match.group()

        if token == "/*":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = True
        elif token == "*/":
            if not in_singlecomment and not in_string and not in_char:
                in_multicomment = False
        elif token == "//":
            if not in_multicomment and not in_string and not in_char:
                in_singlecomment = True
        elif in_singlecomment and linesep in token:
            # Ignore token if it's in a singleline comment. Only check it
            # for newlines to end the comment.
            in_singlecomment = False
        elif in_multicomment or in_singlecomment:
            # Tokens processed after this branch are ignored if they are in
            # comments
            continue
        elif token == "\\\"":
            continue
        elif token == "\"":
            if not in_char:
                in_string = not in_string
        elif token == "\\'":
            continue
        elif token == "'":
            if not in_string:
                in_char = not in_char
        elif in_string or in_char:
            # Tokens processed after this branch are ignored if they are in
            # double or single quotes
            continue
        elif token == "{":
            brace_count += 1
        elif token == "}":
            brace_count -= 1
        elif token.startswith("using"):
            # Only declarations at brace depth 0 are in the global
            # namespace
            if brace_count == 0:
                linenum = lines.count(linesep, 0, match.start()) + 1
                # A NOLINT marker on the offending line suppresses the
                # warning
                if "NOLINT" not in lines.splitlines()[linenum - 1]:
                    format_succeeded = False
                    print(name + ": " + str(linenum) + ": '" + token + \
                          "' in global namespace")

    return (lines, False, format_succeeded)
def test_includeguard():
    """Exercise the IncludeGuard task on a series of header fixtures."""
    test = TaskTest(IncludeGuard())

    repo_root = os.path.basename(Task.get_repo_root()).upper()
    guard = repo_root + "_TEST_H_"

    def header(*file_lines):
        # Join fixture lines with the platform separator; every fixture ends
        # with a trailing separator
        return os.linesep.join(file_lines) + os.linesep

    # Fix incorrect include guard
    test.add_input("./Test.h",
                   header("#ifndef WRONG_H", "#define WRONG_C", "", "#endif"))
    test.add_output(
        header("#ifndef " + guard, "#define " + guard, "",
               "#endif // " + guard), True, True)

    # Ensure nested preprocessor statements are handled properly for incorrect
    # include guard
    test.add_input(
        "./Test.h",
        header("#ifndef WRONG_H", "#define WRONG_C", "", "#if SOMETHING",
               "// do something", "#endif", "#endif"))
    test.add_output(
        header("#ifndef " + guard, "#define " + guard, "", "#if SOMETHING",
               "// do something", "#endif", "#endif // " + guard), True, True)

    # Don't touch correct include guard
    test.add_input(
        "./Test.h",
        header("#ifndef " + guard, "#define " + guard, "",
               "#endif // " + guard))
    test.add_latest_input_as_output(True)

    # Fail on missing include guard
    test.add_input("./Test.h", header("// Empty file"))
    test.add_latest_input_as_output(False)

    # Verify pragma once counts as include guard
    test.add_input("./Test.h", header("#pragma once"))
    test.add_latest_input_as_output(True)

    # Ensure include guard roots are processed correctly
    wpiformat_guard = repo_root + "_WPIFORMAT_TEST_H_"
    test.add_input(
        "./Test.h",
        header("#ifndef " + wpiformat_guard, "#define " + wpiformat_guard, "",
               "#endif // " + wpiformat_guard))
    test.add_output(
        header("#ifndef " + guard, "#define " + guard, "",
               "#endif // " + guard), True, True)

    # Ensure leading underscores are removed (this occurs if the user doesn't
    # include a trailing "/" in the include guard root)
    nested_guard = repo_root + "_WPIFORMAT_TEST_TEST_H_"
    test.add_input(
        "./Test/Test.h",
        header("#ifndef " + nested_guard, "#define " + nested_guard, "",
               "#endif // " + nested_guard))
    test.add_output(
        header("#ifndef " + guard, "#define " + guard, "",
               "#endif // " + guard), True, True)

    test.run(OutputType.FILE)
def __try_string_search(self, lines, license_template):
    """Try finding license with string search.

    A comment block at the top of the file containing "Copyright (c)" is
    treated as the existing license header.

    Keyword arguments:
    lines -- lines of file
    license_template -- license_template string (not used by this search)

    Returns:
    Tuple of whether license was found, the copyright year (the year found
    in the license, or the current year if none was found), and file
    contents after license.
    """
    linesep = Task.get_linesep(lines)

    # Strip newlines at top of file
    stripped_lines = lines.lstrip().split(linesep)

    # If a comment at the beginning of the file is considered a license, it
    # is replaced with an updated license. Otherwise, a license header is
    # inserted before it.
    first_comment_is_license = False
    license_end = 0

    # Regex for tokenizing on comment boundaries. "^//" only matches a line
    # comment that begins the line.
    token_regex = regex.compile("/\*|\*/|^//")

    # in_multiline_comment persists across lines so a /* */ block spanning
    # several lines is counted as one comment
    in_multiline_comment = False
    for line in stripped_lines:
        # If part of comment contains "Copyright (c)", comment is
        # license.
        if "Copyright (c)" in line:
            first_comment_is_license = True

        line_has_comment = False
        for match in token_regex.finditer(line):
            # If any comment token was matched, the line has a comment
            line_has_comment = True

            token = match.group()

            if token == "/*":
                in_multiline_comment = True
            elif token == "*/":
                in_multiline_comment = False
        # The header ends at the first line that is neither inside a /* */
        # block nor itself a comment line
        if not in_multiline_comment and not line_has_comment:
            break
        else:
            license_end += 1

    # If comment at beginning of file is non-empty license, update it
    year = self.__current_year
    if first_comment_is_license and license_end > 0:
        license_part = linesep.join(stripped_lines[0:license_end])
        appendix_part = \
            linesep + linesep.join(stripped_lines[license_end:]).lstrip()

        # Atomic group (?>...) prevents backtracking, so the first 20xx year
        # following the copyright notice is captured
        year_regex = regex.compile("Copyright \(c\)(?>.*?\s(20..))")
        for line in license_part.split(linesep):
            match = year_regex.search(line)
            # If license contains copyright pattern, extract the first year
            if match:
                year = match.group(1)
                break

        return (True, year, appendix_part)
    else:
        return (False, year, linesep + lines.lstrip())
def run_pipeline(self, config_file, name, lines):
    """Regenerate JNI method comments and reflow JNI signatures.

    For each "JNIEXPORT <ret> JNICALL Java_*" signature found, replaces any
    preceding /* */ comment with a generated Class/Method/Signature comment
    and rewrites the signature so its argument list wraps at 80 columns.

    Keyword arguments:
    config_file -- Config object (not used by this task)
    name -- file name (not used by this task)
    lines -- file contents

    Returns:
    Tuple of file contents, whether the file was changed, and True (this
    task never reports failure).
    """
    linesep = Task.get_linesep(lines)

    # Matches an optional preceding /* */ comment, then
    # "JNIEXPORT <ret> JNICALL Java_*(JNIEnv* <env>, jclass <name>".
    # The atomic group (?>...) keeps the comment match from backtracking.
    regex_str_sig = r"(/\*(?>(.|\n)*?\*/)\s+)?" + \
        r"JNIEXPORT\s+(?P<ret>\w+)\s+JNICALL\s+" + \
        r"(?P<func>Java_\w+)\s*\(\s*" + \
        r"(?P<env_type>JNIEnv\s*\*\s*)" + \
        r"(?P<env_name>\w+)?,\s*jclass\s*(?P<jclass_name>\w*)?"
    regex_sig = regex.compile(regex_str_sig)

    # Splits a Java_* symbol into its class and method parts; the method is
    # the trailing component containing no underscores
    regex_str_func = r"Java_(?P<class>\w+)_(?P<method>[^_]+)$"
    regex_func = regex.compile(regex_str_func)

    # Matches a comma followed by the type, an optional variable name, and
    # an optional closing parenthesis
    regex_str_arg = (r", \s* (?P<arg>(?P<arg_type>[\w\*]+)(\s+ \w+)?)|\)\s*"
                     r"(?P<trailing>{|;)")
    regex_arg = regex.compile(regex_str_arg, regex.VERBOSE)

    output = ""
    pos = 0
    for match_sig in regex_sig.finditer(lines):
        comment = ""
        signature = ""

        # Copy everything between the previous signature and this one
        if match_sig.start() > 0:
            output += lines[pos:match_sig.start()]

        # Add JNI-specific args
        jni_args = " ("
        if match_sig.group("env_type"):
            jni_args += match_sig.group("env_type")
        if match_sig.group("env_name"):
            jni_args += match_sig.group("env_name")
        jni_args += ", jclass"
        if match_sig.group("jclass_name"):
            jni_args += " " + match_sig.group("jclass_name")

        # Write JNI function comment. Splitting at "__" removes overload
        # annotation from method comment
        match = regex_func.search(match_sig.group("func").split("__")[0])
        comment += "/*" + linesep + \
            " * Class: " + match.group("class") + linesep + \
            " * Method: " + match.group("method") + linesep + \
            " * Signature: ("
        signature += "JNIEXPORT " + match_sig.group("ret") + " JNICALL" + \
            linesep + match_sig.group("func") + linesep + jni_args

        # Add other args
        line_length = len(jni_args)
        for match_arg in regex_arg.finditer(lines[match_sig.end():]):
            # Closing parenthesis ends the argument list
            if ")" in match_arg.group():
                break
            # If args going past 80 characters
            elif line_length + len(", ") + len(
                    match_arg.group("arg")) + len(")") > 80:
                # Put current arg on next line and set line_length to
                # reflect that
                signature += "," + linesep + " " + match_arg.group("arg")
                line_length = len(" " + match_arg.group("arg"))
            else:
                signature += ", " + match_arg.group("arg")
                line_length += len(", ") + len(match_arg.group("arg"))
            # Each argument's JNI type abbreviation goes into the Signature
            # line of the comment
            comment += self.map_jni_type(match_arg.group("arg_type"))
        comment += ")" + self.map_jni_type(match_sig.group("ret")) + linesep + \
            " */" + linesep

        # Output correct trailing character for declaration vs definition.
        # NOTE(review): assumes regex_arg always matched at least the closing
        # parenthesis, so match_arg is bound here -- confirm inputs guarantee
        # a ")" after every signature
        if match_arg.group("trailing") == "{":
            signature += ")" + linesep + "{"
        else:
            signature += ");"

        output += comment + signature
        pos = match_sig.end() + match_arg.end()

    # Write rest of file
    if pos < len(lines):
        output += lines[pos:]

    if output == "" or output == lines:
        return (lines, False, True)
    else:
        return (output, True, True)