def lint_js_files_are_translated(files_to_lint):
    """Verify that nltext in the js files are marked for translation.

    See docstring of: _lint_js_content

    Returns:
        List of triples: (filename, lineno, error message)
    """
    # Compile every .jsx input first; we lint the generated js, not the jsx.
    jsx_build_targets = []
    for candidate in files_to_lint:
        if candidate.endswith('.jsx'):
            jsx_build_targets.append(
                ('genfiles/compiled_jsx/en/%s.js'
                 % ka_root.relpath(candidate), {}))
    kake.make.build_many(jsx_build_targets)

    for abs_f in lintutil.filter(files_to_lint, suffix=('.js', '.jsx')):
        rel_f = ka_root.relpath(abs_f)
        # Files we never translate don't need 'proper' translation markup.
        if intl.english_only.should_not_translate_file(rel_f):
            continue
        if rel_f.endswith(".jsx"):
            # Point at the compiled-js version of this jsx file.
            abs_f = "%s/genfiles/compiled_jsx/en/%s.js" % (ka_root.root,
                                                           rel_f)
        for error in _lint_js_content(abs_f, lintutil.file_contents(abs_f)):
            yield error
def lint_javascript_in_html(files_to_lint):
    """Run eslint on javascript content inside html files.

    We want to make sure that the js in our html files has no unused
    variables/etc, so we can do better dependency analysis on it.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix='.html')
    lint_inputs = []      # list of filename/contents pairs
    # NOTE(review): lint_inputs is only accumulated in this chunk; the
    # code that actually feeds it to eslint presumably follows this loop
    # but is not visible here -- confirm against the full file.
    for f in files:
        contents_of_f = lintutil.file_contents(f)
        # extract_js_from_html actually can return several versions
        # of the js contents, each with a different branch of an
        # if/else commented out.  (So for input like
        #   <script>var x = { {%if c%}y: 4{%else%}y: 5{%endif%} };</script>
        # we'd see both 'var x = { y: 4 };' and 'var x = { y: 5 }')
        # We lint all such strings and combine the output.
        js_contents_iter = js_in_html.extract_js_from_html(contents_of_f,
                                                           "jinja2",
                                                           file_name=f)
        try:
            for js_contents in js_contents_iter:
                lint_inputs.append((f, js_contents))
        # The html parser can raise while the generator is consumed, so
        # the try must wrap the iteration, not the extract call.
        except Exception, why:
            yield (f, 1, 'HTML parse error: %s' % why)
            continue
def lint_js_files_are_translated(files_to_lint):
    """Verify that nltext in the js files are marked for translation.

    See docstring of: _lint_js_content

    Returns:
        List of triples: (filename, lineno, error message)
    """
    # jsx files get compiled to js first; the compiled output is linted.
    kake.make.build_many(
        [('genfiles/compiled_jsx/en/%s.js' % ka_root.relpath(jsx_file), {})
         for jsx_file in files_to_lint if jsx_file.endswith('.jsx')])

    candidates = lintutil.filter(files_to_lint, suffix=('.js', '.jsx'))
    for abs_path in candidates:
        rel_path = ka_root.relpath(abs_path)
        # Untranslated-by-design files don't need translation markup.
        if intl.english_only.should_not_translate_file(rel_path):
            continue
        if rel_path.endswith(".jsx"):
            # Swap in the compiled-js twin of this jsx file.
            abs_path = "%s/genfiles/compiled_jsx/en/%s.js" % (ka_root.root,
                                                              rel_path)
        contents = lintutil.file_contents(abs_path)
        for error in _lint_js_content(abs_path, contents):
            yield error
def lint_duplicate_test_functions(files_to_lint):
    """Enforce that no two test functions in a class share a name.

    A duplicate definition silently overwrites the earlier one, so the
    first test would never run.
    """
    for filename in lintutil.filter(files_to_lint, suffix='_test.py'):
        contents = lintutil.file_contents(filename)
        functions = _matches_with_line_numbers(_TEST_FN_RE, contents)
        classes = _matches_with_line_numbers(_CLASS_NAME_RE, contents)

        # Map 'Class.test_name' -> every line number where it's defined.
        definition_lines = collections.defaultdict(list)
        for (fn_name, fn_line) in functions:
            enclosing_class = _find_class_at_line(classes, fn_line)
            if enclosing_class is not None:
                definition_lines[enclosing_class + '.' + fn_name].append(
                    fn_line)

        for (qualified_name, linenums) in definition_lines.iteritems():
            if len(linenums) <= 1:
                continue
            # A @Nolint on any of the definitions waives the check.
            if any(lintutil.has_nolint(filename, ln) for ln in linenums):
                continue
            yield (filename, max(linenums),
                   'Duplicate test function name %(name)s'
                   ' at lines %(lines)s'
                   % {'name': qualified_name,
                      'lines': ', '.join(str(l) for l in linenums)})
def lint_duplicate_test_functions(files_to_lint):
    """Flag test classes that define the same test function twice.

    Only the last definition survives at runtime, so earlier duplicates
    silently never run.
    """
    test_files = lintutil.filter(files_to_lint, suffix='_test.py')
    for filename in test_files:
        contents = lintutil.file_contents(filename)
        functions = _matches_with_line_numbers(_TEST_FN_RE, contents)
        classes = _matches_with_line_numbers(_CLASS_NAME_RE, contents)

        by_name = {}     # 'Class.fn' -> list of defining line numbers
        for (fn, linenum) in functions:
            klass = _find_class_at_line(classes, linenum)
            if klass is None:
                continue
            by_name.setdefault(klass + '.' + fn, []).append(linenum)

        for (fn, linenums) in by_name.iteritems():
            if len(linenums) > 1:
                # Respect @Nolint on any of the duplicate definitions.
                if any(lintutil.has_nolint(filename, ln)
                       for ln in linenums):
                    continue
                yield (filename, max(linenums),
                       'Duplicate test function name %(name)s'
                       ' at lines %(lines)s'
                       % {'name': fn,
                          'lines': ', '.join(str(l) for l in linenums)})
def lint_handlebars_debugger_statement(files_to_lint):
    """Make sure we don't have any stray {{debugger}} statements."""
    handlebars_files = lintutil.filter(files_to_lint, suffix='.handlebars')
    hits = lintutil.find_lines(handlebars_files, _HANDLEBARS_DEBUGGER_RE)
    for (fname, linenum, _) in hits:
        yield (fname, linenum,
               'All {{debugger}} statements should be removed.')
def lint_jsx_fixture_files(files_to_lint): """Ensure that fixture files have an associated fixtures_test.js file.""" # We only want fixture files fixture_files_to_lint = lintutil.filter(files_to_lint, suffix=('.jsx.fixture.js', '.jsx.fixture.jsx')) for fixture_file in fixture_files_to_lint: path, fixture_name = os.path.split(fixture_file) # We need to potentially walk up the tree to find the "-package" # directory which will old the fixtures_test.js file. while path and not path.endswith("-package"): path = os.path.dirname(path) fixture_test_file = os.path.join(path, "fixtures_test.js") # Check to see if the fixtures_test.js file is completely missing if not os.path.isfile(fixture_test_file): yield (fixture_file, 1, "Fixture files must have an associated fixtures_test.js " "file. Run tools/regen_fixture_tests.sh to create it.") else: # Otherwise, it exists, so make sure that the fixture file is # referenced inside the fixtures_test.js file. search_re = re.compile(r'/%s"' % re.escape(fixture_name)) if not lintutil.line_number(fixture_test_file, search_re, 0): yield (fixture_test_file, 1, "Fixture test is missing %s. Run " "tools/regen_fixture_tests.sh to update it." % fixture_name)
def lint_every_rendered_component_has_a_fixture(files_to_lint):
    """Check that every component we render has an associated fixture file.

    In order to test that a particular react component can be server-side
    rendered, we need to actually try to render it with a particular
    value for props.  This is what component.fixture.js files are for.
    We just make sure the people write them!

    For now, we allow the fixture file to be empty (just `[]`).  Later we
    may insist on actually useful fixtures.
    """
    for html_file in lintutil.filter(files_to_lint, suffix='.html'):
        html = lintutil.file_contents(html_file)
        for match in RENDER_REACT_RE.finditer(html):
            component_file = match.group(1)
            # Server-side rendering requires a plain-js fixture, not jsx.
            fixture_file = component_file + '.fixture.js'
            if os.path.exists(ka_root.join(fixture_file)):
                continue
            linenum = html.count('\n', 0, match.start()) + 1
            yield (html_file, linenum,
                   '%s must have an associated fixture file %s'
                   % (component_file, fixture_file))
def lint_package_imports(files_to_lint):
    """Find cases where we import a package (directory) rather than a module.

    For instance, "import emails".  That is not a module, it's a
    directory.  Sometimes importing a directory is correct, for
    third-party modules with non-trivial __init__.py's.  But it's never
    right for our code.
    """
    for f in lintutil.filter(files_to_lint, suffix='.py'):
        for import_line in _get_import_lines(f):
            module = import_line.module
            # __future__ isn't a real module, and third-party code plays
            # by different rules.
            if module.startswith(('__future__', 'third_party')):
                continue
            if not _is_a_first_party_package(module):
                continue
            # Suggest the submodules the file actually references.
            contents = lintutil.file_contents(f)
            corrections = re.findall(r'%s\.[\w_]+' % import_line.name,
                                     contents)
            yield (f, import_line.lineno,
                   "Do not import the whole directory '%s'; "
                   "import the modules inside it, possibly: %s"
                   % (module, ', '.join(sorted(set(corrections)))))
def lint_every_rendered_component_has_a_fixture(files_to_lint):
    """Require a .fixture.js file for every server-rendered component.

    To test that a react component can be server-side rendered we must
    render it with concrete props; component.fixture.js files supply
    those.  An empty fixture (just `[]`) is accepted for now; later we
    may insist on actually useful fixtures.
    """
    html_files = lintutil.filter(files_to_lint, suffix='.html')
    for f in html_files:
        contents_of_f = lintutil.file_contents(f)
        for m in RENDER_REACT_RE.finditer(contents_of_f):
            component_file = m.group(1)
            # Only a plain-js fixture (not jsx) is server-renderable.
            fixture_file = component_file + '.fixture.js'
            if not os.path.exists(ka_root.join(fixture_file)):
                lineno = 1 + contents_of_f.count('\n', 0, m.start())
                yield (f, lineno,
                       '%s must have an associated fixture file %s'
                       % (component_file, fixture_file))
def lint_banned_file_types(files_to_lint):
    """Find all the bad files using banned extensions and complain."""
    offenders = lintutil.filter(files_to_lint, suffix=_BAD_EXTENSIONS,
                                exclude_substrings=_EXISTING_FILE_WHITELIST)
    for offender in offenders:
        yield (offender, 1, "Use React and Aphrodite instead.")
def lint_jsx_fixture_files(files_to_lint): """Ensure that fixture files have an associated fixtures_test.js file.""" # We only want fixture files fixture_files_to_lint = lintutil.filter(files_to_lint, suffix=('.jsx.fixture.js', '.jsx.fixture.jsx')) for fixture_file in fixture_files_to_lint: path, fixture_name = os.path.split(fixture_file) # We need to potentially walk up the tree to find the "-package" # directory which will old the fixtures_test.js file. while path and not path.endswith("-package"): path = os.path.dirname(path) fixture_test_file = os.path.join(path, "fixtures_test.js") # Check to see if the fixtures_test.js file is completely missing if not os.path.isfile(fixture_test_file): yield (fixture_file, 1, "Fixture files must have an associated fixtures_test.js " "file. Run tools/regen_fixture_tests.sh to create it.") else: # Otherwise, it exists, so make sure that the fixture file is # referenced inside the fixtures_test.js file. search_re = re.compile(r'/%s"' % re.escape(fixture_name)) if not lintutil.line_number(fixture_test_file, search_re, 0): yield (fixture_test_file, 1, "Fixture test is missing %s. Run " "tools/regen_fixture_tests.sh to update it." % fixture_name)
def lint_no_templatetags_calls_in_python(files_to_lint):
    """Enforce that python code does not call code in templatetags/filters.

    templatetags.py and templatefilters.py hold code that emits into
    jinja2 templates, so they may use things (such as jinja2.Markup()
    objects) that are hard to use correctly from regular python logic.
    To keep jinja2 objects and python objects separate, python code must
    never call into templatetags/templatefilters.  Dual-use code can be
    moved to a separate file callable from both sides -- for instance,
    templatefilters.py:static_url() just calls url_util.static_url().
    """
    # templatetags/templatefilters may call each other, config_jinja
    # registers them with jinja2, and test files are exempt.
    exclude = ('_test.py', '/templatetags.py', '/templatefilters.py',
               '/config_jinja.py')
    files = lintutil.filter(files_to_lint, suffix='.py',
                            exclude_substrings=exclude)
    hits = lintutil.find_lines(files, _IMPORT_TEMPLATETAGS_RE)
    for (fname, linenum, line) in hits:
        match = _IMPORT_TEMPLATETAGS_RE.search(line)
        # The regexp has three alternations; exactly one group matches.
        module = match.group(1) or match.group(2) or match.group(3)
        yield (fname, linenum,
               '%s is for jinja2; do not call from Python code' % module)
def lint_non_literal_i18n_do_not_translate(files_to_lint):
    """Find where we mark html as not needing translation but its non-literal

    We require anything we mark with i18n_do_not_translate to be only a
    string literal.  i18n_do_not_translate marks the string as "safe"
    (that is jinja2 won't autoescape it) so we need to be extra careful
    we don't create a potential XSS attack vector if we end up marking
    some variable as safe.  Unless that line is marked with
    {{# @Nolint #}}, we flag it as a lint error.

    Returns:
        List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to files under templates/ that are html or txt.
    template_files = lintutil.filter(files_to_lint, prefix='templates/',
                                     suffix=('.html', '.txt'))
    for filename in template_files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # A @Nolint between the match and end-of-line waives it.
            eol = contents.find('\n', fn_match.end())
            if eol == -1:
                eol = len(contents)
            if '@Nolint' in contents[fn_match.start():eol]:
                continue
            for m in _BAD_DO_NOT_TRANSLATE_RE.finditer(fn_match.group(0)):
                linenum = 1 + contents.count(
                    '\n', 0, fn_match.start() + m.start(1))
                yield (filename, linenum,
                       '%s contains something that is not just a '
                       'literal string. Only literal strings can be '
                       'inside i18n_do_not_translate.' % m.group(1))
def lint_absolute_import(files_to_lint):
    """Make sure we use absolute imports everywhere.

    Suppose you have a top-level util.py and also email/util.py, and
    email/foo.py does "import util": it gets email/util.py rather than
    the top-level util.py.  Absolute imports everywhere avoid that risk,
    and make it obvious to readers exactly what's being imported.
    """
    for f in lintutil.filter(files_to_lint, suffix='.py'):
        import_lines = _get_import_lines(f)
        # No imports?  Then no need for an absolute-import directive.
        if not import_lines:
            continue
        # Already has the absolute-import directive: we're happy.
        if any(i.module == '__future__.absolute_import'
               for i in import_lines):
            continue
        # If the file can be run as a script, absolute-import doesn't
        # always work since we don't automatically have webapp-root in
        # the pythonpath for scripts, so we don't require it there.
        if lintutil.file_contents(f).startswith('#!'):
            continue
        # __future__ imports come first, so the best line number to
        # report for the error is the first import line.
        yield (f, import_lines[0].lineno,
               'Modules must use: from __future__ import absolute_import')
def lint_non_literal_i18n_in_javascript(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable has
    been translated or not.

    Yields:
        Triples (filename, lineno, error message).
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix=('.js', '.jsx'),
                                    exclude_substrings=('/i18n.js',
                                                        '/i18n_test.js'))
    # This regexp pattern captures a string, possibly concatenated with +'s.
    js_str = (r'(?:' +
              r'"(?:\\.|[^"])*"|' +
              r"'(?:\\.|[^'])*'|" +
              r'`(?:\\.|[^`])*`' +
              ')')
    js_concat_str = '\s*%s(?:\s*\+\s*%s)*\s*' % (js_str, js_str)
    # NOTE(review): the dots in 'i18n._' / 'i18n.ngettext' are unescaped,
    # so they match any character; probably harmless in practice but
    # worth confirming (or escaping) in both patterns together.
    gettext_occurrences = re.compile(r'\b(i18n._|i18n.ngettext)\(')
    valid_gettext_occurrences = re.compile(
        r'\bi18n._\(%(str)s[,)]|\bi18n.ngettext\(%(str)s,\s*%(str)s[,)]'
        % {'str': js_concat_str})
    for f in files_to_lint:
        contents = lintutil.file_contents(f)
        # Keyed by match start-position so each gettext call can be
        # paired with the (possibly missing) 'valid' match at that spot.
        all_occurrences = {
            m.start(): m for m in gettext_occurrences.finditer(contents)}
        valid_occurrences = {
            m.start(): m
            for m in valid_gettext_occurrences.finditer(contents)}
        for (startpos, m) in all_occurrences.iteritems():
            i18n_fn = m.group(1)
            msg = None  # set to non-None if there's a problem.
            if startpos not in valid_occurrences:
                msg = ('%s must have string literals as arguments, '
                       'with no variables or templates' % i18n_fn)
            else:
                # Then we're ok with this!  *Unless* it's a template
                # string with ${...} inside it, then we're not ok.
                m2 = valid_occurrences[startpos]
                # More unescaped '${' than escaped '\${' means at least
                # one live template interpolation.
                if m2.group().count(r'${') > m2.group().count(r'\${'):
                    msg = ('You must use %' + '(...)s with template strings '
                           'inside %s, not ${...}' % i18n_fn)
            if msg:
                start_lineno = 1 + contents.count('\n', 0, startpos)
                # Doing a real regexp to find the end of this function call
                # is tough, we just do something simple and pray.
                end_paren = contents.find(')', startpos)
                if end_paren == -1:
                    end_paren = len(contents)
                end_lineno = 1 + contents.count('\n', 0, end_paren)
                # @Nolint anywhere in the call's line-range waives it.
                if any(lintutil.has_nolint(f, lineno)
                       for lineno in xrange(start_lineno, end_lineno + 1)):
                    continue
                yield (f, start_lineno, msg)
def lint_non_literal_i18n_in_python(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable has
    been translated or not.  Sometimes it's ok, but usually it's a
    mistake, and a better solution is to pass in translated_variable
    instead.  (The OK cases can be marked with @Nolint.)
    """
    # Token-level state machine: _GETTEXT_STATE_MACHINE maps the current
    # state to the allowed next tokens/token-types; a value of None marks
    # the happy end of a gettext call.
    current_state = None        # not currently in a gettext context
    gettext_linenum = None      # linenum of current gettext call
    has_nolint = False          # any line of the i18n._ may have @nolint
    # Only need to lint python files, but not test files.
    files = lintutil.filter(files_to_lint, suffix='.py',
                            exclude_substrings=['_test.py'])
    for (fname, ttype, token, (linenum, _), _, line) in (
            lintutil.python_tokens(files)):
        # Don't lint *definitions* of these methods.
        if line.strip().startswith('def '):
            continue
        # The state machine may transition on either the type
        # of the token, or its literal value.
        if token in _GETTEXT_STATE_MACHINE[current_state]:
            current_state = token
            gettext_linenum = gettext_linenum or linenum
            has_nolint = has_nolint or '@Nolint' in line
        elif ttype in _GETTEXT_STATE_MACHINE[current_state]:
            current_state = ttype
            gettext_linenum = gettext_linenum or linenum
            has_nolint = has_nolint or '@Nolint' in line
        elif current_state is None:
            # We weren't in gettext before, and we're still not.
            pass
        else:
            # We're in gettext, but can't transition: we're bad.
            # Give ourselves *one more* chance for nolint.
            has_nolint = has_nolint or '@Nolint' in line
            if current_state in _START_TOKENS:
                # BUT: If we last saw _ or ngettext and can't
                # transition because we're not a '(', that
                # means it's not a function call, so we can
                # ignore it.  e.g. '(f, _) = file_and_line()'.
                pass
            elif not has_nolint:
                yield (fname, gettext_linenum,
                       'gettext-like calls should only have '
                       'literal strings as their arguments')
            current_state = None
            gettext_linenum = None
            has_nolint = False
        # If we've happily ended a gettext call, clear the state.
        if _GETTEXT_STATE_MACHINE[current_state] is None:
            current_state = None
            gettext_linenum = None
            has_nolint = False
def lint_non_literal_i18n_in_python(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable has
    been translated or not.  Sometimes it's ok, but usually it's a
    mistake, and a better solution is to pass in translated_variable
    instead.  (The OK cases can be marked with @Nolint.)
    """
    # Token-level state machine; _GETTEXT_STATE_MACHINE maps each state
    # to the set of allowed next tokens/token-types, with None marking
    # the successful end of a gettext call.
    current_state = None        # not currently in a gettext context
    gettext_linenum = None      # linenum of current gettext call
    has_nolint = False          # any line of the i18n._ may have @nolint
    # Only need to lint python files, but not test files.
    files = lintutil.filter(files_to_lint, suffix='.py',
                            exclude_substrings=['_test.py'])
    for (fname, ttype, token, (linenum, _), _, line) in (
            lintutil.python_tokens(files)):
        # Don't lint *definitions* of these methods.
        if line.strip().startswith('def '):
            continue
        # The state machine may transition on either the type
        # of the token, or its literal value.
        if token in _GETTEXT_STATE_MACHINE[current_state]:
            current_state = token
            gettext_linenum = gettext_linenum or linenum
            has_nolint = has_nolint or '@Nolint' in line
        elif ttype in _GETTEXT_STATE_MACHINE[current_state]:
            current_state = ttype
            gettext_linenum = gettext_linenum or linenum
            has_nolint = has_nolint or '@Nolint' in line
        elif current_state is None:
            # We weren't in gettext before, and we're still not.
            pass
        else:
            # We're in gettext, but can't transition: we're bad.
            # Give ourselves *one more* chance for nolint.
            has_nolint = has_nolint or '@Nolint' in line
            if current_state in _START_TOKENS:
                # BUT: If we last saw _ or ngettext and can't
                # transition because we're not a '(', that
                # means it's not a function call, so we can
                # ignore it.  e.g. '(f, _) = file_and_line()'.
                pass
            elif not has_nolint:
                yield (fname, gettext_linenum,
                       'gettext-like calls should only have '
                       'literal strings as their arguments')
            current_state = None
            gettext_linenum = None
            has_nolint = False
        # If we've happily ended a gettext call, clear the state.
        if _GETTEXT_STATE_MACHINE[current_state] is None:
            current_state = None
            gettext_linenum = None
            has_nolint = False
def lint_non_literal_i18n_in_javascript(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable has
    been translated or not.

    Yields:
        Triples (filename, lineno, error message).
    """
    files_to_lint = lintutil.filter(
        files_to_lint, suffix=('.js', '.jsx'),
        exclude_substrings=('/i18n.js', '/i18n_test.js'))
    # This regexp pattern captures a string, possibly concatenated with +'s.
    js_str = (r'(?:' +
              r'"(?:\\.|[^"])*"|' +
              r"'(?:\\.|[^'])*'|" +
              r'`(?:\\.|[^`])*`' +
              ')')
    js_concat_str = '\s*%s(?:\s*\+\s*%s)*\s*' % (js_str, js_str)
    # NOTE(review): the dots in 'i18n._' / 'i18n.ngettext' are unescaped
    # so they match any character; probably harmless but worth
    # confirming (or escaping, in both patterns together).
    gettext_occurrences = re.compile(r'\b(i18n._|i18n.ngettext)\(')
    valid_gettext_occurrences = re.compile(
        r'\bi18n._\(%(str)s[,)]|\bi18n.ngettext\(%(str)s,\s*%(str)s[,)]'
        % {'str': js_concat_str})
    for f in files_to_lint:
        contents = lintutil.file_contents(f)
        # Keyed by match start-position so every gettext call can be
        # paired with the (possibly missing) 'valid' match at that spot.
        all_occurrences = {
            m.start(): m for m in gettext_occurrences.finditer(contents)}
        valid_occurrences = {
            m.start(): m
            for m in valid_gettext_occurrences.finditer(contents)}
        for (startpos, m) in all_occurrences.iteritems():
            i18n_fn = m.group(1)
            msg = None  # set to non-None if there's a problem.
            if startpos not in valid_occurrences:
                msg = ('%s must have string literals as arguments, '
                       'with no variables or templates' % i18n_fn)
            else:
                # Then we're ok with this!  *Unless* it's a template
                # string with ${...} inside it, then we're not ok.
                m2 = valid_occurrences[startpos]
                # More unescaped '${' than escaped '\${' means at least
                # one live template interpolation.
                if m2.group().count(r'${') > m2.group().count(r'\${'):
                    msg = ('You must use %' + '(...)s with template strings '
                           'inside %s, not ${...}' % i18n_fn)
            if msg:
                start_lineno = 1 + contents.count('\n', 0, startpos)
                # Doing a real regexp to find the end of this function call
                # is tough, we just do something simple and pray.
                end_paren = contents.find(')', startpos)
                if end_paren == -1:
                    end_paren = len(contents)
                end_lineno = 1 + contents.count('\n', 0, end_paren)
                # @Nolint anywhere in the call's line-range waives it.
                if any(lintutil.has_nolint(f, lineno)
                       for lineno in xrange(start_lineno, end_lineno + 1)):
                    continue
                yield (f, start_lineno, msg)
def lint_all_fixtures_match_components(files_to_lint):
    """Warn about .fixture.js files whose React component is missing."""
    fixtures = lintutil.filter(files_to_lint, prefix='javascript/',
                               suffix='.fixture.js')
    for fixture in fixtures:
        # Strip the '.fixture.js' suffix to get the component's path.
        component_path = fixture[:-len(".fixture.js")]
        if os.path.isfile(component_path):
            continue
        yield (fixture, 1,
               "Expected to find a React component at '{}'. If you just "
               "moved that component, be sure to move "
               "this fixture too.".format(component_path))
def lint_all_fixtures_match_components(files_to_lint):
    """Complain when a fixture file's React component no longer exists."""
    for fixture_file in lintutil.filter(files_to_lint,
                                        prefix='javascript/',
                                        suffix='.fixture.js'):
        # The component lives at the fixture's path minus '.fixture.js'.
        component = fixture_file[:-len(".fixture.js")]
        if not os.path.isfile(component):
            yield (fixture_file, 1,
                   "Expected to find a React component at '{}'. If you just "
                   "moved that component, be sure to move "
                   "this fixture too.".format(component))
def lint_yamls_are_parseable(files_to_lint):
    """Enforce that any yamls to be uploaded to app engine are parseable.

    Yields:
        Triples (filename, lineno, error message) for each yaml file
        that fails to parse.
    """
    for filename in lintutil.filter(files_to_lint, suffix='.yaml'):
        with open(filename) as f:
            try:
                yaml.safe_load(f)
            except yaml.error.YAMLError as e:
                # ParserError is only one of several YAMLError
                # subclasses (ScannerError, ComposerError, ...); catch
                # the whole hierarchy so a malformed yaml is reported
                # instead of crashing the linter.
                mark = getattr(e, 'problem_mark', None)
                # yaml.Mark is 0-indexed; fall back to line 1 when the
                # error carries no mark.
                lineno = mark.line + 1 if mark is not None else 1
                yield (filename, lineno,
                       'Error parsing yaml: %s %s.'
                       % (getattr(e, 'problem', e),
                          getattr(e, 'context', '')))
def lint_yamls_are_parseable(files_to_lint):
    """Enforce that any yamls to be uploaded to app engine are parseable.

    Yields:
        Triples (filename, lineno, error message) for each yaml file
        that fails to parse.
    """
    for filename in lintutil.filter(files_to_lint, suffix='.yaml'):
        with open(filename) as f:
            try:
                yaml.safe_load(f)
            except yaml.error.YAMLError as e:
                # Catch the full YAMLError hierarchy, not just
                # ParserError: ScannerError/ComposerError etc. would
                # otherwise crash the linter on malformed yaml.
                mark = getattr(e, 'problem_mark', None)
                yield (
                    filename,
                    # yaml.Mark is 0-indexed; line 1 if no mark present.
                    mark.line + 1 if mark is not None else 1,
                    'Error parsing yaml: %s %s.'
                    % (getattr(e, 'problem', e), getattr(e, 'context', '')))
def lint_fixture_files(files_to_lint):
    """Require that every plain .jsx file has a fixture file next to it."""
    # Skip test files, test utilities, whitelisted files, and fixtures
    # themselves.
    excludes = _JSX_NO_FIXTURE_WHITELIST + ['testfiles', 'testutils',
                                            '.fixture.jsx']
    jsx_files = lintutil.filter(files_to_lint, suffix=('.jsx'),
                                exclude_substrings=excludes)
    for jsx_file in jsx_files:
        has_fixture = (os.path.isfile(jsx_file + ".fixture.js") or
                       os.path.isfile(jsx_file + ".fixture.jsx"))
        if not has_fixture:
            yield (jsx_file, 1, ".jsx files must have an associated "
                   ".jsx.fixture.js file.")
def lint_comma(files_to_lint):
    """Find imports that use a comma to import multiple things on one line.

    KA style is we only have one import per line, so we just complain.
    """
    for f in lintutil.filter(files_to_lint, suffix='.py'):
        for import_line in _get_import_lines(f):
            if not import_line.has_comma:
                continue
            yield (f, import_line.lineno,
                   "Use one import per line, rather than a comma.")
def lint_fixture_files(files_to_lint):
    """Check that each non-test .jsx file ships with a fixture file."""
    # Test files, test utilities, fixtures themselves, and whitelisted
    # files are exempt.
    jsx_files_to_lint = lintutil.filter(
        files_to_lint, suffix=('.jsx'),
        exclude_substrings=_JSX_NO_FIXTURE_WHITELIST +
        ['testfiles', 'testutils', '.fixture.jsx'])
    for jsx_file in jsx_files_to_lint:
        # Either a .fixture.js or a .fixture.jsx sibling satisfies us.
        if os.path.isfile(jsx_file + ".fixture.js"):
            continue
        if os.path.isfile(jsx_file + ".fixture.jsx"):
            continue
        yield (jsx_file, 1, ".jsx files must have an associated "
               ".jsx.fixture.js file.")
def lint_js_test_files(files_to_lint):
    """Check that every first-party .js file has an associated _test.js.

    We only look at .js files inside the javascript directory, and we
    don't require tests or fixtures to have tests of their own.

    Yields:
        Triples (filename, lineno, error message).
    """
    js_files_to_lint = lintutil.filter(
        files_to_lint, prefix=('javascript/'), suffix=('.js'),
        exclude_substrings=['.jsx.fixture.js', '_test.js'] +
        _JS_NO_TEST_WHITELIST)
    for js_file in js_files_to_lint:
        # Replace only the trailing '.js': str.replace() would clobber
        # the *first* '.js' anywhere in the path (e.g.
        # 'javascript/foo.js.bar.js' -> 'javascript/foo_test.js.bar.js').
        test_file = js_file[:-len('.js')] + '_test.js'
        if not os.path.isfile(test_file):
            yield (js_file, 1,
                   ".js files must have an associated _test.js file.")
def lint_js_test_files(files_to_lint):
    """Check that every first-party .js file has an associated _test.js.

    Only .js files inside the javascript directory are checked; tests
    and fixtures don't need tests of their own.

    Yields:
        Triples (filename, lineno, error message).
    """
    js_files_to_lint = lintutil.filter(
        files_to_lint, prefix=('javascript/'), suffix=('.js'),
        exclude_substrings=['.jsx.fixture.js', '_test.js'] +
        _JS_NO_TEST_WHITELIST)
    for js_file in js_files_to_lint:
        # Swap only the trailing '.js' suffix for '_test.js';
        # str.replace() would wrongly hit the first '.js' anywhere in
        # the path (e.g. 'javascript/foo.js.bar.js').
        test_file = js_file[:-len('.js')] + '_test.js'
        if not os.path.isfile(test_file):
            yield (js_file, 1,
                   ".js files must have an associated _test.js file.")
def lint_gae_specific_code(files_to_lint):
    """Enforce that we do not add new code that will not work on flex.

    This lint check is only for the 'main' webapp service; other
    services (in services/) may be running on appengine classic, and can
    use this functionality.  In fact, sometimes we separated them out
    into services for just that reason!
    """
    candidates = lintutil.filter(files_to_lint, suffix='.py',
                                 exclude_substrings=['/services/'])
    for (bad_module_re, bad_module, replacement) in _BAD_MODULES_RES:
        hits = lintutil.find_lines(candidates, bad_module_re)
        for (f, linenum, _) in hits:
            yield (f, linenum,
                   'Do not import %s -- use %s instead.'
                   % (bad_module, replacement))
def lint_gae_specific_code(files_to_lint):
    """Flag imports of modules that do not work on appengine flex.

    Only the 'main' webapp service is checked; code under services/ may
    run on appengine classic and legitimately use this functionality --
    sometimes it was split into a service for exactly that reason.
    """
    main_service_files = lintutil.filter(files_to_lint, suffix='.py',
                                         exclude_substrings=['/services/'])
    for (bad_module_re, bad_module, replacement) in _BAD_MODULES_RES:
        for (f, linenum, _) in lintutil.find_lines(main_service_files,
                                                   bad_module_re):
            yield (f, linenum,
                   'Do not import %s -- use %s instead.'
                   % (bad_module, replacement))
def lint_no_backslashes(files_to_lint):
    """Complain if any import lines use backslashes for line continuation.

    While our style guide discourages end-of-line backslashes for line
    continuation, it's not normally something we'd lint on.  But I do in
    this case because the regexp for other import-linters will get
    confused in the presense of backslashes, and it's easy enough to use
    parens instead.
    """
    backslash_re = re.compile(r'^\s*(from|import)\b.*\\$', re.MULTILINE)
    python_files = lintutil.filter(files_to_lint, suffix='.py')
    for (filename, linenum, _) in lintutil.find_lines(python_files,
                                                      backslash_re):
        yield (filename, linenum,
               "Use parens instead of backslash for line-continuation")
def lint_no_user_property(files_to_lint):
    """Enforce that nobody uses UserProperty.

    ...unless marked as an explicitly approved legacy usage via @Nolint.
    """
    for filename in lintutil.filter(files_to_lint, suffix='.py'):
        contents = lintutil.file_contents(filename)
        for fn_match in _USERPROPERTY_RE.finditer(contents):
            # A @Nolint between the match and the end of that line
            # waives the check.
            eol = contents.find('\n', fn_match.end())
            if eol == -1:
                eol = len(contents)
            if '@Nolint' in contents[fn_match.start():eol]:
                continue
            yield (filename,
                   1 + contents.count('\n', 0, fn_match.start()),
                   "Do not use UserProperty, it is not safe. Use UserData's "
                   "key as its foreign key, instead.")
def lint_symbol_imports(files_to_lint):
    """Find cases where we import a symbol rather than a module.

    For instance, "from main import application".  That is not a module,
    it's a variable defined within a module.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    for f in files_to_lint:
        import_lines = _get_import_lines(f)
        for import_line in import_lines:
            # You can only import a symbol via the 'from ... import' syntax
            if not import_line.has_from:
                continue
            if import_line.module.startswith('__future__'):
                # not a real dir
                continue
            # TODO(csilvers): our style guide has an exception
            # allowing the import of individual symbols from
            # third-party packages which document this as a best
            # practice.  Add a special-case to ignore those, here.
            # (Or amend the style guide to remove the exception :-) )
            if not _is_a_module_or_package(import_line.module):
                # While we may not be a module or a package, the thing
                # containing us should definitely be.  If it's not, or
                # if it's a package but an empty one, then we're
                # importing something that doesn't (now) exist,
                # probably an auto-generated package.  We just ignore
                # it; we can't say anything useful about it.
                module_parent = import_line.module.rsplit('.', 1)[0]
                module_parent_as_package = ka_root.join(
                    module_parent.replace('.', os.sep), '__init__.py')
                if (not _is_a_module_or_package(module_parent) or
                        (os.path.exists(module_parent_as_package) and
                         os.stat(module_parent_as_package).st_size == 0)):
                    continue
                # The parent is real, so this really is a symbol import:
                # tell the author to import the containing module.
                actual_module = import_line.module.rsplit('.', 1)[0]
                yield (f, import_line.lineno,
                       "Import the whole module %s, not individual symbols "
                       "inside it" % actual_module)
def lint_iife_in_html_javascript(files_to_lint):
    """Make sure js in <script> tags doesn't leak globals by using an IIFE.

    IIFE is when you surround js code in '(function() { ... })();'.
    This prevents any variable declarations from polluting the global
    scope.  We require all <script> tags to have an IIFE if they declare
    a variable.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix='.html')
    # For determining if we need an IIFE, we only care about 'var'
    # statements.
    var_re = re.compile(r'\bvar\b')
    for f in files:
        contents_of_f = lintutil.file_contents(f)
        # extract_js_from_html actually can return several versions
        # of the js contents, each with a different branch of an
        # if/else commented out.  (So for input like
        #   <script>var x = { {%if c%}y: 4{%else%}y: 5{%endif%} };</script>
        # we'd see both 'var x = { y: 4 };' and 'var x = { y: 5 }')
        # We lint all such strings and combine the output.
        js_contents_iter = js_in_html.extract_js_from_html(contents_of_f,
                                                           "jinja2",
                                                           keep_re=var_re,
                                                           file_name=f)
        try:
            for js_contents in js_contents_iter:
                if (not _IIFE_RE.match(js_contents) and
                        var_re.search(js_contents)):
                    # Our script is the first non-blank line in the
                    # extracted output.
                    lineno = len(js_contents) - len(
                        js_contents.lstrip('\n'))
                    if not lintutil.has_nolint(f, lineno + 1):
                        yield (f, lineno + 1,
                               "Must wrap this script's contents in "
                               "'(function() { ... })();' (IIFE) to "
                               "avoid leaking vars to global state")
        # The html parser can raise mid-iteration, so the try wraps the
        # loop over the generator rather than the extract call itself.
        except Exception, why:
            yield (f, 1, 'HTML parse error: %s' % why)
            continue
def lint_no_jinja2_markup_in_python(files_to_lint):
    """Enforce that python code does not call jinja2.Markup().

    jinja2.Markup(s) tells jinja2 not to auto-escape s.  Doing that
    from python code -- rather than via |safe in the template itself
    -- is trouble: Markup objects look like unicode but carry magic
    auto-escaping behavior that can unexpectedly html-escape text
    interpolated into them, forcing special-case code like the
    isinstance(obj, jinja2.Markup) check in api/jsonify.py to be
    sprinkled everywhere.

    Markup() is legitimately needed when writing jinja2 filters and
    tags, so templatetags.py/templatefilters.py (and test files) are
    exempt.  As always, code that must override this check can use
    @Nolint.

    Yields:
        Triples: (filename, lineno, error message)
    """
    # Skip test files, and the template-tag/-filter modules where
    # Markup() is expected.
    excluded_substrings = ('_test.py', '/templatetags.py',
                           '/templatefilters.py')
    candidates = lintutil.filter(files_to_lint, suffix='.py',
                                 exclude_substrings=excluded_substrings)

    for token_stream in lintutil.find_token_streams(candidates,
                                                    _JINJA2_MARKUP_TOKENS):
        first_token = token_stream[0]
        yield (first_token[0],      # filename
               first_token[3][0],   # line number
               'Do not call jinja2.Markup() from code'
               ' (except in templatetags/filters).'
               ' Use |safe in templates instead.')

    for (fname, linenum, _) in lintutil.find_lines(candidates,
                                                   _IMPORT_MARKUP_RE):
        yield (fname, linenum,
               'Import full modules, not classes from modules (Markup)')
def lint_no_user_property(files_to_lint):
    """Enforce that nobody uses UserProperty.

    ...unless marked as an explicitly approved legacy usage via @Nolint.

    Yields:
        Triples: (filename, lineno, error message)
    """
    for filename in lintutil.filter(files_to_lint, suffix='.py'):
        contents = lintutil.file_contents(filename)
        for match in _USERPROPERTY_RE.finditer(contents):
            # An '@Nolint' anywhere between the match and the end of
            # its line marks this usage as approved.
            eol = contents.find('\n', match.end())
            if eol == -1:
                eol = len(contents)
            if '@Nolint' in contents[match.start():eol]:
                continue
            lineno = 1 + contents.count('\n', 0, match.start())
            yield (
                filename, lineno,
                "Do not use UserProperty, it is not safe. Use UserData's "
                "key as its foreign key, instead.")
def lint_redundant_imports(files_to_lint):
    """You don't need to do both `import a.b` and `from a import b`!

    Yields:
        Triples: (filename, lineno, error message)
    """
    for f in lintutil.filter(files_to_lint, suffix='.py'):
        seen_imports = {}    # module name -> first top-level import_line
        for import_line in _get_import_lines(f):
            previous = seen_imports.get(import_line.module, None)
            if previous:
                yield (f, import_line.lineno,
                       "Already imported on line %s" % previous.lineno)
            elif import_line.colno == 0:
                # It's common to import the same module multiple times
                # if it's within functions (the import only exists in
                # a limited scope) or try/except blocks, so we only
                # record top-level imports here: a later import only
                # counts as redundant when it shadows one of those.
                seen_imports[import_line.module] = import_line
def lint_circular_imports(files_to_lint):
    """Report on all circular imports found in files_to_lint.

    A circular import is when A imports B imports C imports A.  We
    focus only on *simple* cycles, where each node occurs only once
    in a cycle (so a figure-8 would be two cycles, one for the top
    circle and one for the bottom, not a single, more complicated
    cycle).

    The rule is that we report all cycles that include any files in
    files_to_lint.  (For reproducibility, we always print the cycle
    starting with the file in the cycle that comes first
    alphabetically.)  It's very possible for a single file to have
    multiple circular imports.  We report them all.

    Yields:
        Triples: (filename, lineno, error message)
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    # Map dotted module names ('a.b.c') back to their source filenames.
    module_to_file = {
        ka_root.relpath(os.path.splitext(f)[0]).replace(os.sep, '.'): f
        for f in files_to_lint
    }
    saw_a_cycle = False
    for cycle in sorted(_find_all_cycles(module_to_file.keys())):
        # Note: set even for cycles we don't end up reporting below.
        saw_a_cycle = True
        # We report the line-number where cycle[0] imports cycle[1].
        f = module_to_file.get(cycle[0])
        if f is None:   # means this cycle does not include any files-to-lint
            continue
        import_lines = _get_import_lines(f)
        # NOTE(review): assumes f always contains an import line for
        # cycle[1]; if it ever doesn't, this next() raises
        # StopIteration, which (py2) would silently end this
        # generator -- confirm that can't happen.
        desired_import_line = next(il for il in import_lines
                                   if il.module == cycle[1])
        yield (f, desired_import_line.lineno,
               ('Resolve this import cycle (by making a late import): %s'
                % ' -> '.join(cycle)))

    if not saw_a_cycle:
        # This is only safe to do when there are no cycles already!
        for error in _lint_unnecessary_late_imports(files_to_lint):
            yield error
def lint_no_wrong_i18n_markup_in_jinja2(files_to_lint):
    """Find where we mark js within html with i18n._ instead of {{ _js()

    Returns:
        List of triples: (filename, lineno, error message)
    """
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix='.html')
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _BAD_JS_MARKUP.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue
            # NOTE(review): this inner loop re-runs _BAD_JS_MARKUP
            # over text that is itself already a _BAD_JS_MARKUP match
            # (so it mirrors the sibling linters whose outer regex is
            # a larger enclosing match).  Confirm the outer regex here
            # wasn't meant to be a <script>-block matcher instead.
            for m in _BAD_JS_MARKUP.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       'Do {{ _js("%s") }} instead of %s inside <script> '
                       'tags within jinja2.' % (m.group(1), m.group(0)))
# NOTE(review): this is a byte-for-byte duplicate of the
# lint_no_wrong_i18n_markup_in_jinja2 defined earlier in this file; at
# import time this definition silently shadows the earlier one.  One
# of the two copies should be deleted.
def lint_no_wrong_i18n_markup_in_jinja2(files_to_lint):
    """Find where we mark js within html with i18n._ instead of {{ _js()

    Returns:
        List of triples: (filename, lineno, error message)
    """
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix='.html')
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _BAD_JS_MARKUP.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue
            for m in _BAD_JS_MARKUP.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       'Do {{ _js("%s") }} instead of %s inside <script> '
                       'tags within jinja2.' % (m.group(1), m.group(0)))
def lint_all_wsgi_entrypoint_imports(files_to_lint):
    """Enforce that every WSGI entrypoint imports appengine_config first.

    On App Engine Standard, appengine_config is guaranteed to be
    imported before anything else.  But this isn't guaranteed on VM,
    which is bad because our code assumes it!  So we require every
    WSGI entrypoint to manually import it first thing, before any
    other imports.  (It likely only matters that it's before all
    first-party and shared imports, but we require that it be first
    to keep things simple.  We do allow __future__ imports first,
    since those are easy to check and python wants them first.)

    Yields:
        Triples: (filename, lineno, error message)
    """
    # Collect the filename of every WSGI entrypoint module declared
    # by any GAE module's app.yaml.  NOTE(review): this actually
    # imports each entrypoint module at lint time, which runs its
    # module-level side effects.
    entrypoint_filenames = set()
    for gae_module_name in modules_util.all_modules:
        entrypoint_module_names = (
            modules_util.app_yaml_entrypoint_modules(gae_module_name))
        for entrypoint_module_name in entrypoint_module_names:
            filename = importlib.import_module(entrypoint_module_name).__file__
            # replace .pyc, .pyo etc.
            filename = os.path.splitext(filename)[0] + '.py'
            entrypoint_filenames.add(ka_root.relpath(filename))

    if lintutil.filter(files_to_lint, suffix='.yaml'):
        # If any yaml files were modified, we need to recheck all entrypoints.
        # TODO(benkraft): We should really only need to do this if any
        # module-yaml, or any yaml included by one, has changed, but
        # determining which those are is tricky, so we assume they all are.
        filenames_to_check = entrypoint_filenames
    else:
        # Otherwise, we just need to check changed entrypoints.
        files_to_lint = set(ka_root.relpath(f) for f in files_to_lint)
        filenames_to_check = entrypoint_filenames & files_to_lint

    for filename in filenames_to_check:
        if filename.startswith('third_party/'):
            continue
        maybe_error = _lint_single_wsgi_entrypoint_import(filename)
        if maybe_error:
            yield maybe_error
# NOTE(review): this is a byte-for-byte duplicate of the
# lint_all_wsgi_entrypoint_imports defined just above; at import time
# this definition silently shadows the earlier one.  One of the two
# copies should be deleted.
def lint_all_wsgi_entrypoint_imports(files_to_lint):
    """Enforce that every WSGI entrypoint imports appengine_config first.

    On App Engine Standard, appengine_config is guaranteed to be
    imported before anything else.  But this isn't guaranteed on VM,
    which is bad because our code assumes it!  So we require every
    WSGI entrypoint to manually import it first thing, before any
    other imports.  (It likely only matters that it's before all
    first-party and shared imports, but we require that it be first
    to keep things simple.  We do allow __future__ imports first,
    since those are easy to check and python wants them first.)
    """
    entrypoint_filenames = set()
    for gae_module_name in modules_util.all_modules:
        entrypoint_module_names = (
            modules_util.app_yaml_entrypoint_modules(gae_module_name))
        for entrypoint_module_name in entrypoint_module_names:
            filename = importlib.import_module(entrypoint_module_name).__file__
            # replace .pyc, .pyo etc.
            filename = os.path.splitext(filename)[0] + '.py'
            entrypoint_filenames.add(ka_root.relpath(filename))

    if lintutil.filter(files_to_lint, suffix='.yaml'):
        # If any yaml files were modified, we need to recheck all entrypoints.
        # TODO(benkraft): We should really only need to do this if any
        # module-yaml, or any yaml included by one, has changed, but
        # determining which those are is tricky, so we assume they all are.
        filenames_to_check = entrypoint_filenames
    else:
        # Otherwise, we just need to check changed entrypoints.
        files_to_lint = set(ka_root.relpath(f) for f in files_to_lint)
        filenames_to_check = entrypoint_filenames & files_to_lint

    for filename in filenames_to_check:
        if filename.startswith('third_party/'):
            continue
        maybe_error = _lint_single_wsgi_entrypoint_import(filename)
        if maybe_error:
            yield maybe_error
def lint_non_literal_i18n_do_not_translate(files_to_lint):
    """Flag i18n_do_not_translate() calls whose argument isn't a literal.

    We require anything we mark with i18n_do_not_translate to be only
    a string literal: i18n_do_not_translate marks the string as
    "safe" (jinja2 won't autoescape it), so we must be extra careful
    not to create a potential XSS attack vector by ever marking some
    variable as safe.

    A line marked with {{# @Nolint #}} is exempt.

    Returns:
        List of triples: (filename, lineno, error message)
    """
    # Only html/txt files under templates/ can contain this markup.
    template_files = lintutil.filter(files_to_lint, prefix='templates/',
                                     suffix=('.html', '.txt'))
    for filename in template_files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Honor an @Nolint between the match and its end-of-line.
            eol = contents.find('\n', fn_match.end())
            if eol == -1:
                eol = len(contents)
            if '@Nolint' in contents[fn_match.start():eol]:
                continue
            for bad in _BAD_DO_NOT_TRANSLATE_RE.finditer(fn_match.group(0)):
                lineno = 1 + contents.count(
                    '\n', 0, fn_match.start() + bad.start(1))
                yield (filename, lineno,
                       '%s contains something that is not just a '
                       'literal string. Only literal strings can be '
                       'inside i18n_do_not_translate.' % bad.group(1))
def lint_missing_safe_in_jinja2(files_to_lint):
    """Find instances where we translate html but don't mark it |safe.

    We html-escape the output of {{ _("...") }} in html templates,
    which is safe but can cause problems when the text is
    {{ _("<b>hi</b>") }}.  In that case, the user needs to do
    {{ _("<b>hi</b>")|safe }}.  We detect instances where the user
    does {{ _("...<..") }} in jinja2 templates but lacks the |safe
    afterwards.

    Unless that line is marked with {{# @Nolint #}}, we flag it as
    a lint error.

    Returns:
        List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to html and txt files under templates/.
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix=('.html', '.txt'))
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue
            # Check _("...") calls: the main string, then each kwarg.
            for m in _GETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(1))
                    # Bugfix: the message used to read "escapingthe"
                    # (missing space between the concatenated parts).
                    yield (filename, linenum,
                           'Replace %s with _("..."|safe) to avoid escaping '
                           'the tag/attr inside the _().' % m.group(1))
                for km in _KWARG_RE.finditer(m.group(3)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with _(..., foo="..."|safe) to '
                               'avoid escaping the tag/attr inside foo.'
                               % km.group(1))
            # Check ngettext(...) calls: singular string, plural
            # string, then each kwarg.
            for m in _NGETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with ngettext("..."|safe, ...) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(1))
                if _needs_safe(m.group(3), m.group(4)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(3))
                    yield (filename, linenum,
                           'Replace %s with ngettext(..., "..."|safe) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(3))
                for km in _KWARG_RE.finditer(m.group(5)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with ngettext(..., foo="..."|safe)'
                               ' to avoid escaping the tag/attr inside foo.'
                               % km.group(1))
def lint_not_using_gettext_at_import_time(files_to_lint):
    """Make sure we don't use i18n._/etc in a static context.

    If you have a global variable such as '_FOO = i18n._("bar")', at
    the top of some .py file, it won't work the way you intend
    because i18n._() needs to be called while handling a request in
    order to know what language to translate to.  (Instead, you'd
    need to do _FOO = lambda: i18n._("bar") or some such.)

    This tests for this by mocking i18n._ et al., and then importing
    everything (but running nothing).  Any i18n._ calls that happen
    during this import are problematic!  We have to spawn a new
    python process to make sure we do the importing properly (and
    without messing with the currently running python environment!)

    Yields:
        Triples: (filename, lineno, error message)
    """
    candidate_files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    files_to_lint = []
    for filename in candidate_files_to_lint:
        contents = lintutil.file_contents(filename)
        # Check that it's plausible this file uses i18n._ or similar.
        # This also avoids importing random third-party files that may
        # have nasty side-effects at import time (all our code is too
        # well-written to do that!)
        if 'import intl' in contents or 'from intl' in contents:
            files_to_lint.append(filename)

    # The pseudo-script we run in a subprocess: it replaces
    # intl.request.ka_locale with a hook that reports the importing
    # file/line whenever translation is attempted at import time.
    # (The string content is python 2 -- note the print statement --
    # and is executed by sys.executable below.)
    program = """\
import os  # @Nolint(linter can't tell this is in a string!)
import sys  # @Nolint(linter can't tell this is in a string!)
import traceback

import intl.request  # @Nolint(seems unused to our linter but it's used)

_ROOT = "%s"


def add_lint_error(f):
    # We assume code in 'intl' doesn't make this mistake, and thus
    # the first stack-frame before we get into 'intl' is the
    # offending code.  ctx == '<string>' means the error occurred in
    # this pseudo-script.
    for (ctx, lineno, fn, line) in reversed(traceback.extract_stack()):
        if os.path.isabs(ctx):
            ctx = os.path.relpath(ctx, _ROOT)
        if ctx != '<string>' and not ctx.startswith('intl/'):
            if ctx == f:
                print 'GETTEXT ERROR {} {}'.format(ctx, lineno)
            break
    return 'en'   # a fake value for intl.request.ka_locale
""" % ka_root.root

    if not files_to_lint:
        return

    # Append one import per file-to-lint, each preceded by the mock
    # installation so errors are attributed to the right file.
    for filename in files_to_lint:
        modulename = ka_root.relpath(filename)
        modulename = os.path.splitext(modulename)[0]   # nix .py
        modulename = modulename.replace('/', '.')
        # Force a re-import.
        program += 'sys.modules.pop("%s", None)\n' % modulename
        program += ('intl.request.ka_locale = lambda: add_lint_error("%s")\n'
                    % ka_root.relpath(filename))
        program += 'import %s\n' % modulename

    p = subprocess.Popen(
        ['env', 'PYTHONPATH=%s' % ':'.join(sys.path),
         sys.executable, '-c', program],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    p.wait()
    lint_output = p.stdout.read()
    # Each mocked-translation hit was printed as 'GETTEXT ERROR <file> <line>'.
    for line in lint_output.splitlines():
        if line.startswith('GETTEXT ERROR '):
            line = line[len('GETTEXT ERROR '):]
            (filename, linenum) = line.rsplit(' ', 1)
            yield (ka_root.join(filename), int(linenum),
                   'Trying to translate at import-time, but '
                   'translation only works at runtime! '
                   'Use intl.i18n.mark_for_translation() instead.')
def lint_banned_file_types(files_to_lint):
    """Complain about every linted file that uses a banned extension.

    Yields:
        Triples: (filename, lineno, error message)
    """
    offenders = lintutil.filter(files_to_lint, suffix=_BAD_EXTENSIONS,
                                exclude_substrings=_EXISTING_FILE_WHITELIST)
    for offender in offenders:
        yield (offender, 1, "Use React and Aphrodite instead.")
# NOTE(review): this is a byte-for-byte duplicate of the
# lint_missing_safe_in_jinja2 defined earlier in this file; at import
# time this definition silently shadows the earlier one.  One of the
# two copies should be deleted.  Also note the first message below
# renders as "escapingthe" -- the concatenated literals are missing a
# space.
def lint_missing_safe_in_jinja2(files_to_lint):
    """Find instances where we translate html but don't mark it |safe.

    We html-escape the output of {{ _("...") }} in html templates,
    which is safe but can cause problems when the text is
    {{ _("<b>hi</b>") }}.  In that case, the user needs to do
    {{ _("<b>hi</b>")|safe }}.  We detect instances where the user
    does {{ _("...<..") }} in jinja2 templates but lacks the |safe
    afterwards.

    Unless that line is marked with {{# @Nolint #}}, we flag it as
    a lint error.

    Returns:
        List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to html and txt files under templates/.
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix=('.html', '.txt'))
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue
            for m in _GETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with _("..."|safe) to avoid escaping'
                           'the tag/attr inside the _().' % m.group(1))
                for km in _KWARG_RE.finditer(m.group(3)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with _(..., foo="..."|safe) to '
                               'avoid escaping the tag/attr inside foo.'
                               % km.group(1))
            for m in _NGETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with ngettext("..."|safe, ...) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(1))
                if _needs_safe(m.group(3), m.group(4)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(3))
                    yield (filename, linenum,
                           'Replace %s with ngettext(..., "..."|safe) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(3))
                for km in _KWARG_RE.finditer(m.group(5)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with ngettext(..., foo="..."|safe)'
                               ' to avoid escaping the tag/attr inside foo.'
                               % km.group(1))
def lint_static_url_modifier(files_to_lint):
    """Be sure every static url in an .html file uses |static_url or static().

    We serve static urls such as images and fonts from kastatic.org,
    for efficiency.  The |static_url and static_url() jinja2
    modifiers are ways to rewrite urls to use that.  This linter
    makes sure people remember to do that for new code.

    Yields:
        Triples: (filename, lineno, error message)
    """
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix='.html')
    if not files:
        return

    # generate_static_urls_regexp matches standalone urls, but we need
    # to match them in the context of a line, so we need to mess with
    # ^ and $ a bit.  We just get rid of $, which could yield false
    # positives in theory but doesn't in practice.  We replace ^ with
    # something that matches 'things you might find before the start
    # of a static url but not before something like an API route name."
    static_url_regexp = url_util.STATIC_URL_REGEX
    static_url_regexp = static_url_regexp.replace('$', '')
    # The '%s' in the regexp below is to catch jinja2 html like:
    #   <img src="{{ "https://%s/images/..." % hostname }}">
    static_url_regexp = static_url_regexp.replace(
        '^/', r'([^\w_./-]["\'/]|khanacademy.org/|%s/)')
    # It's good when the static-url is followed by '|static_url' (it
    # needs to be in a '{{ ... }}' for that) or preceded by
    # 'static_url('.  We say the regexp is bad if neither case is
    # present.  This is cheaper than using a negative lookahead (I
    # think).
    good_regexp = (r'(static_url\s*\(\s*)?(%s)([^}]*\|static_url)?'
                   % static_url_regexp)
    # We do two checks: one for the static url by name, and one for
    # any src= parameter (or poster=, which is used for videos).
    static_url_re = re.compile(good_regexp)
    # We take advantage of the fact we don't use spaces in our static urls.
    src_regex = re.compile(r'\b(src|poster)=((?:\{[^\}]*\}|[^ >])*)')

    for f in files:
        contents_of_f = lintutil.file_contents(f)
        error_lines = set()    # just to avoid duplicate error messages
        for m in static_url_re.finditer(contents_of_f):
            # Bad iff neither the 'static_url(' prefix group nor the
            # '|static_url' suffix group matched.
            if not m.groups()[0] and not m.groups()[-1]:
                lineno = 1 + contents_of_f.count('\n', 0, m.start())
                # STATIC_URL_REGEX matches the character before the
                # url-string, and also the opening quote, so get rid
                # of them both.
                url_prefix = m.group(2)[1:].strip('\'\"')
                yield (f, lineno,
                       ('The static url "%s..." should use "|static_url" ' +
                        'after the url or call "static_url()" with the url ' +
                        'passed in') % url_prefix)
                error_lines.add(lineno)

        # emails maybe want to point to ka.org to avoid confusion.
        # TODO(csilvers): should we point them to kastatic.org too?
        if '/emails/' not in f:
            for m in src_regex.finditer(contents_of_f):
                url = m.group(2).strip('\'\"{} ')
                if not url:
                    continue
                # A simple, but effective, effort to find urls that
                # point to non-ka domains.  (Most ka domains we use
                # are .org!)
                if '.com' in url and '.appspot.com' not in url:
                    continue
                if url.startswith('data:'):
                    continue
                # If it's already a url that we serve off the CDN,
                # don't complain.
                if any(u in url for (_, u) in url_util.FASTLY_URL_MAP):
                    continue
                # If it's an <iframe src=...>, don't complain; those
                # are never static resources.  We just do a cheap check.
                end_of_line = contents_of_f.index('\n', m.end())
                if '</iframe>' in contents_of_f[m.end():end_of_line]:
                    continue
                if '|static_url' not in url:
                    lineno = 1 + contents_of_f.count('\n', 0, m.start())
                    if lineno not in error_lines:    # a new error
                        yield (f, lineno,
                               'Use |static_url with "%s" attributes'
                               % m.group(1))