def lint_absolute_import(files_to_lint):
    """Make sure we use absolute imports everywhere.

    Suppose you have code like a top-level util.py and also
    email/util.py, and you do "import util".  If you're doing that from
    email/foo.py, then it will import email/util.py instead of the
    top-level util.py.  Using absolute imports everywhere avoids the
    risk of that problem.  It also makes it more obvious to those
    looking at the code, exactly what's being imported.
    """
    for f in lintutil.filter(files_to_lint, suffix='.py'):
        import_lines = _get_import_lines(f)
        # A file with no imports needs no absolute-import directive.
        if not import_lines:
            continue
        # Already has the directive: nothing to report.
        has_directive = any(line.module == '__future__.absolute_import'
                            for line in import_lines)
        if has_directive:
            continue
        contents = lintutil.file_contents(f)
        # If the file can be run as a script (shebang line), then
        # absolute-import doesn't always work since we don't
        # automatically have webapp-root in the pythonpath for scripts,
        # so we don't require the directive in that case.
        if contents.startswith('#!'):
            continue
        # __future__ imports come first, so the best line number to
        # report for the error is the first import line.
        yield (f, import_lines[0].lineno,
               'Modules must use: from __future__ import absolute_import')
def lint_js_files_are_translated(files_to_lint):
    """Verify that nltext in the js files are marked for translation.

    See docstring of: _lint_js_content

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # jsx files must be compiled first; we then lint the resulting js.
    jsx_targets = [('genfiles/compiled_jsx/en/%s.js' % ka_root.relpath(f), {})
                   for f in files_to_lint if f.endswith('.jsx')]
    kake.make.build_many(jsx_targets)

    for abs_f in lintutil.filter(files_to_lint, suffix=('.js', '.jsx')):
        f = ka_root.relpath(abs_f)
        # Files we never translate needn't be 'properly' marked up.
        if intl.english_only.should_not_translate_file(f):
            continue
        # For jsx source, lint the compiled js output instead.
        if f.endswith(".jsx"):
            abs_f = "%s/genfiles/compiled_jsx/en/%s.js" % (ka_root.root, f)
            f = ka_root.relpath(abs_f)
        file_contents = lintutil.file_contents(abs_f)
        for error in _lint_js_content(abs_f, file_contents):
            yield error
def lint_non_literal_i18n_in_javascript(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable has
    been translated or not.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix=('.js', '.jsx'),
                                    exclude_substrings=('/i18n.js',
                                                        '/i18n_test.js'))
    # This regexp pattern captures a string, possibly concatenated with +'s.
    js_str = (r'(?:' +
              r'"(?:\\.|[^"])*"|' +
              r"'(?:\\.|[^'])*'|" +
              r'`(?:\\.|[^`])*`' +
              ')')
    js_concat_str = '\s*%s(?:\s*\+\s*%s)*\s*' % (js_str, js_str)
    # Every i18n._ / i18n.ngettext call site...
    gettext_occurrences = re.compile(r'\b(i18n._|i18n.ngettext)\(')
    # ...vs. call sites whose arguments are (concatenations of)
    # string literals.  Keyed by start offset so the two can be matched.
    valid_gettext_occurrences = re.compile(
        r'\bi18n._\(%(str)s[,)]|\bi18n.ngettext\(%(str)s,\s*%(str)s[,)]'
        % {'str': js_concat_str})
    for f in files_to_lint:
        contents = lintutil.file_contents(f)
        all_occurrences = {
            m.start(): m for m in gettext_occurrences.finditer(contents)
        }
        valid_occurrences = {
            m.start(): m for m in valid_gettext_occurrences.finditer(contents)
        }
        for (startpos, m) in all_occurrences.iteritems():
            i18n_fn = m.group(1)
            msg = None    # set to non-None if there's a problem.
            if startpos not in valid_occurrences:
                msg = ('%s must have string literals as arguments, '
                       'with no variables or templates' % i18n_fn)
            else:
                # Then we're ok with this!  *Unless* it's a template string
                # with $(...) inside it, then we're not ok.
                m2 = valid_occurrences[startpos]
                # More '${' than escaped '\${' means an unescaped
                # interpolation exists in the template string.
                if m2.group().count(r'${') > m2.group().count(r'\${'):
                    msg = ('You must use %' + '(...)s with template strings '
                           'inside %s, not ${...}' % i18n_fn)
            if msg:
                start_lineno = 1 + contents.count('\n', 0, startpos)
                # Doing a real regexp to find the end of this function call
                # is tough, we just do something simple and pray.
                end_paren = contents.find(')', startpos)
                if end_paren == -1:
                    end_paren = len(contents)
                end_lineno = 1 + contents.count('\n', 0, end_paren)
                # A @Nolint anywhere in the call's line range silences it.
                if any(
                        lintutil.has_nolint(f, lineno)
                        for lineno in xrange(start_lineno, end_lineno + 1)):
                    continue
                yield (f, start_lineno, msg)
def lint_js_files_are_translated(files_to_lint): """Verify that nltext in the js files are marked for translation. See docstring of: _lint_js_content Returns: List of triples: (filename, lineno, error message) """ # Make sure jsx files are compiled first, then we will lint the resulting # js. kake.make.build_many([('genfiles/compiled_jsx/en/%s.js' % ka_root.relpath(f), {}) for f in files_to_lint if f.endswith('.jsx')]) files_to_lint = lintutil.filter(files_to_lint, suffix=('.js', '.jsx')) for f in files_to_lint: abs_f = f f = ka_root.relpath(f) # Exclude files that we don't need to translate: we don't care # if those files are 'properly' marked up or not. if intl.english_only.should_not_translate_file(f): continue if f.endswith(".jsx"): abs_f = "%s/genfiles/compiled_jsx/en/%s.js" % (ka_root.root, f) f = ka_root.relpath(abs_f) file_contents = lintutil.file_contents(abs_f) for error in _lint_js_content(abs_f, file_contents): yield error
def lint_every_rendered_component_has_a_fixture(files_to_lint):
    """Check that every component we render has an associated fixture file.

    In order to test that a particular react component can be
    server-side rendered, we need to actually try to render it with a
    particular value for props.  This is what component.fixture.js
    files are for.  We just make sure the people write them!

    For now, we allow the fixture file to be empty (just `[]`).  Later
    we may insist on actually useful fixtures.
    """
    for html_file in lintutil.filter(files_to_lint, suffix='.html'):
        contents = lintutil.file_contents(html_file)
        for match in RENDER_REACT_RE.finditer(contents):
            component_file = match.group(1)
            # To be server-side renderable, the fixture file has to be
            # a javascript file, not jsx or something else.
            fixture_file = component_file + '.fixture.js'
            if os.path.exists(ka_root.join(fixture_file)):
                continue
            lineno = 1 + contents.count('\n', 0, match.start())
            yield (html_file, lineno,
                   '%s must have an associated fixture file %s'
                   % (component_file, fixture_file))
def lint_duplicate_test_functions(files_to_lint):
    """Enforce that no two test functions in the same class share a name.

    This prevents one test from not running because another test
    overwrites it.
    """
    for filename in lintutil.filter(files_to_lint, suffix='_test.py'):
        contents = lintutil.file_contents(filename)
        functions = _matches_with_line_numbers(_TEST_FN_RE, contents)
        classes = _matches_with_line_numbers(_CLASS_NAME_RE, contents)

        # Map 'Class.function' -> every line it is defined on.
        definition_lines = collections.defaultdict(list)
        for (fname, line) in functions:
            cls = _find_class_at_line(classes, line)
            if cls is not None:
                definition_lines[cls + '.' + fname].append(line)

        for (fname, dup_lines) in definition_lines.iteritems():
            if len(dup_lines) <= 1:
                continue
            # A @Nolint on any of the duplicates suppresses the error.
            if any(lintutil.has_nolint(filename, line)
                   for line in dup_lines):
                continue
            yield (filename, max(dup_lines),
                   'Duplicate test function name %(name)s'
                   ' at lines %(lines)s'
                   % {'name': fname,
                      'lines': ', '.join(str(l) for l in dup_lines)})
def lint_duplicate_test_functions(files_to_lint):
    """Enforce that no two test functions in the same class have the same name.

    This prevents one test from not running because another test
    overwrites it.

    NOTE(review): this re-definition shadows an identical
    lint_duplicate_test_functions defined earlier in this file; one of
    the two copies should probably be deleted.
    """
    files = lintutil.filter(files_to_lint, suffix='_test.py')
    for filename in files:
        contents = lintutil.file_contents(filename)
        functions = _matches_with_line_numbers(_TEST_FN_RE, contents)
        classes = _matches_with_line_numbers(_CLASS_NAME_RE, contents)
        # Map qualified test name ('Class.function') -> definition lines.
        lines = collections.defaultdict(list)
        for fname, line in functions:
            cls = _find_class_at_line(classes, line)
            if cls is None:
                continue
            qualified_fname = cls + '.' + fname
            lines[qualified_fname] += [line]
        for fname, dup_lines in lines.iteritems():
            if len(dup_lines) > 1:
                # A @Nolint on any of the duplicates suppresses the error.
                if any(lintutil.has_nolint(filename, line)
                       for line in dup_lines):
                    continue
                yield (filename, max(dup_lines),
                       'Duplicate test function name %(name)s'
                       ' at lines %(lines)s'
                       % {'name': fname,
                          'lines': ', '.join(str(l) for l in dup_lines)})
def lint_javascript_in_html(files_to_lint):
    """Run eslint on javascript content inside html files.

    We want to make sure that the js in our html files has no unused
    variables/etc, so we can do better dependency analysis on it.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix='.html')
    lint_inputs = []    # list of filename/contents pairs
    for f in files:
        contents_of_f = lintutil.file_contents(f)
        # extract_js_from_html actually can return several versions
        # of the js contents, each with a different branch of an
        # if/else commented out.  (So for input like
        #   <script>var x = { {%if c%}y: 4{%else%}y: 5{%endif%} };</script>
        # we'd see both 'var x = { y: 4 };' and 'var x = { y: 5 }')
        # We lint all such strings and combine the output.
        js_contents_iter = js_in_html.extract_js_from_html(contents_of_f,
                                                           "jinja2",
                                                           file_name=f)
        # The iterator parses lazily, so HTML parse errors surface while
        # looping, not at the extract_js_from_html() call itself.
        try:
            for js_contents in js_contents_iter:
                lint_inputs.append((f, js_contents))
        except Exception, why:
            yield (f, 1, 'HTML parse error: %s' % why)
            continue
        # NOTE(review): lint_inputs is accumulated but nothing in this
        # chunk ever runs eslint on it or yields its results -- confirm
        # the remainder of this function wasn't lost.
def lint_package_imports(files_to_lint):
    """Find cases where we import a package (directory) rather than a module.

    For instance, "import emails".  That is not a module, it's a
    directory.  Sometimes importing a directory is correct, for
    third-party modules with non-trivial __init__.py's.  But it's never
    right for our code.
    """
    for f in lintutil.filter(files_to_lint, suffix='.py'):
        for import_line in _get_import_lines(f):
            module = import_line.module
            # The rules are different for third-party code, and
            # __future__'s aren't even real modules.
            if module.startswith(('__future__', 'third_party')):
                continue
            if not _is_a_first_party_package(module):
                continue
            # Suggest the submodules the file actually references.
            contents = lintutil.file_contents(f)
            used = re.findall(r'%s\.[\w_]+' % import_line.name, contents)
            yield (f, import_line.lineno,
                   "Do not import the whole directory '%s'; "
                   "import the modules inside it, possibly: %s"
                   % (module, ', '.join(sorted(set(used)))))
def lint_non_literal_i18n_do_not_translate(files_to_lint): """Find where we mark html as not needing translation but its non-literal We require anything we mark with i18n_do_not_translate to be only a string literal. i18n_do_not_translate marks the string as "safe" (that is jinja2 won't autoescape it) so we need to be extra careful we don't create a potential XSS attack vector if we end up marking some variable as safe. Unless that line is marked with {{# @Nolint #}}, we flag it as a lint error. Returns: List of triples: (filename, lineno, error message) """ # Limit files-to-lint to files under templates/ that are html or txt. files = lintutil.filter(files_to_lint, prefix='templates/', suffix=('.html', '.txt')) for filename in files: contents = lintutil.file_contents(filename) for fn_match in _J2_FUNCTION_RE.finditer(contents): # Make sure there's no @Nolint anywhere around this function. newline = contents.find('\n', fn_match.end()) newline = newline if newline > -1 else len(contents) if '@Nolint' in contents[fn_match.start():newline]: continue for m in _BAD_DO_NOT_TRANSLATE_RE.finditer(fn_match.group(0)): linenum = 1 + contents.count('\n', 0, fn_match.start() + m.start(1)) yield (filename, linenum, '%s contains something that is not just a ' 'literal string. Only literal strings can be ' 'inside i18n_do_not_translate.' % m.group(1))
def lint_non_literal_i18n_in_javascript(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable has
    been translated or not.

    NOTE(review): this re-definition shadows an identical
    lint_non_literal_i18n_in_javascript defined earlier in this file;
    one of the two copies should probably be deleted.
    """
    files_to_lint = lintutil.filter(
        files_to_lint, suffix=('.js', '.jsx'),
        exclude_substrings=('/i18n.js', '/i18n_test.js'))
    # This regexp pattern captures a string, possibly concatenated with +'s.
    js_str = (r'(?:' +
              r'"(?:\\.|[^"])*"|' +
              r"'(?:\\.|[^'])*'|" +
              r'`(?:\\.|[^`])*`' +
              ')')
    js_concat_str = '\s*%s(?:\s*\+\s*%s)*\s*' % (js_str, js_str)
    # Every i18n._ / i18n.ngettext call site...
    gettext_occurrences = re.compile(r'\b(i18n._|i18n.ngettext)\(')
    # ...vs. call sites whose arguments are (concatenations of)
    # string literals.  Keyed by start offset so the two can be matched.
    valid_gettext_occurrences = re.compile(
        r'\bi18n._\(%(str)s[,)]|\bi18n.ngettext\(%(str)s,\s*%(str)s[,)]'
        % {'str': js_concat_str})
    for f in files_to_lint:
        contents = lintutil.file_contents(f)
        all_occurrences = {
            m.start(): m for m in gettext_occurrences.finditer(contents)}
        valid_occurrences = {
            m.start(): m for m in valid_gettext_occurrences.finditer(contents)}
        for (startpos, m) in all_occurrences.iteritems():
            i18n_fn = m.group(1)
            msg = None    # set to non-None if there's a problem.
            if startpos not in valid_occurrences:
                msg = ('%s must have string literals as arguments, '
                       'with no variables or templates' % i18n_fn)
            else:
                # Then we're ok with this!  *Unless* it's a template string
                # with $(...) inside it, then we're not ok.
                m2 = valid_occurrences[startpos]
                # More '${' than escaped '\${' means an unescaped
                # interpolation exists in the template string.
                if m2.group().count(r'${') > m2.group().count(r'\${'):
                    msg = ('You must use %' + '(...)s with template strings '
                           'inside %s, not ${...}' % i18n_fn)
            if msg:
                start_lineno = 1 + contents.count('\n', 0, startpos)
                # Doing a real regexp to find the end of this function call
                # is tough, we just do something simple and pray.
                end_paren = contents.find(')', startpos)
                if end_paren == -1:
                    end_paren = len(contents)
                end_lineno = 1 + contents.count('\n', 0, end_paren)
                # A @Nolint anywhere in the call's line range silences it.
                if any(lintutil.has_nolint(f, lineno)
                       for lineno in xrange(start_lineno, end_lineno + 1)):
                    continue
                yield (f, start_lineno, msg)
def _find_unused_imports(root, module_parts_so_far):
    """Yields (module_name, lineno) pairs of errors.

    `root` is a nested dict mapping module-name parts to either a
    sub-dict (a package subtree), the string 'USED', or the line number
    the import appeared on; only imports never marked 'USED' are
    reported.
    """
    for (part, subtree) in root.iteritems():
        if isinstance(subtree, dict):
            # Recurse
            for error in _find_unused_imports(
                    subtree, module_parts_so_far + [part]):
                yield error
        elif subtree == 'USED':
            # Used import
            pass
        else:
            # Unused import
            # NOTE(review): `f` is not defined in this function -- it
            # looks like a filename free variable from an enclosing
            # scope that was lost; as written this raises NameError.
            # Confirm where the filename should come from.
            contents = lintutil.file_contents(f)
            lines = contents.splitlines(True)
            # Here, `subtree` stores the line the import was on
            lineno = subtree
            # If it has an "@UnusedImport" decorator, unused == ok!
            if '@UnusedImport' not in lines[lineno - 1]:
                yield (".".join(module_parts_so_far + [part]), lineno)
def lint_no_user_property(files_to_lint):
    """Enforce that nobody uses UserProperty.

    ...unless marked as an explicitly approved legacy usage via @Nolint.
    """
    for filename in lintutil.filter(files_to_lint, suffix='.py'):
        contents = lintutil.file_contents(filename)
        for match in _USERPROPERTY_RE.finditer(contents):
            # A @Nolint between the match and the end of its line
            # marks an approved legacy usage.
            eol = contents.find('\n', match.end())
            if eol == -1:
                eol = len(contents)
            if '@Nolint' in contents[match.start():eol]:
                continue
            linenum = 1 + contents.count('\n', 0, match.start())
            yield (filename, linenum,    # filename and linenum
                   "Do not use UserProperty, it is not safe. Use UserData's "
                   "key as its foreign key, instead.")
def lint_iife_in_html_javascript(files_to_lint):
    """Make sure js in <script> tags doesn't leak globals by using an IIFE.

    IIFE is when you surround js code in '(function() { ... })();'.
    This prevents any variable declarations from polluting the global
    scope.  We require all <script> tags to have an IIFE if they
    declare a variable.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix='.html')
    # For determining if we need an IIFE, we only care about 'var'
    # statements.
    var_re = re.compile(r'\bvar\b')
    for f in files:
        contents_of_f = lintutil.file_contents(f)
        # extract_js_from_html actually can return several versions
        # of the js contents, each with a different branch of an
        # if/else commented out.  (So for input like
        #   <script>var x = { {%if c%}y: 4{%else%}y: 5{%endif%} };</script>
        # we'd see both 'var x = { y: 4 };' and 'var x = { y: 5 }')
        # We lint all such strings and combine the output.
        js_contents_iter = js_in_html.extract_js_from_html(contents_of_f,
                                                           "jinja2",
                                                           keep_re=var_re,
                                                           file_name=f)
        # The iterator parses lazily, so HTML parse errors surface while
        # looping, not at the extract_js_from_html() call itself.
        try:
            for js_contents in js_contents_iter:
                # Complain if a script declares a var but isn't wrapped
                # in an IIFE.
                if (not _IIFE_RE.match(js_contents) and
                        var_re.search(js_contents)):
                    # Our script is the first non-blank line in the
                    # extracted output.
                    lineno = len(js_contents) - len(js_contents.lstrip('\n'))
                    if not lintutil.has_nolint(f, lineno + 1):
                        yield (f, lineno + 1,
                               "Must wrap this script's contents in "
                               "'(function() { ... })();' (IIFE) to "
                               "avoid leaking vars to global state")
        except Exception, why:
            yield (f, 1, 'HTML parse error: %s' % why)
            continue
def lint_no_user_property(files_to_lint):
    """Enforce that nobody uses UserProperty.

    ...unless marked as an explicitly approved legacy usage via @Nolint.

    NOTE(review): this re-definition shadows an identical
    lint_no_user_property defined earlier in this file; one of the two
    copies should probably be deleted.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    for filename in files_to_lint:
        contents = lintutil.file_contents(filename)
        for fn_match in _USERPROPERTY_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue
            linenum = 1 + contents.count('\n', 0, fn_match.start())
            yield (
                filename, linenum,    # filename and linenum
                "Do not use UserProperty, it is not safe. Use UserData's "
                "key as its foreign key, instead.")
def lint_no_wrong_i18n_markup_in_jinja2(files_to_lint):
    """Find where we mark js within html with i18n._ instead of {{ _js()

    Returns:
       List of triples: (filename, lineno, error message)
    """
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix='.html')
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _BAD_JS_MARKUP.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue
            # NOTE(review): this re-scans the outer match with the same
            # _BAD_JS_MARKUP regex (sibling linters scan with an
            # enclosing-scope regex like _J2_FUNCTION_RE here), which
            # doubles the fn_match.start() offset in the linenum math.
            # Confirm this is intended.
            for m in _BAD_JS_MARKUP.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       'Do {{ _js("%s") }} instead of %s inside <script> '
                       'tags within jinja2.' % (m.group(1), m.group(0)))
def lint_missing_safe_in_jinja2(files_to_lint):
    """Find instances where we translate html but don't mark it |safe.

    We html-escape the output of {{ _("...") }} in html templates, which
    is safe but can cause problems when the text is {{ _("<b>hi</b>") }}.
    In that case, the user needs to do {{ _("<b>hi</b>")|safe }}.  We
    detect instances where the user does {{ _("...<..") }} in jinja2
    templates but lacks the |safe afterwards.

    Unless that line is marked with {{# @Nolint #}}, we flag it as a
    lint error.

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to html and txt files under templates/.
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix=('.html', '.txt'))
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            # Check _("...") calls: the translated text and any kwargs.
            for m in _GETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(1))
                    # BUGFIX: added the missing space between "escaping"
                    # and "the" in this error message.
                    yield (filename, linenum,
                           'Replace %s with _("..."|safe) to avoid escaping '
                           'the tag/attr inside the _().' % m.group(1))
                for km in _KWARG_RE.finditer(m.group(3)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with _(..., foo="..."|safe) to '
                               'avoid escaping the tag/attr inside foo.'
                               % km.group(1))

            # Check ngettext(...) calls: singular, plural, and kwargs.
            for m in _NGETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with ngettext("..."|safe, ...) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(1))
                if _needs_safe(m.group(3), m.group(4)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(3))
                    yield (filename, linenum,
                           'Replace %s with ngettext(..., "..."|safe) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(3))
                for km in _KWARG_RE.finditer(m.group(5)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with ngettext(..., foo="..."|safe)'
                               ' to avoid escaping the tag/attr inside foo.'
                               % km.group(1))
def _lint_unnecessary_late_imports(files_to_lint):
    """Report on all late imports that could safely be top-level.

    We make an import "late" -- that is, inside a function rather than
    at the top level -- when it's needed to avoid circular imports.
    However, sometimes a refactor makes it so an import doesn't need to
    be late anymore.  That's hard to tell by inspection, so this linter
    does it for you!

    Conceptually, this is a standalone linter, so I wrote it like that.
    However, it doesn't deal well with cycles so we only want to run it
    when there are no cycles.  Thus, we run it at the end of the
    cycle-checking linter, and then only if there are no cycles.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    # Files we shouldn't check for late imports for some reason.
    # Can end with `.` to match all modules starting with this prefix.
    MODULE_BLACKLIST = frozenset([
        # Is imported first, must do minimal work
        'appengine_config',
        # Minimize deps so new code can import it without fear.
        'users.current_user',
        # Minimize deps to keep `make tesc` output small.
        'testutil.gae_model',
        'testutil.mapreduce_stub',
        # Does a bunch of importing after fixing up sys.path
        'tools.appengine_tool_setup',
        'tools.devservercontext',
        'tools.devshell',
        # TODO(csilvers): remove this and move the babel routines to their
        # own file instead.
        'shared_jinja',
        # Does importing after modifying modules.
        'pickle_util_test',
        # Has its own style rule stating late imports are preferred
        'kake.',
    ])

    # Imports that are allowed to be late for some reason.
    LATE_IMPORT_BLACKLIST = frozenset([
        # Can only import once we've verified we're not running in prod
        'sandbox_util',
        'mock',
        'kake.server_client',
        'kake.make',
    ])

    for f in files_to_lint:
        # Convert the file path to a dotted module name.
        module = ka_root.relpath(os.path.splitext(f)[0]).replace(os.sep, '.')
        # Some files are expected to have late imports.
        # Exact names match directly; entries ending in '.' match as
        # prefixes.
        if (module in MODULE_BLACKLIST or
                module.startswith(tuple(
                    m for m in MODULE_BLACKLIST if m.endswith('.')))):
            continue
        for late_import in _get_late_imports(module):
            # Some modules can *only* be late-imported,
            if late_import in LATE_IMPORT_BLACKLIST:
                continue
            # If late_import transitively imports us, then it's not safe
            # to move it to the top level: that would introduce a cycle.
            if module in _calculate_transitive_imports(late_import):
                continue
            # If a module is marked `@UnusedImport` or `@Nolint`, then
            # it's being imported for its side effects, and we don't
            # want to move it to the top level.
            import_lines = _get_import_lines(f)
            late_import_line = next(
                il for il in import_lines
                if (il.module == late_import and not il.is_toplevel))
            contents = lintutil.file_contents(f)
            lines = contents.splitlines(True)
            if ('@UnusedImport' in lines[late_import_line.lineno - 1] or
                    '@Nolint' in lines[late_import_line.lineno - 1]):
                continue

            # If we get here, it's safe to move this import to the top level!
            yield (f, late_import_line.lineno,
                   "Make this a top-level import: it doesn't cause cycles")

            # For the rest of our analysis, assume that `module` has
            # moved the import of `late_import` to the top level.
            # TODO(csilvers): figure out the "best" edge to add, rather
            # than just doing first come first served.
            _add_edge(module, late_import)
def lint_static_url_modifier(files_to_lint):
    """Be sure every static url in an .html file uses |static_url or static().

    We serve static urls such as images and fonts from kastatic.org,
    for efficiency.  The |static_url and static_url() jinja2 modifiers
    are ways to rewrite urls to use that.  This linter makes sure
    people remember to do that for new code.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix='.html')
    if not files:
        return

    # generate_static_urls_regexp matches standalone urls, but we need
    # to match them in the context of a line, so we need to mess with
    # ^ and $ a bit.  We just get rid of $, which could yield false
    # positives in theory but doesn't in practice.  We replace ^ with
    # something that matches 'things you might find before the start
    # of a static url but not before something like an API route name."
    static_url_regexp = url_util.STATIC_URL_REGEX
    static_url_regexp = static_url_regexp.replace('$', '')
    # The '%s' in the regexp below is to catch jinja2 html like:
    #   <img src="{{ "https://%s/images/..." % hostname }}">
    static_url_regexp = static_url_regexp.replace(
        '^/', r'([^\w_./-]["\'/]|khanacademy.org/|%s/)')

    # It's good when the static-url is followed by '|static_url' (it
    # needs to be in a '{{ ... }}' for that) or preceded by 'static_url('.
    # We say the regexp is bad if neither case is present.
    # This is cheaper than using a negative lookahead (I think).
    good_regexp = (r'(static_url\s*\(\s*)?(%s)([^}]*\|static_url)?'
                   % static_url_regexp)

    # We do two checks: one for the static url by name, and one for
    # any src= parameter (or poster=, which is used for videos).
    static_url_re = re.compile(good_regexp)
    # We take advantage of the fact we don't use spaces in our static urls.
    src_regex = re.compile(r'\b(src|poster)=((?:\{[^\}]*\}|[^ >])*)')

    for f in files:
        contents_of_f = lintutil.file_contents(f)
        error_lines = set()    # just to avoid duplicate error messages
        for m in static_url_re.finditer(contents_of_f):
            # Bad iff neither the 'static_url(' prefix group nor the
            # '|static_url' suffix group matched.
            if not m.groups()[0] and not m.groups()[-1]:
                lineno = 1 + contents_of_f.count('\n', 0, m.start())
                # STATIC_URL_REGEX matches the character before the url-string,
                # and also the opening quote, so get rid of them both.
                url_prefix = m.group(2)[1:].strip('\'\"')
                yield (f, lineno,
                       ('The static url "%s..." should use "|static_url" ' +
                        'after the url or call "static_url()" with the url ' +
                        'passed in') % url_prefix)
                error_lines.add(lineno)

        # emails maybe want to point to ka.org to avoid confusion.
        # TODO(csilvers): should we point them to kastatic.org too?
        if '/emails/' not in f:
            for m in src_regex.finditer(contents_of_f):
                url = m.group(2).strip('\'\"{} ')
                if not url:
                    continue
                # A simple, but effective, effort to find urls that point to
                # non-ka domains.  (Most ka domains we use are .org!)
                if '.com' in url and '.appspot.com' not in url:
                    continue
                if url.startswith('data:'):
                    continue
                # If it's already a url that we serve off the CDN, don't
                # complain.
                if any(u in url for (_, u) in url_util.FASTLY_URL_MAP):
                    continue
                # If it's an <iframe src=...>, don't complain; those are
                # never static resources.  We just do a cheap check.
                end_of_line = contents_of_f.index('\n', m.end())
                if '</iframe>' in contents_of_f[m.end():end_of_line]:
                    continue
                if '|static_url' not in url:
                    lineno = 1 + contents_of_f.count('\n', 0, m.start())
                    if lineno not in error_lines:    # a new error
                        yield (f, lineno,
                               'Use |static_url with "%s" attributes'
                               % m.group(1))
def lint_missing_safe_in_jinja2(files_to_lint):
    """Find instances where we translate html but don't mark it |safe.

    We html-escape the output of {{ _("...") }} in html templates, which
    is safe but can cause problems when the text is {{ _("<b>hi</b>") }}.
    In that case, the user needs to do {{ _("<b>hi</b>")|safe }}.  We
    detect instances where the user does {{ _("...<..") }} in jinja2
    templates but lacks the |safe afterwards.

    Unless that line is marked with {{# @Nolint #}}, we flag it as a
    lint error.

    Returns:
       List of triples: (filename, lineno, error message)

    NOTE(review): this re-definition shadows an identical
    lint_missing_safe_in_jinja2 defined earlier in this file; one of
    the two copies should probably be deleted.
    """
    # Limit files-to-lint to html and txt files under templates/.
    files = lintutil.filter(files_to_lint, prefix='templates/',
                            suffix=('.html', '.txt'))
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            # Check _("...") calls: the translated text and any kwargs.
            for m in _GETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(1))
                    # BUGFIX: added the missing space between "escaping"
                    # and "the" in this error message.
                    yield (filename, linenum,
                           'Replace %s with _("..."|safe) to avoid escaping '
                           'the tag/attr inside the _().' % m.group(1))
                for km in _KWARG_RE.finditer(m.group(3)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with _(..., foo="..."|safe) to '
                               'avoid escaping the tag/attr inside foo.'
                               % km.group(1))

            # Check ngettext(...) calls: singular, plural, and kwargs.
            for m in _NGETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with ngettext("..."|safe, ...) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(1))
                if _needs_safe(m.group(3), m.group(4)):
                    linenum = 1 + contents.count(
                        '\n', 0, fn_match.start() + m.start(3))
                    yield (filename, linenum,
                           'Replace %s with ngettext(..., "..."|safe) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(3))
                for km in _KWARG_RE.finditer(m.group(5)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with ngettext(..., foo="..."|safe)'
                               ' to avoid escaping the tag/attr inside foo.'
                               % km.group(1))
def lint_not_using_gettext_at_import_time(files_to_lint):
    """Make sure we don't use i18n._/etc in a static context.

    If you have a global variable such as '_FOO = i18n._("bar")', at
    the top of some .py file, it won't work the way you intend because
    i18n._() needs to be called while handling a request in order to
    know what language to translate to.  (Instead, you'd need to do
    _FOO = lambda: i18n._("bar") or some such.)

    This tests for this by mocking i18n._ et al., and then importing
    everything (but running nothing).  Any i18n._ calls that happen
    during this import are problematic!  We have to spawn a new python
    process to make sure we do the importing properly (and without
    messing with the currently running python environment!)
    """
    candidate_files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    files_to_lint = []
    for filename in candidate_files_to_lint:
        contents = lintutil.file_contents(filename)
        # Check that it's plausible this file uses i18n._ or similar.
        # This also avoids importing random third-party files that may
        # have nasty side-effects at import time (all our code is too
        # well-written to do that!)
        if 'import intl' in contents or 'from intl' in contents:
            files_to_lint.append(filename)

    # The program below runs in a subprocess: it stubs out
    # intl.request.ka_locale so that any import-time gettext call
    # prints a 'GETTEXT ERROR <file> <line>' marker we parse later.
    program = """\
import os         # @Nolint(linter can't tell this is in a string!)
import sys        # @Nolint(linter can't tell this is in a string!)
import traceback

import intl.request        # @Nolint(seems unused to our linter but it's used)

_ROOT = "%s"

def add_lint_error(f):
    # We assume code in 'intl' doesn't make this mistake, and thus
    # the first stack-frame before we get into 'intl' is the
    # offending code.  ctx == '<string>' means the error occurred in
    # this pseudo-script.
    for (ctx, lineno, fn, line) in reversed(traceback.extract_stack()):
        if os.path.isabs(ctx):
            ctx = os.path.relpath(ctx, _ROOT)
        if ctx != '<string>' and not ctx.startswith('intl/'):
            if ctx == f:
                print 'GETTEXT ERROR {} {}'.format(ctx, lineno)
            break
    return 'en'    # a fake value for intl.request.ka_locale
""" % ka_root.root

    if not files_to_lint:
        return

    for filename in files_to_lint:
        modulename = ka_root.relpath(filename)
        modulename = os.path.splitext(modulename)[0]    # nix .py
        modulename = modulename.replace('/', '.')
        # Force a re-import.
        program += 'sys.modules.pop("%s", None)\n' % modulename
        program += ('intl.request.ka_locale = lambda: add_lint_error("%s")\n'
                    % ka_root.relpath(filename))
        program += 'import %s\n' % modulename

    p = subprocess.Popen(
        ['env', 'PYTHONPATH=%s' % ':'.join(sys.path),
         sys.executable, '-c', program],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    p.wait()
    lint_output = p.stdout.read()
    # Each marker line is 'GETTEXT ERROR <relative filename> <lineno>'.
    for line in lint_output.splitlines():
        if line.startswith('GETTEXT ERROR '):
            line = line[len('GETTEXT ERROR '):]
            (filename, linenum) = line.rsplit(' ', 1)
            yield (ka_root.join(filename), int(linenum),
                   'Trying to translate at import-time, but '
                   'translation only works at runtime! '
                   'Use intl.i18n.mark_for_translation() instead.')
def lint_templates_are_translated(files_to_lint):
    """Verify that nltext in the input templates are marked for translation.

    All natural-language text in jinja2 and handlebars files should be
    marked for translation, using {{ _("...") }} or {{#_}}...{{/_}}.
    i18nize_templates.py is a tool that can do this for you
    automatically.  We run this tool in 'check' mode to verify that
    every input file is already marked up appropriately.

    Since i18nize_templates isn't perfect (it thinks you need to
    translate text like 'Lebron James' or 'x' when used on a 'close'
    button), you can use nolint-like functionality to tell this linter
    it's ok if some text is not marked up to be translated.  Unlike
    other tests though, we do not use the @Nolint directive for this,
    but instead wrap the relevant text in
    {{ i18n_do_not_translate(...) }} or
    {{#i18nDoNotTranslate}}...{{/i18nDoNotTranslate}}

    Arguments:
        files_to_lint: list of (absolute) filenames to consider.

    Yields:
        (filename, lineno, error-message) triples, one per violation.
    """
    # Add some ka-specific function we know do not have nltext arguments.
    i18nize_templates.mark_function_args_lack_nltext(
        'js_css_packages.package',
        'js_css_packages.script',
        'handlebars_template',
        'youtube.player_embed',
        'log.date.strftime',
        'emails.tracking_image_url',
        'templatetags.to_canonical_url',
        'render_react',
    )

    for f in files_to_lint:
        abs_f = f
        f = ka_root.relpath(f)

        # Exclude files that we don't need to translate: we don't care
        # if those files are 'properly' marked up or not.
        if intl.english_only.should_not_translate_file(f):
            continue

        if (f.startswith('templates' + os.sep) and
                (f.endswith('.html') or f.endswith('.txt'))):
            # jinja2 template
            parser = i18nize_templates.get_parser_for_file(f)
            correction = 'wrap the text in {{ i18n_do_not_translate() }}'
        elif f.endswith('.handlebars'):
            # handlebars template
            parser = i18nize_templates.get_parser_for_file(f)
            correction = ('wrap the text in {{#i18nDoNotTranslate}}...'
                          '{{/i18nDoNotTranslate}}')
        else:
            continue

        file_contents = lintutil.file_contents(abs_f)
        try:
            parsed_output = parser.parse(
                file_contents.decode('utf-8')).encode('utf-8')
        except i18nize_templates.HTMLParser.HTMLParseError as why:
            m = re.search(r'at line (\d+)', str(why))
            linenum = int(m.group(1)) if m else 1
            yield (abs_f, linenum,
                   '"i18nize_templates.py %s" fails: %s' % (f, why))
            continue

        orig_lines = file_contents.splitlines()
        parsed_lines = parsed_output.splitlines()
        # BUGFIX: iterate over zip() rather than indexing by
        # xrange(len(orig_lines)): if the parser emits fewer lines than
        # the input, indexing parsed_lines[i] would raise IndexError
        # and crash the whole linter instead of reporting errors.
        for (i, (orig_line, parsed_line)) in enumerate(
                zip(orig_lines, parsed_lines)):
            if orig_line != parsed_line:
                yield (abs_f, i + 1,
                       'Missing _(); run tools/i18nize_templates.py or %s '
                       '(expecting "%s")' % (correction,
                                             parsed_line.strip()))
def lint_not_using_gettext_at_import_time(files_to_lint):
    """Make sure we don't use i18n._/etc in a static context.

    If you have a global variable such as '_FOO = i18n._("bar")', at
    the top of some .py file, it won't work the way you intend because
    i18n._() needs to be called while handling a request in order to
    know what language to translate to.  (Instead, you'd need to do
    _FOO = lambda: i18n._("bar") or some such.)

    This tests for this by mocking i18n._ et al., and then importing
    everything (but running nothing).  Any i18n._ calls that happen
    during this import are problematic!  We have to spawn a new python
    process to make sure we do the importing properly (and without
    messing with the currently running python environment!)

    Arguments:
        files_to_lint: list of (absolute) filenames to consider.

    Yields:
        (filename, lineno, error-message) triples, one per violation.

    NOTE(review): this re-defines lint_not_using_gettext_at_import_time;
    an identical copy appears earlier in this file.  This later
    definition is the one that wins at import time -- one of the two
    copies should be deleted.
    """
    candidate_files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    files_to_lint = []
    for filename in candidate_files_to_lint:
        contents = lintutil.file_contents(filename)
        # Check that it's plausible this file uses i18n._ or similar.
        # This also avoids importing random third-party files that may
        # have nasty side-effects at import time (all our code is too
        # well-written to do that!)
        if 'import intl' in contents or 'from intl' in contents:
            files_to_lint.append(filename)

    # This pseudo-script is run in a child python process.  It replaces
    # intl.request.ka_locale with a function that records (via stdout)
    # any caller that asks for the locale at import time.
    program = """\
import os    # @Nolint(linter can't tell this is in a string!)
import sys   # @Nolint(linter can't tell this is in a string!)
import traceback

import intl.request   # @Nolint(seems unused to our linter but it's used)

_ROOT = "%s"

def add_lint_error(f):
    # We assume code in 'intl' doesn't make this mistake, and thus
    # the first stack-frame before we get into 'intl' is the
    # offending code.  ctx == '<string>' means the error occurred in
    # this pseudo-script.
    for (ctx, lineno, fn, line) in reversed(traceback.extract_stack()):
        if os.path.isabs(ctx):
            ctx = os.path.relpath(ctx, _ROOT)
        if ctx != '<string>' and not ctx.startswith('intl/'):
            if ctx == f:
                print 'GETTEXT ERROR {} {}'.format(ctx, lineno)
            break
    return 'en'   # a fake value for intl.request.ka_locale
""" % ka_root.root

    if not files_to_lint:
        return

    for filename in files_to_lint:
        modulename = ka_root.relpath(filename)
        modulename = os.path.splitext(modulename)[0]    # nix .py
        modulename = modulename.replace('/', '.')
        # Force a re-import.
        program += 'sys.modules.pop("%s", None)\n' % modulename
        program += ('intl.request.ka_locale = lambda: add_lint_error("%s")\n'
                    % ka_root.relpath(filename))
        program += 'import %s\n' % modulename

    p = subprocess.Popen(
        ['env', 'PYTHONPATH=%s' % ':'.join(sys.path),
         sys.executable, '-c', program],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # BUGFIX: use communicate() rather than wait() + stdout.read().
    # wait() can deadlock if the child fills the stdout pipe buffer
    # before exiting; communicate() drains the pipe while waiting.
    (lint_output, _) = p.communicate()

    for line in lint_output.splitlines():
        if line.startswith('GETTEXT ERROR '):
            line = line[len('GETTEXT ERROR '):]
            # Renamed from 'filename' to avoid shadowing the loop
            # variable above.
            (bad_filename, linenum) = line.rsplit(' ', 1)
            yield (ka_root.join(bad_filename), int(linenum),
                   'Trying to translate at import-time, but '
                   'translation only works at runtime! '
                   'Use intl.i18n.mark_for_translation() instead.')