Beispiel #1
0
def lint_absolute_import(files_to_lint):
    """Make sure we use absolute imports everywhere.

    Suppose you have code like a top-level util.py and also
    email/util.py, and you do "import util".  If you're doing that
    from email/foo.py, then it will import email/util.py instead of
    the top-level util.py.  Using absolute imports everywhere avoids
    the risk of that problem.  It also makes it more obvious to those
    looking at the code, exactly what's being imported.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    for f in files_to_lint:
        import_lines = _get_import_lines(f)

        if not import_lines:
            # No imports?  Then no need for an absolute-import directive.
            continue

        if any(i.module == '__future__.absolute_import' for i in import_lines):
            # Has an absolute-import, we're happy.
            continue

        contents = lintutil.file_contents(f)
        if contents.startswith('#!'):
            # If the file can be run as a script, then absolute-import
            # doesn't always work since we don't automatically have
            # webapp-root in the pythonpath for scripts.  So we don't
            # require absolute-import in that case.
            continue

        # __future__ imports come first, so the best line number to
        # report for the error is the first import line.
        yield (f, import_lines[0].lineno,
               'Modules must use: from __future__ import absolute_import')
Beispiel #2
0
def lint_js_files_are_translated(files_to_lint):
    """Verify that nltext in the js files are marked for translation.

    See docstring of: _lint_js_content

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Make sure jsx files are compiled first, then we will lint the resulting
    # js.
    kake.make.build_many([
        ('genfiles/compiled_jsx/en/%s.js' % ka_root.relpath(f), {})
        for f in files_to_lint if f.endswith('.jsx')
    ])

    files_to_lint = lintutil.filter(files_to_lint, suffix=('.js', '.jsx'))
    for f in files_to_lint:
        abs_f = f
        f = ka_root.relpath(f)

        # Exclude files that we don't need to translate: we don't care
        # if those files are 'properly' marked up or not.
        if intl.english_only.should_not_translate_file(f):
            continue

        if f.endswith(".jsx"):
            abs_f = "%s/genfiles/compiled_jsx/en/%s.js" % (ka_root.root, f)
            f = ka_root.relpath(abs_f)

        file_contents = lintutil.file_contents(abs_f)

        for error in _lint_js_content(abs_f, file_contents):
            yield error
Beispiel #3
0
def lint_non_literal_i18n_in_javascript(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable
    has been translated or not.
    """
    files_to_lint = lintutil.filter(files_to_lint,
                                    suffix=('.js', '.jsx'),
                                    exclude_substrings=('/i18n.js',
                                                        '/i18n_test.js'))

    # This regexp pattern captures a string, possibly concatenated with +'s.
    js_str = (r'(?:' + r'"(?:\\.|[^"])*"|' + r"'(?:\\.|[^'])*'|" +
              r'`(?:\\.|[^`])*`' + ')')
    js_concat_str = '\s*%s(?:\s*\+\s*%s)*\s*' % (js_str, js_str)

    gettext_occurrences = re.compile(r'\b(i18n._|i18n.ngettext)\(')
    valid_gettext_occurrences = re.compile(
        r'\bi18n._\(%(str)s[,)]|\bi18n.ngettext\(%(str)s,\s*%(str)s[,)]' %
        {'str': js_concat_str})

    for f in files_to_lint:
        contents = lintutil.file_contents(f)
        all_occurrences = {
            m.start(): m
            for m in gettext_occurrences.finditer(contents)
        }
        valid_occurrences = {
            m.start(): m
            for m in valid_gettext_occurrences.finditer(contents)
        }

        for (startpos, m) in all_occurrences.iteritems():
            i18n_fn = m.group(1)
            msg = None  # set to non-None if there's a problem.

            if startpos not in valid_occurrences:
                msg = ('%s must have string literals as arguments, '
                       'with no variables or templates' % i18n_fn)
            else:
                # Then we're ok with this!  *Unless* it's a template string
                # with $(...) inside it, then we're not ok.
                m2 = valid_occurrences[startpos]
                if m2.group().count(r'${') > m2.group().count(r'\${'):
                    msg = ('You must use %' + '(...)s with template strings '
                           'inside %s, not ${...}' % i18n_fn)

            if msg:
                start_lineno = 1 + contents.count('\n', 0, startpos)
                # Doing a real regexp to find the end of this function call
                # is tough, we just do something simple and pray.
                end_paren = contents.find(')', startpos)
                if end_paren == -1:
                    end_paren = len(contents)
                end_lineno = 1 + contents.count('\n', 0, end_paren)
                if any(
                        lintutil.has_nolint(f, lineno)
                        for lineno in xrange(start_lineno, end_lineno + 1)):
                    continue
                yield (f, start_lineno, msg)
Beispiel #4
0
def lint_js_files_are_translated(files_to_lint):
    """Verify that nltext in the js files are marked for translation.

    See docstring of: _lint_js_content

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Make sure jsx files are compiled first, then we will lint the resulting
    # js.
    kake.make.build_many([('genfiles/compiled_jsx/en/%s.js' %
                           ka_root.relpath(f), {})
                          for f in files_to_lint if f.endswith('.jsx')])

    files_to_lint = lintutil.filter(files_to_lint, suffix=('.js', '.jsx'))
    for f in files_to_lint:
        abs_f = f
        f = ka_root.relpath(f)

        # Exclude files that we don't need to translate: we don't care
        # if those files are 'properly' marked up or not.
        if intl.english_only.should_not_translate_file(f):
            continue

        if f.endswith(".jsx"):
            abs_f = "%s/genfiles/compiled_jsx/en/%s.js" % (ka_root.root, f)
            f = ka_root.relpath(abs_f)

        file_contents = lintutil.file_contents(abs_f)

        for error in _lint_js_content(abs_f, file_contents):
            yield error
def lint_every_rendered_component_has_a_fixture(files_to_lint):
    """Check that every component we render has an associated fixture file.

    In order to test that a particular react component can be
    server-side rendered, we need to actually try to render it with a
    particular value for props.  This is what component.fixture.js
    files are for.  We just make sure the people write them!

    For now, we allow the fixture file to be empty (just `[]`).  Later
    we may insist on actually useful fixtures.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.html')

    for f in files_to_lint:
        contents_of_f = lintutil.file_contents(f)
        for m in RENDER_REACT_RE.finditer(contents_of_f):
            component_file = m.group(1)
            # To be server-side renderable, the fixture file has to be
            # a javascript file, not jsx or something else.
            fixture_file = component_file + '.fixture.js'
            if not os.path.exists(ka_root.join(fixture_file)):
                linenum = contents_of_f.count('\n', 0, m.start()) + 1
                yield (f, linenum,
                       '%s must have an associated fixture file %s' %
                       (component_file, fixture_file))
Beispiel #6
0
def lint_every_rendered_component_has_a_fixture(files_to_lint):
    """Check that every component we render has an associated fixture file.

    In order to test that a particular react component can be
    server-side rendered, we need to actually try to render it with a
    particular value for props.  This is what component.fixture.js
    files are for.  We just make sure the people write them!

    For now, we allow the fixture file to be empty (just `[]`).  Later
    we may insist on actually useful fixtures.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.html')

    for f in files_to_lint:
        contents_of_f = lintutil.file_contents(f)
        for m in RENDER_REACT_RE.finditer(contents_of_f):
            component_file = m.group(1)
            # To be server-side renderable, the fixture file has to be
            # a javascript file, not jsx or something else.
            fixture_file = component_file + '.fixture.js'
            if not os.path.exists(ka_root.join(fixture_file)):
                linenum = contents_of_f.count('\n', 0, m.start()) + 1
                yield (f, linenum,
                       '%s must have an associated fixture file %s'
                       % (component_file, fixture_file))
def lint_duplicate_test_functions(files_to_lint):
    """Enforce that no two test functions in the same class have the same name.

    This prevents one test from not running because another test overwrites it.
    """
    files = lintutil.filter(files_to_lint, suffix='_test.py')

    for filename in files:
        contents = lintutil.file_contents(filename)

        functions = _matches_with_line_numbers(_TEST_FN_RE, contents)
        classes = _matches_with_line_numbers(_CLASS_NAME_RE, contents)

        lines = collections.defaultdict(list)

        for fname, line in functions:
            cls = _find_class_at_line(classes, line)
            if cls is None:
                continue
            qualified_fname = cls + '.' + fname
            lines[qualified_fname] += [line]

        for fname, dup_lines in lines.iteritems():
            if len(dup_lines) > 1:
                if any(
                        lintutil.has_nolint(filename, line)
                        for line in dup_lines):
                    continue

                yield (filename, max(dup_lines),
                       'Duplicate test function name %(name)s'
                       ' at lines %(lines)s' % {
                           'name': fname,
                           'lines': ', '.join(str(l) for l in dup_lines)
                       })
def lint_duplicate_test_functions(files_to_lint):
    """Enforce that no two test functions in the same class have the same name.

    This prevents one test from not running because another test overwrites it.
    """
    files = lintutil.filter(files_to_lint, suffix='_test.py')

    for filename in files:
        contents = lintutil.file_contents(filename)

        functions = _matches_with_line_numbers(_TEST_FN_RE, contents)
        classes = _matches_with_line_numbers(_CLASS_NAME_RE, contents)

        lines = collections.defaultdict(list)

        for fname, line in functions:
            cls = _find_class_at_line(classes, line)
            if cls is None:
                continue
            qualified_fname = cls + '.' + fname
            lines[qualified_fname] += [line]

        for fname, dup_lines in lines.iteritems():
            if len(dup_lines) > 1:
                if any(lintutil.has_nolint(filename, line)
                       for line in dup_lines):
                    continue

                yield (filename, max(dup_lines),
                       'Duplicate test function name %(name)s'
                       ' at lines %(lines)s' % {
                           'name': fname,
                           'lines': ', '.join(str(l) for l in dup_lines)
                       })
Beispiel #9
0
def lint_javascript_in_html(files_to_lint):
    """Run eslint on javascript content inside html files.

    We want to make sure that the js in our html files has no unused
    variables/etc, so we can do better dependency analysis on it.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')

    lint_inputs = []      # list of filename/contents pairs

    for f in files:
        contents_of_f = lintutil.file_contents(f)

        # extract_js_from_html actually can return several versions
        # of the js contents, each with a different branch of an
        # if/else commented out.  (So for input like
        #  <script>var x = { {%if c%}y: 4{%else%}y: 5{%endif%} };</script>
        # we'd see both 'var x = { y: 4 };' and 'var x = { y: 5 }')
        # We lint all such strings and combine the output.
        js_contents_iter = js_in_html.extract_js_from_html(contents_of_f,
                                                           "jinja2",
                                                           file_name=f)

        try:
            for js_contents in js_contents_iter:
                lint_inputs.append((f, js_contents))
        except Exception, why:
            yield (f, 1, 'HTML parse error: %s' % why)
            continue
Beispiel #10
0
def lint_package_imports(files_to_lint):
    """Find cases where we import a package (directory) rather than a module.

    For instance, "import emails".  That is not a module, it's a
    directory.

    Sometimes importing a directory is correct, for third-party
    modules with non-trivial __init__.py's.  But it's never right for
    our code.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    for f in files_to_lint:
        import_lines = _get_import_lines(f)
        for import_line in import_lines:
            if import_line.module.startswith(('__future__', 'third_party')):
                # The rules are different for third-party code, and
                # __future__'s aren't even real modules.
                continue
            if _is_a_first_party_package(import_line.module):
                contents = lintutil.file_contents(f)
                corrections = re.findall(r'%s\.[\w_]+' % import_line.name,
                                         contents)
                yield (
                    f, import_line.lineno,
                    "Do not import the whole directory '%s'; "
                    "import the modules inside it, possibly: %s" %
                    (import_line.module, ', '.join(sorted(set(corrections)))))
Beispiel #11
0
def lint_non_literal_i18n_do_not_translate(files_to_lint):
    """Find where we mark html as not needing translation but its non-literal

    We require anything we mark with i18n_do_not_translate to be only a string
    literal. i18n_do_not_translate marks the string as "safe" (that is jinja2
    won't autoescape it) so we need to be extra careful we don't create a
    potential XSS attack vector if we end up marking some variable as
    safe.

    Unless that line is marked with {{# @Nolint #}}, we flag it as a lint
    error.

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to files under templates/ that are html or txt.
    files = lintutil.filter(files_to_lint,
                            prefix='templates/', suffix=('.html', '.txt'))

    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _BAD_DO_NOT_TRANSLATE_RE.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       '%s contains something that is not just a '
                       'literal string. Only literal strings can be '
                       'inside i18n_do_not_translate.' % m.group(1))
Beispiel #12
0
def lint_non_literal_i18n_in_javascript(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable
    has been translated or not.
    """
    files_to_lint = lintutil.filter(
        files_to_lint, suffix=('.js', '.jsx'),
        exclude_substrings=('/i18n.js', '/i18n_test.js'))

    # This regexp pattern captures a string, possibly concatenated with +'s.
    js_str = (r'(?:' +
              r'"(?:\\.|[^"])*"|' +
              r"'(?:\\.|[^'])*'|" +
              r'`(?:\\.|[^`])*`' +
              ')')
    js_concat_str = '\s*%s(?:\s*\+\s*%s)*\s*' % (js_str, js_str)

    gettext_occurrences = re.compile(r'\b(i18n._|i18n.ngettext)\(')
    valid_gettext_occurrences = re.compile(
        r'\bi18n._\(%(str)s[,)]|\bi18n.ngettext\(%(str)s,\s*%(str)s[,)]'
        % {'str': js_concat_str})

    for f in files_to_lint:
        contents = lintutil.file_contents(f)
        all_occurrences = {
            m.start(): m for m in gettext_occurrences.finditer(contents)}
        valid_occurrences = {
            m.start(): m for m in valid_gettext_occurrences.finditer(contents)}

        for (startpos, m) in all_occurrences.iteritems():
            i18n_fn = m.group(1)
            msg = None      # set to non-None if there's a problem.

            if startpos not in valid_occurrences:
                msg = ('%s must have string literals as arguments, '
                       'with no variables or templates' % i18n_fn)
            else:
                # Then we're ok with this!  *Unless* it's a template string
                # with $(...) inside it, then we're not ok.
                m2 = valid_occurrences[startpos]
                if m2.group().count(r'${') > m2.group().count(r'\${'):
                    msg = ('You must use %' + '(...)s with template strings '
                           'inside %s, not ${...}' % i18n_fn)

            if msg:
                start_lineno = 1 + contents.count('\n', 0, startpos)
                # Doing a real regexp to find the end of this function call
                # is tough, we just do something simple and pray.
                end_paren = contents.find(')', startpos)
                if end_paren == -1:
                    end_paren = len(contents)
                end_lineno = 1 + contents.count('\n', 0, end_paren)
                if any(lintutil.has_nolint(f, lineno)
                       for lineno in xrange(start_lineno, end_lineno + 1)):
                    continue
                yield (f, start_lineno, msg)
Beispiel #13
0
 def _find_unused_imports(root, module_parts_so_far):
     """Yields (module_name, lineno) pairs of errors."""
     for (part, subtree) in root.iteritems():
         if isinstance(subtree, dict):  # Recurse
             for error in _find_unused_imports(
                     subtree, module_parts_so_far + [part]):
                 yield error
         elif subtree == 'USED':  # Used import
             pass
         else:  # Unused import
             contents = lintutil.file_contents(f)
             lines = contents.splitlines(True)
             # Here, `subtree` stores the line the import was on
             lineno = subtree
             # If it has an "@UnusedImport" decorator, unused == ok!
             if '@UnusedImport' not in lines[lineno - 1]:
                 yield (".".join(module_parts_so_far + [part]), lineno)
Beispiel #14
0
def lint_no_user_property(files_to_lint):
    """Enforce that nobody uses UserProperty.

    ...unless marked as an explicitly approved legacy usage via @Nolint.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    for filename in files_to_lint:
        contents = lintutil.file_contents(filename)
        for fn_match in _USERPROPERTY_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            linenum = 1 + contents.count('\n', 0, fn_match.start())
            yield (filename, linenum,      # filename and linenum
                   "Do not use UserProperty, it is not safe. Use UserData's "
                   "key as its foreign key, instead.")
Beispiel #15
0
def lint_iife_in_html_javascript(files_to_lint):
    """Make sure js in <script> tags doesn't leak globals by using an IIFE.

    IIFE is when you surround js code in '(function() { ... })();'.
    This prevents any variable declarations from polluting the global
    scope.  We require all <script> tags to have an IIFE if they
    declare a variable.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')

    # For determining if we need an IIFE, we only care about 'var'
    # statements.
    var_re = re.compile(r'\bvar\b')

    for f in files:
        contents_of_f = lintutil.file_contents(f)

        # extract_js_from_html actually can return several versions
        # of the js contents, each with a different branch of an
        # if/else commented out.  (So for input like
        #  <script>var x = { {%if c%}y: 4{%else%}y: 5{%endif%} };</script>
        # we'd see both 'var x = { y: 4 };' and 'var x = { y: 5 }')
        # We lint all such strings and combine the output.
        js_contents_iter = js_in_html.extract_js_from_html(contents_of_f,
                                                           "jinja2",
                                                           keep_re=var_re,
                                                           file_name=f)
        try:
            for js_contents in js_contents_iter:
                if (not _IIFE_RE.match(js_contents) and
                        var_re.search(js_contents)):
                    # Our script is the first non-blank line in the
                    # extracted output.
                    lineno = len(js_contents) - len(js_contents.lstrip('\n'))
                    if not lintutil.has_nolint(f, lineno + 1):
                        yield (f, lineno + 1,
                               "Must wrap this script's contents in "
                               "'(function() { ... })();' (IIFE) to "
                               "avoid leaking vars to global state")
        except Exception, why:
            yield (f, 1, 'HTML parse error: %s' % why)
            continue
def lint_no_user_property(files_to_lint):
    """Enforce that nobody uses UserProperty.

    ...unless marked as an explicitly approved legacy usage via @Nolint.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    for filename in files_to_lint:
        contents = lintutil.file_contents(filename)
        for fn_match in _USERPROPERTY_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            linenum = 1 + contents.count('\n', 0, fn_match.start())
            yield (
                filename,
                linenum,  # filename and linenum
                "Do not use UserProperty, it is not safe. Use UserData's "
                "key as its foreign key, instead.")
Beispiel #17
0
def lint_no_wrong_i18n_markup_in_jinja2(files_to_lint):
    """Find where we mark js within html with i18n._ instead of {{ _js()

    Returns:
       List of triples: (filename, lineno, error message)
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _BAD_JS_MARKUP.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _BAD_JS_MARKUP.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       'Do {{ _js("%s") }} instead of %s inside <script> '
                       'tags within jinja2.' % (m.group(1), m.group(0)))
Beispiel #18
0
def lint_no_wrong_i18n_markup_in_jinja2(files_to_lint):
    """Find where we mark js within html with i18n._ instead of {{ _js()

    Returns:
       List of triples: (filename, lineno, error message)
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _BAD_JS_MARKUP.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _BAD_JS_MARKUP.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       'Do {{ _js("%s") }} instead of %s inside <script> '
                       'tags within jinja2.' % (m.group(1), m.group(0)))
Beispiel #19
0
def lint_non_literal_i18n_do_not_translate(files_to_lint):
    """Find where we mark html as not needing translation but its non-literal

    We require anything we mark with i18n_do_not_translate to be only a string
    literal. i18n_do_not_translate marks the string as "safe" (that is jinja2
    won't autoescape it) so we need to be extra careful we don't create a
    potential XSS attack vector if we end up marking some variable as
    safe.

    Unless that line is marked with {{# @Nolint #}}, we flag it as a lint
    error.

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to files under templates/ that are html or txt.
    files = lintutil.filter(files_to_lint,
                            prefix='templates/',
                            suffix=('.html', '.txt'))

    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _BAD_DO_NOT_TRANSLATE_RE.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       '%s contains something that is not just a '
                       'literal string. Only literal strings can be '
                       'inside i18n_do_not_translate.' % m.group(1))
Beispiel #20
0
def lint_missing_safe_in_jinja2(files_to_lint):
    """Find instances where we translate html but don't mark it |safe.

    We html-escape the output of {{ _("...") }} in html templates, which
    is safe but can cause problems when the text is {{ _("<b>hi</b>") }}.
    In that case, the user needs to do {{ _("<b>hi</b>")|safe }}.

    We detect instances where the user does {{ _("...<..") }} in jinja2
    templates but lacks the |safe afterwards.  Unless that line is marked
    with {{# @Nolint #}}, we flag it as a lint error.

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to html and txt files under templates/.
    files = lintutil.filter(files_to_lint,
                            prefix='templates/',
                            suffix=('.html', '.txt'))

    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _GETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with _("..."|safe) to avoid escaping'
                           'the tag/attr inside the _().' % m.group(1))
                for km in _KWARG_RE.finditer(m.group(3)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with _(..., foo="..."|safe) to '
                               'avoid escaping the tag/attr inside foo.' %
                               km.group(1))

            for m in _NGETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with ngettext("..."|safe, ...) to '
                           'avoid escaping the tag/attr inside the _().' %
                           m.group(1))
                if _needs_safe(m.group(3), m.group(4)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(3))
                    yield (filename, linenum,
                           'Replace %s with ngettext(..., "..."|safe) to '
                           'avoid escaping the tag/attr inside the _().' %
                           m.group(3))
                for km in _KWARG_RE.finditer(m.group(5)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with ngettext(..., foo="..."|safe)'
                               ' to avoid escaping the tag/attr inside foo.' %
                               km.group(1))
Beispiel #21
0
def _lint_unnecessary_late_imports(files_to_lint):
    """Report on all late imports that could safely be top-level.

    We make an import "late" -- that is, inside a function rather than
    at the top level -- when it's needed to avoid circular imports.
    However, sometimes a refactor makes it so an import doesn't need to
    be late anymore.  That's hard to tell by inspection, so this linter
    does it for you!

    Conceptually, this is a standalone linter, so I wrote it like that.
    However, it doesn't deal well with cycles so we only want to run it
    when there are no cycles.  Thus, we run it at the end of the
    cycle-checking linter, and then only if there are no cycles.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    # Files we shouldn't check for late imports for some reason.
    # Can end with `.` to match all modules starting with this prefix.
    MODULE_BLACKLIST = frozenset([
        # Is imported first, must do minimal work
        'appengine_config',
        # Minimize deps so new code can import it without fear.
        'users.current_user',
        # Minimize deps to keep `make tesc` output small.
        'testutil.gae_model',
        'testutil.mapreduce_stub',
        # Does a bunch of importing after fixing up sys.path
        'tools.appengine_tool_setup',
        'tools.devservercontext',
        'tools.devshell',
        # TODO(csilvers): remove this and move the babel routines to their
        # own file instead.
        'shared_jinja',
        # Does importing after modifying modules.
        'pickle_util_test',
        # Has its own style rule stating late imports are preferred
        'kake.',
    ])

    # Imports that are allowed to be late for some reason.
    LATE_IMPORT_BLACKLIST = frozenset([
        # Can only import once we've verified we're not running in prod
        'sandbox_util',
        'mock',
        'kake.server_client',
        'kake.make',
    ])

    for f in files_to_lint:
        module = ka_root.relpath(os.path.splitext(f)[0]).replace(os.sep, '.')
        # Some files are expected to have late imports.
        if (module in MODULE_BLACKLIST or module.startswith(
                tuple(m for m in MODULE_BLACKLIST if m.endswith('.')))):
            continue

        for late_import in _get_late_imports(module):
            # Some modules can *only* be late-imported,
            if late_import in LATE_IMPORT_BLACKLIST:
                continue

            # If late_import transitively imports us, then it's not safe
            # to move it to the top level: that would introduce a cycle.
            if module in _calculate_transitive_imports(late_import):
                continue

            # If a module is marked `@UnusedImport` or `@Nolint`, then
            # it's being imported for its side effects, and we don't
            # want to move it to the top level.
            import_lines = _get_import_lines(f)
            late_import_line = next(
                il for il in import_lines
                if (il.module == late_import and not il.is_toplevel))
            contents = lintutil.file_contents(f)
            lines = contents.splitlines(True)
            if ('@UnusedImport' in lines[late_import_line.lineno - 1]
                    or '@Nolint' in lines[late_import_line.lineno - 1]):
                continue

            # If we get here, it's safe to move this import to the top level!
            yield (f, late_import_line.lineno,
                   "Make this a top-level import: it doesn't cause cycles")

            # For the rest of our analysis, assume that `module` has
            # moved the import of `late_import` to the top level.
            # TODO(csilvers): figure out the "best" edge to add, rather
            # than just doing first come first served.
            _add_edge(module, late_import)
Beispiel #22
0
def lint_static_url_modifier(files_to_lint):
    """Be sure every static url in an .html file uses |static_url or static().

    We serve static urls such as images and fonts from kastatic.org,
    for efficiency.  The |static_url and static_url() jinja2 modifiers are ways
    to rewrite urls to use that.  This linter makes sure people remember to do
    that for new code.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')
    if not files:
        return

    # generate_static_urls_regexp matches standalone urls, but we need
    # to match them in the context of a line, so we need to mess with
    # ^ and $ a bit.  We just get rid of $, which could yield false
    # positives in theory but doesn't in practice.  We replace ^ with
    # something that matches 'things you might find before the start
    # of a static url but not before something like an API route name."
    static_url_regexp = url_util.STATIC_URL_REGEX
    static_url_regexp = static_url_regexp.replace('$', '')
    # The '%s' in the regexp below is to catch jinja2 html like:
    #   <img src="{{ "https://%s/images/..." % hostname }}">
    static_url_regexp = static_url_regexp.replace(
        '^/', r'([^\w_./-]["\'/]|khanacademy.org/|%s/)')

    # It's good when the static-url is followed by '|static_url' (it
    # needs to be in a '{{ ... }}' for that) or preceded by 'static_url('.
    # We say the regexp is bad if neither case is present.
    # This is cheaper than using a negative lookahead (I think).
    good_regexp = (r'(static_url\s*\(\s*)?(%s)([^}]*\|static_url)?'
                   % static_url_regexp)

    # We do two checks: one for the static url by name, and one for
    # any src= parameter (or poster=, which is used for videos).
    static_url_re = re.compile(good_regexp)
    # We take advantage of the fact we don't use spaces in our static urls.
    src_regex = re.compile(r'\b(src|poster)=((?:\{[^\}]*\}|[^ >])*)')

    for f in files:
        contents_of_f = lintutil.file_contents(f)
        error_lines = set()   # just to avoid duplicate error messages

        for m in static_url_re.finditer(contents_of_f):
            if not m.groups()[0] and not m.groups()[-1]:
                lineno = 1 + contents_of_f.count('\n', 0, m.start())
                # STATIC_URL_REGEX matches the character before the url-string,
                # and also the opening quote, so get rid of them both.
                url_prefix = m.group(2)[1:].strip('\'\"')
                yield (f, lineno,
                       ('The static url "%s..." should use "|static_url" '
                       + 'after the url or call "static_url()" with the url ' +
                       'passed in') % url_prefix)
                error_lines.add(lineno)

        # emails maybe want to point to ka.org to avoid confusion.
        # TODO(csilvers): should we point them to kastatic.org too?
        if '/emails/' not in f:
            for m in src_regex.finditer(contents_of_f):
                url = m.group(2).strip('\'\"{} ')
                if not url:
                    continue
                # A simple, but effective, effort to find urls that point to
                # non-ka domains.  (Most ka domains we use are .org!)
                if '.com' in url and '.appspot.com' not in url:
                    continue
                if url.startswith('data:'):
                    continue
                # If it's already a url that we serve off the CDN, don't
                # complain.
                if any(u in url for (_, u) in url_util.FASTLY_URL_MAP):
                    continue
                # If it's an <iframe src=...>, don't complain; those are
                # never static resources.  We just do a cheap check.
                end_of_line = contents_of_f.index('\n', m.end())
                if '</iframe>' in contents_of_f[m.end():end_of_line]:
                    continue

                if '|static_url' not in url:
                    lineno = 1 + contents_of_f.count('\n', 0, m.start())
                    if lineno not in error_lines:  # a new error
                        yield (f, lineno,
                               'Use |static_url with "%s" attributes'
                               % m.group(1))
Beispiel #23
0
def lint_missing_safe_in_jinja2(files_to_lint):
    """Find instances where we translate html but don't mark it |safe.

    We html-escape the output of {{ _("...") }} in html templates, which
    is safe but can cause problems when the text is {{ _("<b>hi</b>") }}.
    In that case, the user needs to do {{ _("<b>hi</b>")|safe }}.

    We detect instances where the user does {{ _("...<..") }} in jinja2
    templates but lacks the |safe afterwards.  Unless that line is marked
    with {{# @Nolint #}}, we flag it as a lint error.

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to html and txt files under templates/.
    files = lintutil.filter(files_to_lint,
                            prefix='templates/', suffix=('.html', '.txt'))

    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _GETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with _("..."|safe) to avoid escaping'
                           'the tag/attr inside the _().' % m.group(1))
                for km in _KWARG_RE.finditer(m.group(3)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count('\n', 0,
                                                     fn_match.start() +
                                                     m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with _(..., foo="..."|safe) to '
                               'avoid escaping the tag/attr inside foo.'
                               % km.group(1))

            for m in _NGETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with ngettext("..."|safe, ...) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(1))
                if _needs_safe(m.group(3), m.group(4)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(3))
                    yield (filename, linenum,
                           'Replace %s with ngettext(..., "..."|safe) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(3))
                for km in _KWARG_RE.finditer(m.group(5)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count('\n', 0,
                                                     fn_match.start() +
                                                     m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with ngettext(..., foo="..."|safe)'
                               ' to avoid escaping the tag/attr inside foo.'
                               % km.group(1))
Beispiel #24
0
def lint_not_using_gettext_at_import_time(files_to_lint):
    """Make sure we don't use i18n._/etc in a static context.

    If you have a global variable such as '_FOO = i18n._("bar")', at
    the top of some .py file, it won't work the way you intend because
    i18n._() needs to be called while handling a request in order to
    know what language to translate to.  (Instead, you'd need to do
        _FOO = lambda: i18n._("bar")
    or some such.)

    This tests for this by mocking i18n._ et al., and then importing
    everything (but running nothing).  Any i18n._ calls that happen
    during this import are problematic!  We have to spawn a new
    python process to make sure we do the importing properly (and
    without messing with the currently running python environment!)
    """
    candidate_files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    files_to_lint = []

    for filename in candidate_files_to_lint:
        contents = lintutil.file_contents(filename)

        # Check that it's plausible this file uses i18n._ or similar.
        # This also avoids importing random third-party files that may
        # have nasty side-effects at import time (all our code is too
        # well-written to do that!)
        if 'import intl' in contents or 'from intl' in contents:
            files_to_lint.append(filename)

    program = """\
import os                # @Nolint(linter can't tell this is in a string!)
import sys               # @Nolint(linter can't tell this is in a string!)
import traceback

import intl.request      # @Nolint(seems unused to our linter but it's used)

_ROOT = "%s"

def add_lint_error(f):
    # We assume code in 'intl' doesn't make this mistake, and thus
    # the first stack-frame before we get into 'intl' is the
    # offending code.  ctx == '<string>' means the error occurred in
    # this pseudo-script.
    for (ctx, lineno, fn, line) in reversed(traceback.extract_stack()):
        if os.path.isabs(ctx):
            ctx = os.path.relpath(ctx, _ROOT)
        if ctx != '<string>' and not ctx.startswith('intl/'):
            if ctx == f:
                print 'GETTEXT ERROR {} {}'.format(ctx, lineno)
            break
    return 'en'     # a fake value for intl.request.ka_locale

""" % ka_root.root

    if not files_to_lint:
        return

    for filename in files_to_lint:
        modulename = ka_root.relpath(filename)
        modulename = os.path.splitext(modulename)[0]  # nix .py
        modulename = modulename.replace('/', '.')
        # Force a re-import.
        program += 'sys.modules.pop("%s", None)\n' % modulename
        program += ('intl.request.ka_locale = lambda: add_lint_error("%s")\n'
                    % ka_root.relpath(filename))
        program += 'import %s\n' % modulename

    p = subprocess.Popen(
        ['env', 'PYTHONPATH=%s' % ':'.join(sys.path),
         sys.executable, '-c', program],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    p.wait()
    lint_output = p.stdout.read()
    for line in lint_output.splitlines():
        if line.startswith('GETTEXT ERROR '):
            line = line[len('GETTEXT ERROR '):]
            (filename, linenum) = line.rsplit(' ', 1)
            yield (ka_root.join(filename), int(linenum),
                   'Trying to translate at import-time, but '
                   'translation only works at runtime! '
                   'Use intl.i18n.mark_for_translation() instead.')
Beispiel #25
0
def lint_templates_are_translated(files_to_lint):
    """Verify that nltext in the input templates are marked for translation.

    All natural-language text in jinja2 and handlebars files should be
    marked for translation, using {{ _("...") }} or {{#_}}...{{/_}}.
    i18nize_templates.py is a tool that can do this for you automatically.
    We run this tool in 'check' mode to verify that every input file
    is already marked up appropriately.

    Since i18nize_templates isn't perfect (it thinks you need to
    translate text like 'Lebron James' or 'x' when used on a 'close'
    button), you can use nolint-like functionality to tell this linter
    it's ok if some text is not marked up to be translated.  Unlike
    other tests though, we do not use the @Nolint directive for this,
    but instead wrap the relevant text in
       {{ i18n_do_not_translate(...) }}
    or
       {{#i18nDoNotTranslate}}...{{/i18nDoNotTranslate}}
    """
    # Add some ka-specific function we know do not have nltext arguments.
    i18nize_templates.mark_function_args_lack_nltext(
        'js_css_packages.package',
        'js_css_packages.script',
        'handlebars_template',
        'youtube.player_embed',
        'log.date.strftime',
        'emails.tracking_image_url',
        'templatetags.to_canonical_url',
        'render_react',
    )

    for f in files_to_lint:
        abs_f = f
        f = ka_root.relpath(f)

        # Exclude files that we don't need to translate: we don't care
        # if those files are 'properly' marked up or not.
        if intl.english_only.should_not_translate_file(f):
            continue

        if (f.startswith('templates' + os.sep)
                and (f.endswith('.html') or f.endswith('.txt'))):
            # jinja2 template
            parser = i18nize_templates.get_parser_for_file(f)
            correction = 'wrap the text in {{ i18n_do_not_translate() }}'
        elif f.endswith('.handlebars'):
            # handlebars template
            parser = i18nize_templates.get_parser_for_file(f)
            correction = ('wrap the text in {{#i18nDoNotTranslate}}...'
                          '{{/i18nDoNotTranslate}}')
        else:
            continue

        file_contents = lintutil.file_contents(abs_f)
        try:
            parsed_output = parser.parse(
                file_contents.decode('utf-8')).encode('utf-8')
        except i18nize_templates.HTMLParser.HTMLParseError, why:
            m = re.search(r'at line (\d+)', str(why))
            linenum = int(m.group(1)) if m else 1
            yield (abs_f, linenum,
                   '"i18nize_templates.py %s" fails: %s' % (f, why))
            continue

        orig_lines = file_contents.splitlines()
        parsed_lines = parsed_output.splitlines()
        for i in xrange(len(orig_lines)):
            if orig_lines[i] != parsed_lines[i]:
                yield (abs_f, i + 1,
                       'Missing _(); run tools/i18nize_templates.py or %s '
                       '(expecting "%s")' %
                       (correction, parsed_lines[i].strip()))
Beispiel #26
0
def lint_templates_are_translated(files_to_lint):
    """Verify that nltext in the input templates are marked for translation.

    All natural-language text in jinja2 and handlebars files should be
    marked for translation, using {{ _("...") }} or {{#_}}...{{/_}}.
    i18nize_templates.py is a tool that can do this for you automatically.
    We run this tool in 'check' mode to verify that every input file
    is already marked up appropriately.

    Since i18nize_templates isn't perfect (it thinks you need to
    translate text like 'Lebron James' or 'x' when used on a 'close'
    button), you can use nolint-like functionality to tell this linter
    it's ok if some text is not marked up to be translated.  Unlike
    other tests though, we do not use the @Nolint directive for this,
    but instead wrap the relevant text in
       {{ i18n_do_not_translate(...) }}
    or
       {{#i18nDoNotTranslate}}...{{/i18nDoNotTranslate}}
    """
    # Add some ka-specific function we know do not have nltext arguments.
    i18nize_templates.mark_function_args_lack_nltext(
        'js_css_packages.package',
        'js_css_packages.script',
        'handlebars_template',
        'youtube.player_embed',
        'log.date.strftime',
        'emails.tracking_image_url',
        'templatetags.to_canonical_url',
        'render_react',
    )

    for f in files_to_lint:
        abs_f = f
        f = ka_root.relpath(f)

        # Exclude files that we don't need to translate: we don't care
        # if those files are 'properly' marked up or not.
        if intl.english_only.should_not_translate_file(f):
            continue

        if (f.startswith('templates' + os.sep) and
            (f.endswith('.html') or f.endswith('.txt'))):
            # jinja2 template
            parser = i18nize_templates.get_parser_for_file(f)
            correction = 'wrap the text in {{ i18n_do_not_translate() }}'
        elif f.endswith('.handlebars'):
            # handlebars template
            parser = i18nize_templates.get_parser_for_file(f)
            correction = ('wrap the text in {{#i18nDoNotTranslate}}...'
                          '{{/i18nDoNotTranslate}}')
        else:
            continue

        file_contents = lintutil.file_contents(abs_f)
        try:
            parsed_output = parser.parse(
                file_contents.decode('utf-8')).encode('utf-8')
        except i18nize_templates.HTMLParser.HTMLParseError, why:
            m = re.search(r'at line (\d+)', str(why))
            linenum = int(m.group(1)) if m else 1
            yield (abs_f, linenum,
                   '"i18nize_templates.py %s" fails: %s' % (f, why))
            continue

        orig_lines = file_contents.splitlines()
        parsed_lines = parsed_output.splitlines()
        for i in xrange(len(orig_lines)):
            if orig_lines[i] != parsed_lines[i]:
                yield (abs_f, i + 1,
                       'Missing _(); run tools/i18nize_templates.py or %s '
                       '(expecting "%s")'
                       % (correction, parsed_lines[i].strip()))
Beispiel #27
0
def lint_not_using_gettext_at_import_time(files_to_lint):
    """Make sure we don't use i18n._/etc in a static context.

    If you have a global variable such as '_FOO = i18n._("bar")', at
    the top of some .py file, it won't work the way you intend because
    i18n._() needs to be called while handling a request in order to
    know what language to translate to.  (Instead, you'd need to do
        _FOO = lambda: i18n._("bar")
    or some such.)

    This tests for this by mocking i18n._ et al., and then importing
    everything (but running nothing).  Any i18n._ calls that happen
    during this import are problematic!  We have to spawn a new
    python process to make sure we do the importing properly (and
    without messing with the currently running python environment!)
    """
    candidate_files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    files_to_lint = []

    for filename in candidate_files_to_lint:
        contents = lintutil.file_contents(filename)

        # Check that it's plausible this file uses i18n._ or similar.
        # This also avoids importing random third-party files that may
        # have nasty side-effects at import time (all our code is too
        # well-written to do that!)
        if 'import intl' in contents or 'from intl' in contents:
            files_to_lint.append(filename)

    program = """\
import os                # @Nolint(linter can't tell this is in a string!)
import sys               # @Nolint(linter can't tell this is in a string!)
import traceback

import intl.request      # @Nolint(seems unused to our linter but it's used)

_ROOT = "%s"

def add_lint_error(f):
    # We assume code in 'intl' doesn't make this mistake, and thus
    # the first stack-frame before we get into 'intl' is the
    # offending code.  ctx == '<string>' means the error occurred in
    # this pseudo-script.
    for (ctx, lineno, fn, line) in reversed(traceback.extract_stack()):
        if os.path.isabs(ctx):
            ctx = os.path.relpath(ctx, _ROOT)
        if ctx != '<string>' and not ctx.startswith('intl/'):
            if ctx == f:
                print 'GETTEXT ERROR {} {}'.format(ctx, lineno)
            break
    return 'en'     # a fake value for intl.request.ka_locale

""" % ka_root.root

    if not files_to_lint:
        return

    for filename in files_to_lint:
        modulename = ka_root.relpath(filename)
        modulename = os.path.splitext(modulename)[0]  # nix .py
        modulename = modulename.replace('/', '.')
        # Force a re-import.
        program += 'sys.modules.pop("%s", None)\n' % modulename
        program += ('intl.request.ka_locale = lambda: add_lint_error("%s")\n' %
                    ka_root.relpath(filename))
        program += 'import %s\n' % modulename

    p = subprocess.Popen([
        'env',
        'PYTHONPATH=%s' % ':'.join(sys.path), sys.executable, '-c', program
    ],
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    p.wait()
    lint_output = p.stdout.read()
    for line in lint_output.splitlines():
        if line.startswith('GETTEXT ERROR '):
            line = line[len('GETTEXT ERROR '):]
            (filename, linenum) = line.rsplit(' ', 1)
            yield (ka_root.join(filename), int(linenum),
                   'Trying to translate at import-time, but '
                   'translation only works at runtime! '
                   'Use intl.i18n.mark_for_translation() instead.')