    def test_bom(self):
        # A BOM means utf-8.
        source = "\xEF\xBB\xBFtext = 'hello'\n"
        self.assertEqual(source_encoding(source), 'utf-8-sig')

        # But it has to be the only authority.
        source = "\xEF\xBB\xBF# coding: cp850\n"
        with self.assertRaises(SyntaxError):
            source_encoding(source)
Example #2
    def test_bom(self):
        # A BOM means utf-8.
        source = b"\xEF\xBB\xBFtext = 'hello'\n"
        self.assertEqual(source_encoding(source), 'utf-8-sig')

        # But it has to be the only authority.
        source = b"\xEF\xBB\xBF# coding: cp850\n"
        with self.assertRaises(SyntaxError):
            source_encoding(source)
Example #3
 def test_detect_source_encoding(self):
     # Various forms from http://www.python.org/dev/peps/pep-0263/
     source = "# coding=cp850\n\n"
     self.assertEqual(source_encoding(source), 'cp850')
     source = "#!/usr/bin/python\n# -*- coding: utf-8 -*-\n"
     self.assertEqual(source_encoding(source), 'utf-8')
     source = "#!/usr/bin/python\n# vim: set fileencoding=utf8 :\n"
     self.assertEqual(source_encoding(source), 'utf8')
     source = "# This Python file uses this encoding: utf-8\n"
     self.assertEqual(source_encoding(source), 'utf-8')
Example #5
 def test_detect_source_encoding(self):
     for _, source in ENCODING_DECLARATION_SOURCES:
         self.assertEqual(
             source_encoding(source),
             'cp850',
             "Wrong encoding in %r" % source
         )
Example #6
def get_python_source(filename):
    """Return the source code, as unicode."""
    base, ext = os.path.splitext(filename)
    if ext == ".py" and env.WINDOWS:
        exts = [".py", ".pyw"]
    else:
        exts = [ext]

    for ext in exts:
        try_filename = base + ext
        if os.path.exists(try_filename):
            # A regular text file: open it.
            source = read_python_source(try_filename)
            break

        # Maybe it's in a zip file?
        source = get_zip_bytes(try_filename)
        if source is not None:
            break
    else:
        # Couldn't find source.
        exc_msg = "No source for code: '%s'.\n" % (filename,)
        exc_msg += "Aborting report output, consider using -i."
        raise NoSource(exc_msg)

    # Replace \f because of http://bugs.python.org/issue19035
    source = source.replace(b'\f', b' ')
    source = source.decode(source_encoding(source), "replace")

    # Python code should always end with a line with a newline.
    if source and source[-1] != '\n':
        source += '\n'

    return source
Example #7
 def test_detect_source_encoding(self):
     for _, source, expected in ENCODING_DECLARATION_SOURCES:
         self.assertEqual(
             source_encoding(source),
             expected,
             "Wrong encoding in %r" % source
         )
Example #8
def get_python_source(filename):
    """Return the source code, as unicode."""
    base, ext = os.path.splitext(filename)
    if ext == ".py" and env.WINDOWS:
        exts = [".py", ".pyw"]
    else:
        exts = [ext]

    for ext in exts:
        try_filename = base + ext
        if os.path.exists(try_filename):
            # A regular text file: open it.
            source = read_python_source(try_filename)
            break

        # Maybe it's in a zip file?
        source = get_zip_bytes(try_filename)
        if source is not None:
            break
    else:
        # Couldn't find source.
        exc_msg = f"No source for code: '{filename}'.\n"
        exc_msg += "Aborting report output, consider using -i."
        raise NoSource(exc_msg)

    # Replace \f because of http://bugs.python.org/issue19035
    source = source.replace(b'\f', b' ')
    source = source.decode(source_encoding(source), "replace")

    # Python code should always end with a line with a newline.
    if source and source[-1] != '\n':
        source += '\n'

    return source
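A brief usage sketch for the get_python_source() helper above. The import paths are assumed from coverage.py's internal modules (not a stable public API, and they move between versions), and the filename is hypothetical:

from coverage.python import get_python_source   # assumed internal location
from coverage.misc import NoSource               # assumed internal location

try:
    text = get_python_source("pkg/module.py")    # hypothetical filename
except NoSource as err:
    print(err)    # "No source for code: ..." as built in the helper above
else:
    # The helper decodes to text and guarantees a trailing newline.
    assert isinstance(text, str) and text.endswith("\n")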
Example #9
    def parse_file(self, cu, analysis):
        """ Generate data for single file """
        if hasattr(analysis, 'parser'):
            filename = cu.file_locator.relative_filename(cu.filename)
            source_lines = analysis.parser.lines
            with cu.source_file() as source_file:
                source = source_file.read()
            try:
                if sys.version_info < (3, 0):
                    encoding = source_encoding(source)
                    if encoding != 'utf-8':
                        source = source.decode(encoding).encode('utf-8')
            except UnicodeDecodeError:
                log.warn('Source file %s can not be properly decoded, skipping. '
                         'Please check if encoding declaration is ok', basename(cu.filename))
                return
        else:
            if hasattr(cu, 'relative_filename'):
                filename = cu.relative_filename()
            else:
                filename = analysis.coverage.file_locator.relative_filename(cu.filename)
            source_lines = list(enumerate(analysis.file_reporter.source_token_lines()))
            source = analysis.file_reporter.source()
        coverage_lines = [self.get_hits(i, analysis) for i in range(1, len(source_lines) + 1)]

        results = {
            'name': filename,
            'source': source,
            'coverage': coverage_lines,
        }
        branches = self.get_arcs(analysis)
        if branches:
            results['branches'] = branches

        self.source_files.append(results)
Example #10
def get_python_source(filename):
    """Return the source code, as unicode."""
    base, ext = os.path.splitext(filename)
    if ext == ".py" and env.WINDOWS:
        exts = [".py", ".pyw"]
    else:
        exts = [ext]

    for ext in exts:
        try_filename = base + ext
        if os.path.exists(try_filename):
            # A regular text file: open it.
            source = read_python_source(try_filename)
            break

        # Maybe it's in a zip file?
        source = get_zip_bytes(try_filename)
        if source is not None:
            break
    else:
        # Couldn't find source.
        raise NoSource("No source for code: '%s'." % filename)

    source = source.decode(source_encoding(source), "replace")

    # Python code should always end with a line with a newline.
    if source and source[-1] != '\n':
        source += '\n'

    return source
Example #11
 def test_detect_source_encoding_not_in_comment(self):
     if env.PYPY and env.PY3:        # pragma: no metacov
         # PyPy3 gets this case wrong. Not sure what I can do about it,
         # so skip the test.
         self.skipTest("PyPy3 is wrong about non-comment encoding. Skip it.")
     # Should not detect anything here
     source = b'def parse(src, encoding=None):\n    pass'
     self.assertEqual(source_encoding(source), DEF_ENCODING)
Example #12
 def test_detect_source_encoding_not_in_comment(self):
     if env.PYPY and env.PY3:
         # PyPy3 gets this case wrong. Not sure what I can do about it,
         # so skip the test.
         raise SkipTest
     # Should not detect anything here
     source = b'def parse(src, encoding=None):\n    pass'
     self.assertEqual(source_encoding(source), DEF_ENCODING)
Example #13
 def test_detect_source_encoding_not_in_comment(self):
     if env.PYPY and env.PY3:
         # PyPy3 gets this case wrong. Not sure what I can do about it,
         # so skip the test.
         self.skipTest("PyPy3 is wrong about non-comment encoding. Skip it.")
     # Should not detect anything here
     source = b'def parse(src, encoding=None):\n    pass'
     self.assertEqual(source_encoding(source), DEF_ENCODING)
Example #14
 def source(self):
     if self._source is None:
         self._source = get_python_source(self.filename)
         if env.PY2:
             encoding = source_encoding(self._source)
             self._source = self._source.decode(encoding, "replace")
         assert isinstance(self._source, unicode_class)
     return self._source
Example #15
 def test_neuter_encoding_declaration(self):
     for source in ENCODING_DECLARATION_SOURCES:
         neutered = neuter_encoding_declaration(source.decode("ascii"))
         neutered = neutered.encode("ascii")
         self.assertEqual(
             source_encoding(neutered),
             DEF_ENCODING,
             "Wrong encoding in %r" % neutered
         )
Example #16
 def parse_file(self, cu, analysis):
     """ Generate data for single file """
     filename = cu.file_locator.relative_filename(cu.filename)
     coverage_lines = [self.get_hits(i, analysis) for i in range(1, len(analysis.parser.lines) + 1)]
     source_file = cu.source_file()
     try:
         source = source_file.read()
         if sys.version_info < (3, 0):
             encoding = source_encoding(source)
             if encoding != 'utf-8':
                 source = source.decode(encoding).encode('utf-8')
     finally:
         source_file.close()
     self.source_files.append({
         'name': filename,
         'source': source,
         'coverage': coverage_lines
     })
Example #17
    def test_neuter_encoding_declaration(self):
        for lines_diff_expected, source, _ in ENCODING_DECLARATION_SOURCES:
            neutered = neuter_encoding_declaration(source.decode("ascii"))
            neutered = neutered.encode("ascii")

            # The neutered source should have the same number of lines.
            source_lines = source.splitlines()
            neutered_lines = neutered.splitlines()
            assert len(source_lines) == len(neutered_lines)

            # Only one of the lines should be different.
            lines_different = sum(
                int(nline != sline) for nline, sline in zip(neutered_lines, source_lines)
            )
            assert lines_diff_expected == lines_different

            # The neutered source will be detected as having no encoding
            # declaration.
            assert source_encoding(neutered) == DEF_ENCODING, "Wrong encoding in %r" % neutered
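Examples #7, #17 and #43 unpack ENCODING_DECLARATION_SOURCES as three-item tuples, while the older Examples #5 and #19 use two-item forms. The constant itself is not shown on this page; a hypothetical entry in the newer shape, purely for illustration:

# Hypothetical entry shaped like the tuples the newer tests unpack:
# (expected number of changed lines, source bytes, expected encoding)
ENCODING_DECLARATION_SOURCES = [
    (1, b"# coding=cp850\n\n", 'cp850'),
]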
Example #18
 def parse_file(self, cu, analysis):
     """ Generate data for single file """
     filename = cu.file_locator.relative_filename(cu.filename)
     coverage_lines = [
         self.get_hits(i, analysis)
         for i in range(1,
                        len(analysis.parser.lines) + 1)
     ]
     source_file = cu.source_file()
     try:
         source = source_file.read()
         if sys.version_info < (3, 0):
             encoding = source_encoding(source)
             if encoding != 'utf-8':
                 source = source.decode(encoding).encode('utf-8')
     finally:
         source_file.close()
     self.source_files.append({
         'name': filename,
         'source': source,
         'coverage': coverage_lines
     })
Example #19
    def test_neuter_encoding_declaration(self):
        for lines_diff_expected, source in ENCODING_DECLARATION_SOURCES:
            neutered = neuter_encoding_declaration(source.decode("ascii"))
            neutered = neutered.encode("ascii")

            # The neutered source should have the same number of lines.
            source_lines = source.splitlines()
            neutered_lines = neutered.splitlines()
            self.assertEqual(len(source_lines), len(neutered_lines))

            # Only one of the lines should be different.
            lines_different = sum(
                int(nline != sline) for nline, sline in zip(neutered_lines, source_lines)
            )
            self.assertEqual(lines_diff_expected, lines_different)

            # The neutered source will be detected as having no encoding
            # declaration.
            self.assertEqual(
                source_encoding(neutered),
                DEF_ENCODING,
                "Wrong encoding in %r" % neutered
            )
Example #20
 def test_detect_source_encoding_not_in_comment(self):
     # Should not detect anything here
     source = 'def parse(src, encoding=None):\n    pass'
     self.assertEqual(source_encoding(source), 'ascii')
Example #21
 def test_unknown_encoding(self):
     source = b"# coding: klingon\n"
     with pytest.raises(SyntaxError, match="unknown encoding: klingon"):
         source_encoding(source)
Example #22
 def source_encoding(self):
     return source_encoding(self.source())
Example #23
 def test_dont_detect_source_encoding_on_third_line(self):
     # A coding declaration doesn't count on the third line.
     source = b"\n\n# coding=cp850\n\n"
     self.assertEqual(source_encoding(source), DEF_ENCODING)
Example #24
 def test_bom_is_wrong(self):
     # A BOM with an explicit non-utf8 encoding is an error.
     source = b"\xEF\xBB\xBF# coding: cp850\n"
     with self.assertRaisesRegex(SyntaxError, "encoding problem: utf-8"):
         source_encoding(source)
Example #25
 def test_detect_source_encoding_of_empty_file(self):
     # An important edge case.
     self.assertEqual(source_encoding(""), 'ascii')
Example #26
    def html_file(self, cu, analysis):
        source_file = cu.source_file()
        try:
            source = source_file.read()
        finally:
            source_file.close()

        flat_rootname = cu.flat_rootname()
        this_hash = self.file_hash(source, cu)
        that_hash = self.status.file_hash(flat_rootname)
        if this_hash == that_hash:
            self.files.append(self.status.index_info(flat_rootname))
            return
        self.status.set_file_hash(flat_rootname, this_hash)
        if sys.version_info < (3, 0):
            encoding = source_encoding(source)
            if encoding.startswith('utf-8') and source[:3] == '\xef\xbb\xbf':
                source = source[3:]
                encoding = 'utf-8'
        nums = analysis.numbers
        missing_branch_arcs = analysis.missing_branch_arcs()
        c_run = 'run hide_run'
        c_exc = 'exc'
        c_mis = 'mis'
        c_par = 'par ' + c_run
        lines = []
        for lineno, line in enumerate(source_token_lines(source)):
            lineno += 1
            line_class = []
            annotate_html = ''
            annotate_title = ''
            if lineno in analysis.statements:
                line_class.append('stm')
            if lineno in analysis.excluded:
                line_class.append(c_exc)
            elif lineno in analysis.missing:
                line_class.append(c_mis)
            elif self.arcs and lineno in missing_branch_arcs:
                line_class.append(c_par)
                annlines = []
                for b in missing_branch_arcs[lineno]:
                    if b < 0:
                        annlines.append('exit')
                    else:
                        annlines.append(str(b))

                annotate_html = '&nbsp;&nbsp; '.join(annlines)
                if len(annlines) > 1:
                    annotate_title = 'no jumps to these line numbers'
                elif len(annlines) == 1:
                    annotate_title = 'no jump to this line number'
            elif lineno in analysis.statements:
                line_class.append(c_run)
            html = []
            for tok_type, tok_text in line:
                if tok_type == 'ws':
                    html.append(escape(tok_text))
                else:
                    tok_html = escape(tok_text) or '&nbsp;'
                    html.append("<span class='%s'>%s</span>" % (tok_type, tok_html))

            lines.append({'html': ''.join(html),
             'number': lineno,
             'class': ' '.join(line_class) or 'pln',
             'annotate': annotate_html,
             'annotate_title': annotate_title})

        html = spaceless(self.source_tmpl.render({'c_exc': c_exc,
         'c_mis': c_mis,
         'c_par': c_par,
         'c_run': c_run,
         'arcs': self.arcs,
         'extra_css': self.extra_css,
         'cu': cu,
         'nums': nums,
         'lines': lines}))
        if sys.version_info < (3, 0):
            html = html.decode(encoding)
        html_filename = flat_rootname + '.html'
        html_path = os.path.join(self.directory, html_filename)
        self.write_html(html_path, html)
        index_info = {'nums': nums,
         'html_filename': html_filename,
         'name': cu.name}
        self.files.append(index_info)
        self.status.set_index_info(flat_rootname, index_info)
Example #27
 def test_dont_detect_source_encoding_on_third_line(self):
     # A coding declaration doesn't count on the third line.
     source = b"\n\n# coding=cp850\n\n"
     assert source_encoding(source) == DEF_ENCODING
Example #28
 def test_detect_source_encoding_of_empty_file(self):
     # An important edge case.
     self.assertEqual(source_encoding(b""), DEF_ENCODING)
Example #29
 def test_detect_source_encoding_of_empty_file(self):
     # An important edge case.
     assert source_encoding(b"") == DEF_ENCODING
Example #30
 def test_bom_with_encoding(self):
     source = b"\xEF\xBB\xBF# coding: utf-8\ntext = 'hello'\n"
     self.assertEqual(source_encoding(source), 'utf-8-sig')
Example #31
 def test_bom(self):
     # A BOM means utf-8.
     source = b"\xEF\xBB\xBFtext = 'hello'\n"
     assert source_encoding(source) == 'utf-8-sig'
Example #32
 def test_bom_with_encoding(self):
     source = b"\xEF\xBB\xBF# coding: utf-8\ntext = 'hello'\n"
     assert source_encoding(source) == 'utf-8-sig'
Example #33
 def test_bom_is_wrong(self):
     # A BOM with an explicit non-utf8 encoding is an error.
     source = b"\xEF\xBB\xBF# coding: cp850\n"
     with pytest.raises(SyntaxError, match="encoding problem: utf-8"):
         source_encoding(source)
Example #34
 def test_detect_source_encoding_not_in_comment(self):
     # Should not detect anything here
     source = 'def parse(src, encoding=None):\n    pass'
     self.assertEqual(source_encoding(source), 'ascii')
Example #35
 def test_detect_source_encoding_not_in_comment(self):
     # Should not detect anything here
     source = b'def parse(src, encoding=None):\n    pass'
     assert source_encoding(source) == DEF_ENCODING
Example #36
    def html_file(self, cu, analysis):
        """Generate an HTML file for one source file."""
        source_file = cu.source_file()
        try:
            source = source_file.read()
        finally:
            source_file.close()

        # Find out if the file on disk is already correct.
        flat_rootname = cu.flat_rootname()
        this_hash = self.file_hash(source, cu)
        that_hash = self.status.file_hash(flat_rootname)
        if this_hash == that_hash:
            # Nothing has changed to require the file to be reported again.
            self.files.append(self.status.index_info(flat_rootname))
            return

        self.status.set_file_hash(flat_rootname, this_hash)

        # If need be, determine the encoding of the source file. We use it
        # later to properly write the HTML.
        if sys.version_info < (3, 0):
            encoding = source_encoding(source)
            # Some UTF8 files have the dreaded UTF8 BOM. If so, junk it.
            if encoding.startswith("utf-8") and source[:3] == "\xef\xbb\xbf":
                source = source[3:]
                encoding = "utf-8"

        # Get the numbers for this file.
        nums = analysis.numbers

        if self.arcs:
            missing_branch_arcs = analysis.missing_branch_arcs()

        # These classes determine which lines are highlighted by default.
        c_run = "run hide_run"
        c_exc = "exc"
        c_mis = "mis"
        c_par = "par " + c_run

        lines = []

        for lineno, line in enumerate(source_token_lines(source)):
            lineno += 1  # 1-based line numbers.
            # Figure out how to mark this line.
            line_class = []
            annotate_html = ""
            annotate_title = ""
            if lineno in analysis.statements:
                line_class.append("stm")
            if lineno in analysis.excluded:
                line_class.append(c_exc)
            elif lineno in analysis.missing:
                line_class.append(c_mis)
            elif self.arcs and lineno in missing_branch_arcs:
                line_class.append(c_par)
                annlines = []
                for b in missing_branch_arcs[lineno]:
                    if b < 0:
                        annlines.append("exit")
                    else:
                        annlines.append(str(b))
                annotate_html = "&nbsp;&nbsp; ".join(annlines)
                if len(annlines) > 1:
                    annotate_title = "no jumps to these line numbers"
                elif len(annlines) == 1:
                    annotate_title = "no jump to this line number"
            elif lineno in analysis.statements:
                line_class.append(c_run)

            # Build the HTML for the line
            html = []
            for tok_type, tok_text in line:
                if tok_type == "ws":
                    html.append(escape(tok_text))
                else:
                    tok_html = escape(tok_text) or '&nbsp;'
                    html.append("<span class='%s'>%s</span>" %
                                (tok_type, tok_html))

            lines.append({
                'html': ''.join(html),
                'number': lineno,
                'class': ' '.join(line_class) or "pln",
                'annotate': annotate_html,
                'annotate_title': annotate_title,
            })

        # Write the HTML page for this file.
        html = spaceless(
            self.source_tmpl.render({
                'c_exc': c_exc,
                'c_mis': c_mis,
                'c_par': c_par,
                'c_run': c_run,
                'arcs': self.arcs,
                'extra_css': self.extra_css,
                'cu': cu,
                'nums': nums,
                'lines': lines,
            }))

        if sys.version_info < (3, 0):
            html = html.decode(encoding)

        html_filename = flat_rootname + ".html"
        html_path = os.path.join(self.directory, html_filename)
        self.write_html(html_path, html)

        # Save this file's information for the index file.
        index_info = {
            'nums': nums,
            'html_filename': html_filename,
            'name': cu.name,
        }
        self.files.append(index_info)
        self.status.set_index_info(flat_rootname, index_info)
Example #37
 def test_detect_source_encoding_on_second_line(self):
     # A coding declaration should be found despite a first blank line.
     source = "\n# coding=cp850\n\n"
     self.assertEqual(source_encoding(source), 'cp850')
Example #38
    def html_file(self, cu, analysis):
        """Generate an HTML file for one source file."""
        source_file = cu.source_file()
        try:
            source = source_file.read()
        finally:
            source_file.close()

        # Find out if the file on disk is already correct.
        flat_rootname = cu.flat_rootname()
        this_hash = self.file_hash(source, cu)
        that_hash = self.status.file_hash(flat_rootname)
        if this_hash == that_hash:
            # Nothing has changed to require the file to be reported again.
            self.files.append(self.status.index_info(flat_rootname))
            return

        self.status.set_file_hash(flat_rootname, this_hash)

        # If need be, determine the encoding of the source file. We use it
        # later to properly write the HTML.
        if sys.version_info < (3, 0):
            encoding = source_encoding(source)
            # Some UTF8 files have the dreaded UTF8 BOM. If so, junk it.
            if encoding.startswith("utf-8") and source[:3] == "\xef\xbb\xbf":
                source = source[3:]
                encoding = "utf-8"

        # Get the numbers for this file.
        nums = analysis.numbers

        if self.arcs:
            missing_branch_arcs = analysis.missing_branch_arcs()

        # These classes determine which lines are highlighted by default.
        c_run = "run hide_run"
        c_exc = "exc"
        c_mis = "mis"
        c_par = "par " + c_run

        lines = []

        for lineno, line in enumerate(source_token_lines(source)):
            lineno += 1     # 1-based line numbers.
            # Figure out how to mark this line.
            line_class = []
            annotate_html = ""
            annotate_title = ""
            if lineno in analysis.statements:
                line_class.append("stm")
            if lineno in analysis.excluded:
                line_class.append(c_exc)
            elif lineno in analysis.missing:
                line_class.append(c_mis)
            elif self.arcs and lineno in missing_branch_arcs:
                line_class.append(c_par)
                annlines = []
                for b in missing_branch_arcs[lineno]:
                    if b < 0:
                        annlines.append("exit")
                    else:
                        annlines.append(str(b))
                annotate_html = "&nbsp;&nbsp; ".join(annlines)
                if len(annlines) > 1:
                    annotate_title = "no jumps to these line numbers"
                elif len(annlines) == 1:
                    annotate_title = "no jump to this line number"
            elif lineno in analysis.statements:
                line_class.append(c_run)

            # Build the HTML for the line
            html = []
            for tok_type, tok_text in line:
                if tok_type == "ws":
                    html.append(escape(tok_text))
                else:
                    tok_html = escape(tok_text) or '&nbsp;'
                    html.append(
                        "<span class='%s'>%s</span>" % (tok_type, tok_html)
                        )

            lines.append({
                'html': ''.join(html),
                'number': lineno,
                'class': ' '.join(line_class) or "pln",
                'annotate': annotate_html,
                'annotate_title': annotate_title,
            })

        # Write the HTML page for this file.
        html = spaceless(self.source_tmpl.render({
            'c_exc': c_exc, 'c_mis': c_mis, 'c_par': c_par, 'c_run': c_run,
            'arcs': self.arcs, 'extra_css': self.extra_css,
            'cu': cu, 'nums': nums, 'lines': lines,
        }))

        if sys.version_info < (3, 0):
            html = html.decode(encoding)

        html_filename = flat_rootname + ".html"
        html_path = os.path.join(self.directory, html_filename)
        self.write_html(html_path, html)

        # Save this file's information for the index file.
        index_info = {
            'nums': nums,
            'html_filename': html_filename,
            'name': cu.name,
            }
        self.files.append(index_info)
        self.status.set_index_info(flat_rootname, index_info)
Example #39
 def test_dont_detect_source_encoding_on_third_line(self):
     # A coding declaration doesn't count on the third line.
     source = "\n\n# coding=cp850\n\n"
     self.assertEqual(source_encoding(source), 'ascii')
Example #40
 def test_unknown_encoding(self):
     source = b"# coding: klingon\n"
     with self.assertRaisesRegex(SyntaxError, "unknown encoding: klingon"):
         source_encoding(source)
Example #41
 def test_detect_source_encoding_of_empty_file(self):
     # An important edge case.
     self.assertEqual(source_encoding(""), 'ascii')
Example #42
 def test_detect_source_encoding_of_empty_file(self):
     # An important edge case.
     self.assertEqual(source_encoding(b""), DEF_ENCODING)
Example #43
 def test_detect_source_encoding(self):
     for _, source, expected in ENCODING_DECLARATION_SOURCES:
         assert source_encoding(source) == expected, f"Wrong encoding in {source!r}"
Example #44
 def test_bom(self):
     # A BOM means utf-8.
     source = b"\xEF\xBB\xBFtext = 'hello'\n"
     self.assertEqual(source_encoding(source), 'utf-8-sig')
Example #45
 def test_detect_source_encoding_on_second_line(self):
     # A coding declaration should be found despite a first blank line.
     source = b"\n# coding=cp850\n\n"
     self.assertEqual(source_encoding(source), 'cp850')
Example #46
 def source_encoding(self, source):
     return source_encoding(source)
Example #47
 def test_bom_with_encoding(self):
     source = b"\xEF\xBB\xBF# coding: utf-8\ntext = 'hello'\n"
     self.assertEqual(source_encoding(source), 'utf-8-sig')
Example #48
 def source_encoding(self, source):
     return source_encoding(source)
Example #49
 def test_bom_is_wrong(self):
     # A BOM with an explicit non-utf8 encoding is an error.
     source = b"\xEF\xBB\xBF# coding: cp850\n"
     with self.assertRaisesRegex(SyntaxError, "encoding problem: utf-8"):
         source_encoding(source)
Example #50
 def test_unknown_encoding(self):
     source = b"# coding: klingon\n"
     with self.assertRaisesRegex(SyntaxError, "unknown encoding: klingon"):
         source_encoding(source)
Example #51
 def test_bom(self):
     # A BOM means utf-8.
     source = b"\xEF\xBB\xBFtext = 'hello'\n"
     self.assertEqual(source_encoding(source), 'utf-8-sig')
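The examples above all exercise a source_encoding() helper that applies PEP 263. For reference, the standard library's tokenize.detect_encoding() implements the same rules (BOM handling, coding declarations counted only on the first two lines, SyntaxError on unknown or conflicting encodings); a minimal sketch, with a hypothetical wrapper name:

import io
import tokenize

def stdlib_source_encoding(source: bytes) -> str:
    # detect_encoding() checks for a UTF-8 BOM and a PEP 263 coding
    # declaration on the first two lines; it raises SyntaxError for an
    # unknown encoding or a BOM that contradicts a non-UTF-8 declaration.
    encoding, _lines_read = tokenize.detect_encoding(io.BytesIO(source).readline)
    return encoding

# A bare BOM is reported as 'utf-8-sig', matching the test_bom examples.
assert stdlib_source_encoding(b"\xEF\xBB\xBFtext = 'hello'\n") == 'utf-8-sig'
# A declaration on the third line is ignored; the stdlib default is 'utf-8'
# (the tests' DEF_ENCODING is version-dependent: 'ascii' on Python 2).
assert stdlib_source_encoding(b"\n\n# coding=cp850\n\n") == 'utf-8'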