Exemple #1
0
    def testEmacs(self):
        # Emacs-style ``-*- coding: ... -*-`` markers.
        # Marker on the very first line.
        e = guess_encoding('# -*- coding: UTF-8  -*-')
        self.assertEqual(e, 'UTF-8')
        # Marker on the second line (the literal starts with a newline).
        e = guess_encoding('''
        ### -*- coding: ISO-8859-1  -*-
        ''')
        self.assertEqual(e, 'ISO-8859-1')
        # Marker pushed down to the third line: no encoding is expected.
        e = guess_encoding('''

        ### -*- coding: ISO-8859-1  -*-
        ''')
        self.assertEqual(e, None)
Exemple #2
0
    def testVim(self):
        # Vim-style ``vim:fileencoding=...`` modelines.
        # Modeline on the very first line.
        e = guess_encoding('# vim:fileencoding=UTF-8')
        self.assertEqual(e, 'UTF-8')
        # Modeline on the second line (the literal starts with a newline).
        e = guess_encoding('''
        ### vim:fileencoding=ISO-8859-1
        ''')
        self.assertEqual(e, 'ISO-8859-1')
        # Modeline on the third line, with a space after '=': no encoding
        # is expected.
        e = guess_encoding('''

        ### vim:fileencoding= ISO-8859-1
        ''')
        self.assertEqual(e, None)
Exemple #3
0
    def testVim(self):
        # Vim-style ``vim:fileencoding=...`` modelines.
        # Modeline on the very first line.
        e = guess_encoding('# vim:fileencoding=UTF-8')
        self.assertEqual(e, 'UTF-8')
        # Modeline on the second line (the literal starts with a newline).
        e = guess_encoding('''
        ### vim:fileencoding=ISO-8859-1
        ''')
        self.assertEqual(e, 'ISO-8859-1')
        # Modeline on the third line, with a space after '=': no encoding
        # is expected.
        e = guess_encoding('''

        ### vim:fileencoding= ISO-8859-1
        ''')
        self.assertEqual(e, None)
Exemple #4
0
    def testEmacs(self):
        # Emacs-style ``-*- coding: ... -*-`` markers.
        # Marker on the very first line.
        e = guess_encoding('# -*- coding: UTF-8  -*-')
        self.assertEqual(e, 'UTF-8')
        # Marker on the second line (the literal starts with a newline).
        e = guess_encoding('''
        ### -*- coding: ISO-8859-1  -*-
        ''')
        self.assertEqual(e, 'ISO-8859-1')
        # Marker pushed down to the third line: no encoding is expected.
        e = guess_encoding('''

        ### -*- coding: ISO-8859-1  -*-
        ''')
        self.assertEqual(e, None)
Exemple #5
0
 def testXML(self):
     # XML declarations.
     # A declaration without an encoding attribute is expected to give UTF-8.
     e = guess_encoding('<?xml?>')
     self.assertEqual(e, 'UTF-8')
     # Encoding attribute followed by a space before '?>'.
     e = guess_encoding('''<?xml version="1.0" encoding="ISO-8859-1" ?>
     ''')
     self.assertEqual(e, 'ISO-8859-1')
     # Encoding attribute immediately followed by '?>'.
     e = guess_encoding('''<?xml version="1.0" encoding="ISO-8859-1"?>
     ''')
     self.assertEqual(e, 'ISO-8859-1')
     # An 'encoding' attribute on a later element must not override the
     # one from the XML declaration.
     e = guess_encoding('''<?xml version="1.0" encoding="ISO-8859-1"?><truc encoding="UTF-8">
     </truc>
     ''')
     self.assertEqual(e, 'ISO-8859-1')
Exemple #6
0
 def testXML(self):
     # XML declarations.
     # A declaration without an encoding attribute is expected to give UTF-8.
     e = guess_encoding('<?xml?>')
     self.assertEqual(e, 'UTF-8')
     # Encoding attribute followed by a space before '?>'.
     e = guess_encoding('''<?xml version="1.0" encoding="ISO-8859-1" ?>
     ''')
     self.assertEqual(e, 'ISO-8859-1')
     # Encoding attribute immediately followed by '?>'.
     e = guess_encoding('''<?xml version="1.0" encoding="ISO-8859-1"?>
     ''')
     self.assertEqual(e, 'ISO-8859-1')
     # An 'encoding' attribute on a later element must not override the
     # one from the XML declaration.
     e = guess_encoding(
         '''<?xml version="1.0" encoding="ISO-8859-1"?><truc encoding="UTF-8">
     </truc>
     ''')
     self.assertEqual(e, 'ISO-8859-1')
    def guess_encoding(self, data):
        """Return the encoding detected for *data*, defaulting to utf-8.

        Text input is encoded to UTF-8 bytes first; the module-level
        ``guess_encoding`` helper then does the actual detection.  When
        that helper returns ``None``, ``'utf-8'`` is returned instead.
        """
        # Text may still carry an explicit encoding declaration, so it is
        # turned into bytes before being inspected.
        raw = data.encode('UTF-8') if isinstance(data, six.text_type) else data
        detected = guess_encoding(raw)
        return 'utf-8' if detected is None else detected
Exemple #8
0
    def guess_encoding(self, data):
        """Return the encoding detected for *data*, defaulting to utf-8.

        Text input is encoded to UTF-8 bytes first; the module-level
        ``guess_encoding`` helper then does the actual detection.  When
        that helper returns ``None``, ``'utf-8'`` is returned instead.
        """
        # Text may still carry an explicit encoding declaration, so it is
        # turned into bytes before being inspected.
        raw = data.encode('UTF-8') if isinstance(data, type(u'')) else data
        detected = guess_encoding(raw)
        return 'utf-8' if detected is None else detected
Exemple #9
0
    def test_broken_percent(self):
        e = guess_encoding(r"""<pre>
&lt;metal:block tal:define="dummy python:
request.RESPONSE.setHeader('Content-Type',
'text/html;;charset=%s' % charset)" /&gt;
&lt;metal:block tal:define="dummy
python:request.RESPONSE.setHeader('Content-Language', lang)"
/
&gt;
</pre>
""")
        # unable to detect a valid encoding
        self.failUnlessEqual(e, None)
Exemple #10
0
 def guess_encoding(self, data):
     """Return the encoding detected for *data*, defaulting to utf-8.

     Text input is encoded to UTF-8 bytes first; the module-level
     ``guess_encoding`` helper then does the actual detection.  When
     that helper returns ``None``, ``'utf-8'`` is returned instead.
     """
     # Text may still carry an explicit encoding declaration, so it is
     # turned into bytes before being inspected.
     raw = data.encode('UTF-8') if isinstance(data, type(u'')) else data
     detected = guess_encoding(raw)
     return 'utf-8' if detected is None else detected
 def guess_encoding(self, data):
     """Return the encoding detected for *data*, defaulting to utf-8.

     Text input is encoded to UTF-8 bytes first; the module-level
     ``guess_encoding`` helper then does the actual detection.  When
     that helper returns ``None``, ``"utf-8"`` is returned instead.
     """
     # Text may still carry an explicit encoding declaration, so it is
     # turned into bytes before being inspected.
     raw = data.encode("UTF-8") if isinstance(data, type(u"")) else data
     detected = guess_encoding(raw)
     return "utf-8" if detected is None else detected
Exemple #12
0
    def test_broken_percent(self):
        e = guess_encoding(
r"""<pre>
&lt;metal:block tal:define="dummy python:
request.RESPONSE.setHeader('Content-Type',
'text/html;;charset=%s' % charset)" /&gt;
&lt;metal:block tal:define="dummy
python:request.RESPONSE.setHeader('Content-Language', lang)"
/
&gt;
</pre>
"""
    )
        # unable to detect a valid encoding
        self.failUnlessEqual(e, None)
 def guess_encoding(self, data):
     """Guess the encoding of a text value.

     Text (unicode) input is encoded to UTF-8 bytes before being handed
     to the module-level ``guess_encoding`` helper.  If that helper
     returns ``None``, the ``default_charset`` site property is looked up
     through the ``portal_properties`` tool (defaulting to ``'UTF-8'``).
     If that lookup raises, ``'UTF-8'`` is returned.
     """
     # isinstance also accepts subclasses of the text type, which an exact
     # type comparison would silently skip.
     if isinstance(data, type(u'')):
         # data maybe unicode but with another encoding specified
         data = data.encode('UTF-8')
     encoding = guess_encoding(data)
     if encoding is None:
         try:
             site_props = getToolByName(self, 'portal_properties').site_properties
             encoding = site_props.getProperty('default_charset', 'UTF-8')
         except Exception:
             # Best-effort fallback when the site properties cannot be
             # reached; unlike a bare ``except:``, this lets SystemExit and
             # KeyboardInterrupt propagate.
             encoding = 'UTF-8'
     return encoding
Exemple #14
0
    def testHTML(self):
        e = guess_encoding('''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>ASPN : Python Cookbook : Auto-detect XML encoding</title>
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
    <meta name="robots" content="all" />
    <meta name="description" content="ActiveState Open Source Programming tools for Perl Python XML xslt scripting with free trials. Quality development tools for programmers systems administrators database administrators network administrators and webmasters" />
    <meta name="keywords" content="ActiveState,Perl,xml,xslt,mozilla,Open Source,Python,Perl for Win32,resources,PerlScript,ActivePerl,Programming,Programmers,Integrated,Development,Environment,SOAP,Linux,Solaris,Web,development,tools,free,software,download,support,Perl Resource Kit,System Administration,Sys Admin,WinNT,SQL,Oracle,Email,XML,Linux,Programming,perl,NT,2000,windows,Unix,Software,Security,   Administration,systems,windows,database,database,consulting,support,Microsoft,developer,resource,code,tutorials,IDE,Integrated development environment,developer,resources,tcl,php" />

<link rel="stylesheet" href="/ASPN/aspn.css" />

</head>

<body bgcolor="#FFFFFF" leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">
charset=utf-8
</body>
</html> ''')
        self.failUnlessEqual(e, 'iso-8859-1')
Exemple #15
0
    def testHTML(self):
        e = guess_encoding(
            '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>ASPN : Python Cookbook : Auto-detect XML encoding</title>
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
    <meta name="robots" content="all" />
    <meta name="description" content="ActiveState Open Source Programming tools for Perl Python XML xslt scripting with free trials. Quality development tools for programmers systems administrators database administrators network administrators and webmasters" />
    <meta name="keywords" content="ActiveState,Perl,xml,xslt,mozilla,Open Source,Python,Perl for Win32,resources,PerlScript,ActivePerl,Programming,Programmers,Integrated,Development,Environment,SOAP,Linux,Solaris,Web,development,tools,free,software,download,support,Perl Resource Kit,System Administration,Sys Admin,WinNT,SQL,Oracle,Email,XML,Linux,Programming,perl,NT,2000,windows,Unix,Software,Security,   Administration,systems,windows,database,database,consulting,support,Microsoft,developer,resource,code,tutorials,IDE,Integrated development environment,developer,resources,tcl,php" />

<link rel="stylesheet" href="/ASPN/aspn.css" />

</head>

<body bgcolor="#FFFFFF" leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">
charset=utf-8
</body>
</html> ''')
        self.failUnlessEqual(e, 'iso-8859-1')
Exemple #16
0
 def testUTF8(self):
     # '\xef\xbb\xbf' is the UTF-8 byte order mark.
     # At the very start of the data it is expected to identify UTF-8 ...
     e = guess_encoding('\xef\xbb\xbf any UTF-8 data')
     self.assertEqual(e, 'UTF-8')
     # ... but the same bytes at the end of the data must not.
     e = guess_encoding(' any UTF-8 data \xef\xbb\xbf')
     self.assertEqual(e, None)
Exemple #17
0
 def testUTF8(self):
     # '\xef\xbb\xbf' is the UTF-8 byte order mark.
     # At the very start of the data it is expected to identify UTF-8 ...
     e = guess_encoding('\xef\xbb\xbf any UTF-8 data')
     self.assertEqual(e, 'UTF-8')
     # ... but the same bytes at the end of the data must not.
     e = guess_encoding(' any UTF-8 data \xef\xbb\xbf')
     self.assertEqual(e, None)