def testEmacs(self):
    """Check detection of Emacs-style ``-*- coding: ... -*-`` declarations."""
    # Declaration as the whole of a one-line input.
    e = guess_encoding('# -*- coding: UTF-8 -*-')
    self.assertEqual(e, 'UTF-8')
    # Declaration inside a triple-quoted input.
    e = guess_encoding(''' ### -*- coding: ISO-8859-1 -*- ''')
    self.assertEqual(e, 'ISO-8859-1')
    # NOTE(review): as visible here this input is identical to the previous
    # one yet expects None; whitespace (probably line breaks) inside the
    # literals looks lost -- confirm against the upstream test.
    e = guess_encoding(''' ### -*- coding: ISO-8859-1 -*- ''')
    self.assertEqual(e, None)
def testVim(self):
    """Check detection of Vim-style ``vim:fileencoding=...`` declarations."""
    # Declaration as the whole of a one-line input.
    e = guess_encoding('# vim:fileencoding=UTF-8')
    self.assertEqual(e, 'UTF-8')
    # Declaration inside a triple-quoted input.
    e = guess_encoding(''' ### vim:fileencoding=ISO-8859-1 ''')
    self.assertEqual(e, 'ISO-8859-1')
    # As visible here, this input differs from the previous one only by the
    # space after "="; no encoding is expected to be detected.
    e = guess_encoding(''' ### vim:fileencoding= ISO-8859-1 ''')
    self.assertEqual(e, None)
def testVim(self):
    """Check detection of Vim-style ``vim:fileencoding=...`` declarations."""
    # Declaration as the whole of a one-line input.
    e = guess_encoding('# vim:fileencoding=UTF-8')
    self.assertEqual(e, 'UTF-8')
    # Declaration inside a triple-quoted input.
    e = guess_encoding(''' ### vim:fileencoding=ISO-8859-1 ''')
    self.assertEqual(e, 'ISO-8859-1')
    # As visible here, this input differs from the previous one only by the
    # space after "="; no encoding is expected to be detected.
    e = guess_encoding(''' ### vim:fileencoding= ISO-8859-1 ''')
    self.assertEqual(e, None)
def testEmacs(self):
    """Check detection of Emacs-style ``-*- coding: ... -*-`` declarations."""
    # Declaration as the whole of a one-line input.
    e = guess_encoding('# -*- coding: UTF-8 -*-')
    self.assertEqual(e, 'UTF-8')
    # Declaration inside a triple-quoted input.
    e = guess_encoding(''' ### -*- coding: ISO-8859-1 -*- ''')
    self.assertEqual(e, 'ISO-8859-1')
    # NOTE(review): as visible here this input is identical to the previous
    # one yet expects None; whitespace (probably line breaks) inside the
    # literals looks lost -- confirm against the upstream test.
    e = guess_encoding(''' ### -*- coding: ISO-8859-1 -*- ''')
    self.assertEqual(e, None)
def testXML(self):
    """Check encoding detection from XML declarations."""
    # A declaration without an encoding attribute is expected to give UTF-8.
    e = guess_encoding('<?xml?>')
    self.assertEqual(e, 'UTF-8')
    # Encoding attribute followed by a space before "?>".
    e = guess_encoding('''<?xml version="1.0" encoding="ISO-8859-1" ?> ''')
    self.assertEqual(e, 'ISO-8859-1')
    # Encoding attribute immediately followed by "?>".
    e = guess_encoding('''<?xml version="1.0" encoding="ISO-8859-1"?> ''')
    self.assertEqual(e, 'ISO-8859-1')
    # An "encoding" attribute on a later element is expected not to override
    # the one in the XML declaration.
    e = guess_encoding(
        '''<?xml version="1.0" encoding="ISO-8859-1"?><truc encoding="UTF-8"> </truc> ''')
    self.assertEqual(e, 'ISO-8859-1')
def testXML(self):
    """Check encoding detection from XML declarations."""
    # A declaration without an encoding attribute is expected to give UTF-8.
    e = guess_encoding('<?xml?>')
    self.assertEqual(e, 'UTF-8')
    # Encoding attribute followed by a space before "?>".
    e = guess_encoding('''<?xml version="1.0" encoding="ISO-8859-1" ?> ''')
    self.assertEqual(e, 'ISO-8859-1')
    # Encoding attribute immediately followed by "?>".
    e = guess_encoding('''<?xml version="1.0" encoding="ISO-8859-1"?> ''')
    self.assertEqual(e, 'ISO-8859-1')
    # An "encoding" attribute on a later element is expected not to override
    # the one in the XML declaration.
    e = guess_encoding(
        '''<?xml version="1.0" encoding="ISO-8859-1"?><truc encoding="UTF-8"> </truc> ''')
    self.assertEqual(e, 'ISO-8859-1')
def guess_encoding(self, data):
    """Return the encoding detected in ``data``, defaulting to utf-8.

    Text input (``six.text_type``) is encoded to UTF-8 first. The value is
    then handed to the ``guess_encoding`` function in scope (not this
    method); when that returns ``None`` the result is ``'utf-8'``.
    """
    # data maybe unicode but with another encoding specified
    payload = data.encode('UTF-8') if isinstance(data, six.text_type) else data
    detected = guess_encoding(payload)
    return 'utf-8' if detected is None else detected
def guess_encoding(self, data):
    """Return the encoding detected in ``data``, defaulting to utf-8.

    Text input is encoded to UTF-8 first. The value is then handed to the
    ``guess_encoding`` function in scope (not this method); when that
    returns ``None`` the result is ``'utf-8'``.
    """
    text_type = type(u'')
    # data maybe unicode but with another encoding specified
    payload = data.encode('UTF-8') if isinstance(data, text_type) else data
    detected = guess_encoding(payload)
    if detected is not None:
        return detected
    return 'utf-8'
def test_broken_percent(self):
    """Check that markup with a ``charset=%s`` placeholder yields no encoding."""
    # The only "charset=" in the input is followed by a "%s" placeholder
    # rather than an encoding name.
    e = guess_encoding(
        r"""<pre> <metal:block tal:define="dummy python: request.RESPONSE.setHeader('Content-Type', 'text/html;;charset=%s' % charset)" /> <metal:block tal:define="dummy python:request.RESPONSE.setHeader('Content-Language', lang)" / > </pre> """
    )
    # unable to detect a valid encoding
    self.assertEqual(e, None)
def guess_encoding(self, data):
    """Return the encoding detected in ``data``, defaulting to utf-8.

    Text input is encoded to UTF-8 first. The value is then handed to the
    ``guess_encoding`` function in scope (not this method); when that
    returns ``None`` the result is ``'utf-8'``.
    """
    text_type = type(u'')
    # data maybe unicode but with another encoding specified
    raw = data.encode('UTF-8') if isinstance(data, text_type) else data
    guessed = guess_encoding(raw)
    return 'utf-8' if guessed is None else guessed
def guess_encoding(self, data):
    """Return the encoding detected in ``data``, defaulting to utf-8.

    Text input is encoded to UTF-8 first. The value is then handed to the
    ``guess_encoding`` function in scope (not this method); when that
    returns ``None`` the result is ``"utf-8"``.
    """
    text_type = type(u"")
    # data maybe unicode but with another encoding specified
    raw = data.encode("UTF-8") if isinstance(data, text_type) else data
    guessed = guess_encoding(raw)
    if guessed is not None:
        return guessed
    return "utf-8"
def test_broken_percent(self):
    """Check that markup with a ``charset=%s`` placeholder yields no encoding."""
    # The only "charset=" in the input is followed by a "%s" placeholder
    # rather than an encoding name.
    e = guess_encoding(
        r"""<pre> <metal:block tal:define="dummy python: request.RESPONSE.setHeader('Content-Type', 'text/html;;charset=%s' % charset)" /> <metal:block tal:define="dummy python:request.RESPONSE.setHeader('Content-Language', lang)" / > </pre> """
    )
    # unable to detect a valid encoding
    self.assertEqual(e, None)
def guess_encoding(self, data):
    """Return the encoding detected in ``data``, or a configured default.

    Text input is encoded to UTF-8 first. The value is then handed to the
    ``guess_encoding`` function in scope (not this method). When that
    returns ``None``, the ``default_charset`` property of the
    ``portal_properties`` tool's ``site_properties`` is used, itself
    defaulting to ``'UTF-8'``. If that lookup fails for any reason the
    result is ``'UTF-8'``.
    """
    # isinstance (not an exact type match) so text subclasses are encoded too.
    if isinstance(data, type(u'')):
        # data maybe unicode but with another encoding specified
        data = data.encode('UTF-8')
    encoding = guess_encoding(data)
    if encoding is None:
        try:
            site_props = getToolByName(self, 'portal_properties').site_properties
            encoding = site_props.getProperty('default_charset', 'UTF-8')
        except Exception:
            # Best-effort lookup: any ordinary failure falls back to UTF-8,
            # while KeyboardInterrupt / SystemExit are allowed to propagate.
            encoding = 'UTF-8'
    return encoding
def testHTML(self):
    """Check encoding detection from an HTML ``Content-Type`` meta tag."""
    # The document declares charset=iso-8859-1 in a <meta> tag and also
    # contains the plain text "charset=utf-8" in its body; the meta tag's
    # value is the expected result.
    e = guess_encoding(
        '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <html> <head> <title>ASPN : Python Cookbook : Auto-detect XML encoding</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> <meta name="robots" content="all" /> <meta name="description" content="ActiveState Open Source Programming tools for Perl Python XML xslt scripting with free trials. Quality development tools for programmers systems administrators database administrators network administrators and webmasters" /> <meta name="keywords" content="ActiveState,Perl,xml,xslt,mozilla,Open Source,Python,Perl for Win32,resources,PerlScript,ActivePerl,Programming,Programmers,Integrated,Development,Environment,SOAP,Linux,Solaris,Web,development,tools,free,software,download,support,Perl Resource Kit,System Administration,Sys Admin,WinNT,SQL,Oracle,Email,XML,Linux,Programming,perl,NT,2000,windows,Unix,Software,Security, Administration,systems,windows,database,database,consulting,support,Microsoft,developer,resource,code,tutorials,IDE,Integrated development environment,developer,resources,tcl,php" /> <link rel="stylesheet" href="/ASPN/aspn.css" /> </head> <body bgcolor="#FFFFFF" leftmargin="0" topmargin="0" marginwidth="0" marginheight="0"> charset=utf-8 </body> </html> ''')
    self.assertEqual(e, 'iso-8859-1')
def testHTML(self):
    """Check encoding detection from an HTML ``Content-Type`` meta tag."""
    # The document declares charset=iso-8859-1 in a <meta> tag and also
    # contains the plain text "charset=utf-8" in its body; the meta tag's
    # value is the expected result.
    e = guess_encoding(
        '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <html> <head> <title>ASPN : Python Cookbook : Auto-detect XML encoding</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> <meta name="robots" content="all" /> <meta name="description" content="ActiveState Open Source Programming tools for Perl Python XML xslt scripting with free trials. Quality development tools for programmers systems administrators database administrators network administrators and webmasters" /> <meta name="keywords" content="ActiveState,Perl,xml,xslt,mozilla,Open Source,Python,Perl for Win32,resources,PerlScript,ActivePerl,Programming,Programmers,Integrated,Development,Environment,SOAP,Linux,Solaris,Web,development,tools,free,software,download,support,Perl Resource Kit,System Administration,Sys Admin,WinNT,SQL,Oracle,Email,XML,Linux,Programming,perl,NT,2000,windows,Unix,Software,Security, Administration,systems,windows,database,database,consulting,support,Microsoft,developer,resource,code,tutorials,IDE,Integrated development environment,developer,resources,tcl,php" /> <link rel="stylesheet" href="/ASPN/aspn.css" /> </head> <body bgcolor="#FFFFFF" leftmargin="0" topmargin="0" marginwidth="0" marginheight="0"> charset=utf-8 </body> </html> ''')
    self.assertEqual(e, 'iso-8859-1')
def testUTF8(self):
    """Check detection of a leading UTF-8 byte-order mark."""
    # NOTE(review): these are native-str literals; on Python 3 the escapes
    # are code points rather than bytes -- confirm guess_encoding handles
    # that as intended.
    # EF BB BF at the very start of the data is expected to give UTF-8.
    e = guess_encoding('\xef\xbb\xbf any UTF-8 data')
    self.assertEqual(e, 'UTF-8')
    # The same sequence at the end of the data is expected to be ignored.
    e = guess_encoding(' any UTF-8 data \xef\xbb\xbf')
    self.assertEqual(e, None)
def testUTF8(self):
    """Check detection of a leading UTF-8 byte-order mark."""
    # NOTE(review): these are native-str literals; on Python 3 the escapes
    # are code points rather than bytes -- confirm guess_encoding handles
    # that as intended.
    # EF BB BF at the very start of the data is expected to give UTF-8.
    e = guess_encoding('\xef\xbb\xbf any UTF-8 data')
    self.assertEqual(e, 'UTF-8')
    # The same sequence at the end of the data is expected to be ignored.
    e = guess_encoding(' any UTF-8 data \xef\xbb\xbf')
    self.assertEqual(e, None)