def test_adapter_registered(self):
    """Adapting a request to IUserPreferredCharsets yields NyHTTPCharsets.

    Also checks the charset list the adapter reports for a request
    built by ``create_request()``.
    """
    request = create_request()
    charsets_adapter = IUserPreferredCharsets(request)
    self.assertTrue(isinstance(charsets_adapter, NyHTTPCharsets))
    expected_charsets = ['utf-8', 'iso-8859-1', '*']
    self.assertEqual(charsets_adapter.getPreferredCharsets(),
                     expected_charsets)
def _decode(self, text):
    """Try to decode the text using one of the available charsets.

    ``self.charsets`` is filled lazily from the IUserPreferredCharsets
    adapter the first time it is needed.  The first charset that
    decodes ``text`` wins.

    Returns the decoded text, or the (latin-1 encoded) bytes unchanged
    when no charset in the list can decode them.
    """
    # All text comes from parse_qsl or multipart.parse_form_data, and
    # has already been decoded into Unicode using WSGI's privileged
    # encoding (ISO-8859-1).
    if self.charsets is None:
        envadapter = IUserPreferredCharsets(self)
        preferred = envadapter.getPreferredCharsets() or ['utf-8']
        # '*' is a wildcard, not a codec name.  If it was the only entry
        # the filtered list would be empty and nothing would ever be
        # decoded, so fall back to utf-8 in that case.
        self.charsets = [c for c in preferred if c != '*'] or ['utf-8']
    if not isinstance(text, bytes):
        if self.charsets and self.charsets[0] == 'iso-8859-1':
            # optimization: we are trying to decode something already
            # decoded for us, let's just return it rather than waste
            # time decoding...
            return text
        # undo what parse_qsl/multipart.parse_form_data did and maintain
        # backwards compat
        text = text.encode('latin-1')
    for charset in self.charsets:
        try:
            return text.decode(charset)
        except (LookupError, UnicodeError):
            # LookupError: the charset name is not a known codec.
            # UnicodeError: the bytes are not valid in this charset.
            continue
    # XXX when none of the provided charsets works the bytes are returned
    # as they are; callers have to cope with that.
    return text
def processInputs(request, charsets=None):
    """Override Products.Five.browser.decode.processInputs.

    Decode the ``str`` values found in ``request.form`` in place using
    ``_decode`` and the given ``charsets``.  When ``charsets`` is None
    the list comes from the request's IUserPreferredCharsets adapter,
    with ``['utf-8']`` as the fallback for an empty list.

    Names recognised by ``isCGI_NAME`` or starting with ``HTTP_`` are
    skipped, as are the ``groups`` and ``users`` fields.  Items of a
    list or tuple that are not ``str`` are kept as they are instead of
    being dropped from the sequence.
    """
    if charsets is None:
        envadapter = IUserPreferredCharsets(request)
        charsets = envadapter.getPreferredCharsets() or ['utf-8']

    def decode_item(item):
        # Only str items are decoded; anything else passes through.
        if isinstance(item, str):
            return _decode(item, charsets)
        return item

    for name, value in request.form.items():
        if isCGI_NAME(name) or name.startswith('HTTP_'):
            continue
        # XXX => really dirty
        if name == 'groups' or name == 'users':
            # these two fields are deliberately left untouched
            continue
        if isinstance(value, str):
            request.form[name] = _decode(value, charsets)
        elif isinstance(value, list):
            request.form[name] = [decode_item(val) for val in value]
        elif isinstance(value, tuple):
            request.form[name] = tuple([decode_item(val) for val in value])
def _decode(self, text):
    """Try to decode the text using one of the available charsets.

    ``self.charsets`` is filled lazily from the IUserPreferredCharsets
    adapter the first time it is needed.  The first charset that
    decodes ``text`` wins.

    Returns the decoded text, or the undecoded bytes when no charset
    in the list can decode them.
    """
    # All text comes from cgi.FieldStorage. On Python 2 it's all bytes
    # and we must decode. On Python 3 it's already been decoded into
    # Unicode, using the charset we specified when instantiating the
    # FieldStorage instance (Latin-1).
    if self.charsets is None:
        envadapter = IUserPreferredCharsets(self)
        preferred = envadapter.getPreferredCharsets() or ['utf-8']
        # '*' is a wildcard, not a codec name.  If it was the only entry
        # the filtered list would be empty and nothing would ever be
        # decoded, so fall back to utf-8 in that case.
        self.charsets = [c for c in preferred if c != '*'] or ['utf-8']
    if not PYTHON2 and not isinstance(text, bytes):
        if self.charsets and self.charsets[0] == 'iso-8859-1':
            # optimization: we are trying to decode something
            # cgi.FieldStorage already decoded for us, let's just return it
            # rather than waste time decoding...
            return text
        # undo what cgi.FieldStorage did and maintain backwards compat
        text = text.encode('latin-1')
    for charset in self.charsets:
        try:
            return text.decode(charset)
        except (LookupError, UnicodeError):
            # LookupError: the charset name is not a known codec.
            # UnicodeError: the data is not valid in this charset.
            continue
    # XXX when none of the provided charsets works the bytes are returned
    # as they are; callers have to cope with that.
    return text
def processInputs(request, charsets=None):
    """Process the values in request.form to decode strings to unicode,
    using the passed-in list of charsets. If none are passed in, look up
    the user's preferred charsets. The default is to use utf-8.

    A request that already provides IProcessedRequest is left alone; a
    processed request is marked with that interface so it is handled
    only once.  Lists and tuples keep their type, and their non-string
    items are carried over unchanged.
    """
    if IProcessedRequest.providedBy(request):
        return
    if charsets is None:
        envadapter = IUserPreferredCharsets(request, None)
        if envadapter is None:
            charsets = ['utf-8']
        else:
            charsets = envadapter.getPreferredCharsets() or ['utf-8']
    for name, value in request.form.items():
        if isCGI_NAME(name) or name.startswith('HTTP_'):
            continue
        if isinstance(value, str):
            request.form[name] = _decode(value, charsets)
        elif isinstance(value, (list, tuple)):
            newValue = [
                _decode(val, charsets) if isinstance(val, str) else val
                for val in value
            ]
            if isinstance(value, tuple):
                # Build the tuple from the decoded items, not from the
                # original (still undecoded) value.
                newValue = tuple(newValue)
            request.form[name] = newValue
    interface.alsoProvides(request, IProcessedRequest)
def _get_charsets(self):
    """Return the charset list, resolving and caching it on first use.

    The cached value lives in ``self._charsets``.  When it is None the
    list is taken from the IUserPreferredCharsets adapter of ``self``,
    with ``['utf-8']`` used if the adapter reports nothing.
    """
    cached = self._charsets
    if cached is not None:
        return cached
    resolved = IUserPreferredCharsets(self).getPreferredCharsets() or ['utf-8']
    self._charsets = resolved
    return resolved
def _setPageEncoding(self):
    """Set the encoding of the form page via the Content-Type header.

    ZPublisher uses the value of this header to determine how to
    encode unicode data for the browser.  The charset is the first one
    reported by the request's IUserPreferredCharsets adapter, or utf-8
    when the adapter reports none.
    """
    preferred = IUserPreferredCharsets(self.request).getPreferredCharsets()
    if not preferred:
        preferred = ['utf-8']
    content_type = 'text/html; charset=%s' % preferred[0]
    self.request.RESPONSE.setHeader('Content-Type', content_type)
def _setPageEncoding(self):
    """Announce the page encoding through the Content-Type header.

    ZPublisher reads this header to decide how unicode data is encoded
    for the browser.  The first preferred charset of the request is
    used; utf-8 is the fallback when no preference is available.
    """
    request = self.request
    adapter = IUserPreferredCharsets(request)
    candidates = adapter.getPreferredCharsets() or ['utf-8']
    first_choice = candidates[0]
    request.RESPONSE.setHeader(
        'Content-Type', 'text/html; charset=%s' % first_choice)
def processInputs(request, charsets=None):
    """Decode the plain ``str`` values of ``request.form`` in place.

    ``charsets`` defaults to the list reported by the request's
    IUserPreferredCharsets adapter, or ``['utf-8']`` when that list is
    empty.  Names recognised by ``isCGI_NAME`` or starting with
    ``HTTP_`` are skipped, and so are values that are not ``str``.
    """
    if charsets is None:
        adapter = IUserPreferredCharsets(request)
        charsets = adapter.getPreferredCharsets() or ['utf-8']

    for key, raw in request.form.items():
        if isCGI_NAME(key) or key.startswith('HTTP_'):
            continue
        if isinstance(raw, str):
            request.form[key] = _decode(raw, charsets)
def __call__(self):
    """Render ``self.template`` once and return the result.

    A unicode result is returned as it is.  Anything else is passed
    through ``decode._decode`` with the request's preferred charsets
    (``['utf-8']`` when the adapter reports none).
    """
    # XXX dirty hack to make the values coming out of here
    # encoded properly, by default please fix me.
    envadapter = IUserPreferredCharsets(self.request)
    charsets = envadapter.getPreferredCharsets() or ['utf-8']
    value = self.template()
    if not isinstance(value, unicode):
        # Decode the value that was already rendered; calling the
        # template again would render the page a second time.
        value = decode._decode(value, charsets)
    return value
def _decode(self, text):
    """Try to decode the text using one of the available charsets.

    ``self.charsets`` is filled lazily from the IUserPreferredCharsets
    adapter of ``self.request``, with ``['utf-8']`` as the fallback for
    an empty list.  The first charset that decodes ``text`` wins; when
    none does, ``text`` is returned unchanged.
    """
    if self.charsets is None:
        envadapter = IUserPreferredCharsets(self.request)
        self.charsets = envadapter.getPreferredCharsets() or ['utf-8']
    for charset in self.charsets:
        if charset == '*':
            # wildcard entry, not a codec name
            continue
        try:
            return unicode(text, charset)
        except (LookupError, UnicodeError):
            # LookupError: the charset name is not a known codec.
            # UnicodeError: the data is not valid in this charset.
            continue
    return text
def setPageEncoding(request):
    """Set the encoding of the form page via the Content-Type header.

    ZPublisher uses the value of this header to determine how to
    encode unicode data for the browser.  Deprecated: every call emits
    a DeprecationWarning attributed to the caller.
    """
    message = (
        u'setPageEncoding() is deprecated and will be removed in Zope 2.16. '
        u'It is recommended to let the ZPublisher use the default_encoding. '
        u'Please consider setting default-zpublisher-encoding to utf-8.'
    )
    warn(message, DeprecationWarning, stacklevel=2)

    preferred = IUserPreferredCharsets(request).getPreferredCharsets()
    if not preferred:
        preferred = ['utf-8']
    content_type = 'text/html; charset=%s' % preferred[0]
    request.RESPONSE.setHeader('Content-Type', content_type)
def setPageEncoding(request):
    """Announce the page encoding through the Content-Type header.

    ZPublisher uses the value of this header to determine how to
    encode unicode data for the browser.  The function is deprecated
    and warns on every call (``stacklevel=2`` points at the caller).
    """
    deprecation_note = u''.join([
        u'setPageEncoding() is deprecated and will be removed in Zope 5.0. ',
        u'It is recommended to let the ZPublisher use the default_encoding. ',
        u'Please consider setting default-zpublisher-encoding to utf-8.',
    ])
    warn(deprecation_note, DeprecationWarning, stacklevel=2)

    adapter = IUserPreferredCharsets(request)
    candidates = adapter.getPreferredCharsets() or ['utf-8']
    first_choice = candidates[0]
    request.RESPONSE.setHeader(
        'Content-Type', 'text/html; charset=%s' % first_choice)
def getCharsetUsingRequest(request):
    """See IHTTPResponse.

    Returns None when the request cannot be adapted to
    IUserPreferredCharsets, the first preferred charset otherwise, and
    'utf-8' when the preference list is empty.
    """
    adapter = IUserPreferredCharsets(request, None)
    if adapter is None:
        return None
    try:
        return adapter.getPreferredCharsets()[0]
    except IndexError:
        # An empty list is fine: the browser may just have sent '*',
        # which leaves the choice of encoding to us.
        return 'utf-8'
def processInputs(request, charsets=None):
    """Run every eligible value of ``request.form`` through
    ``processInputValue`` with the given charsets.

    Without explicit ``charsets`` the user's preferred charsets are
    looked up; utf-8 is used when there is no adapter or no preference.
    Names listed in ``isCGI_NAMEs`` or starting with ``HTTP_`` are
    left untouched.
    """
    if charsets is None:
        adapter = IUserPreferredCharsets(request, None)
        preferred = None if adapter is None else adapter.getPreferredCharsets()
        charsets = preferred or ['utf-8']

    for key, raw in request.form.items():
        if key in isCGI_NAMEs or key.startswith('HTTP_'):
            continue
        request.form[key] = processInputValue(raw, charsets)
def processInputs(request, charsets=None):
    """Decode the ``str`` values found in ``request.form`` in place.

    ``charsets`` defaults to the list reported by the request's
    IUserPreferredCharsets adapter, or ``['utf-8']`` when that list is
    empty.  Names recognised by ``isCGI_NAME`` or starting with
    ``HTTP_`` are skipped.

    Plain strings, and the strings inside lists and tuples, are passed
    through ``_decode``.  Items of a list or tuple that are not ``str``
    are kept as they are instead of being dropped from the sequence.
    """
    if charsets is None:
        envadapter = IUserPreferredCharsets(request)
        charsets = envadapter.getPreferredCharsets() or ['utf-8']

    def decode_item(item):
        # Only str items are decoded; anything else passes through.
        if isinstance(item, str):
            return _decode(item, charsets)
        return item

    for name, value in request.form.items():
        if isCGI_NAME(name) or name.startswith('HTTP_'):
            continue
        if isinstance(value, str):
            request.form[name] = _decode(value, charsets)
        elif isinstance(value, list):
            request.form[name] = [decode_item(val) for val in value]
        elif isinstance(value, tuple):
            request.form[name] = tuple([decode_item(val) for val in value])
def to_unicode(text):
    """Decode ``text`` with the first charset in ``charsets`` that works.

    ``charsets`` and ``request`` come from the enclosing scope.  An
    empty ``charsets`` list is filled in place from the request's
    preferred charsets, or with 'utf-8' if that yields nothing.

    Raises UnicodeError when no charset can decode ``text``.
    """
    if not charsets:
        adapter = IUserPreferredCharsets(request, None)
        preferred = adapter.getPreferredCharsets() if adapter else None
        if preferred:
            charsets.extend(preferred)
        if not charsets:
            charsets.append('utf-8')

    for encoding in charsets:
        try:
            decoded = unicode(text, encoding)
        except UnicodeError:
            continue
        return decoded

    raise UnicodeError(
        "Unable to decode %s using any of the character sets: %s"
        % (repr(text), repr(charsets)))
def _decode(self, text):
    """Try to decode the text using one of the available charsets.

    ``self.charsets`` is filled lazily from the IUserPreferredCharsets
    adapter the first time it is needed.  The first charset accepted
    by ``_u`` wins; when none works the bytes are returned undecoded.
    """
    # According to PEP-3333, in python-3, QUERY_STRING is a string,
    # representing 'latin-1' encoded byte array. So, if we are in python-3
    # context, encode text as 'latin-1' first, to try to decode
    # resulting byte array using user-supplied charset.
    if not isinstance(text, bytes):
        text = text.encode('latin-1')
    if self.charsets is None:
        envadapter = IUserPreferredCharsets(self)
        preferred = envadapter.getPreferredCharsets() or ['utf-8']
        # '*' is a wildcard, not a codec name.  If it was the only entry
        # the filtered list would be empty and nothing would ever be
        # decoded, so fall back to utf-8 in that case.
        self.charsets = [c for c in preferred if c != '*'] or ['utf-8']
    for charset in self.charsets:
        try:
            return _u(text, charset)
        except (LookupError, UnicodeError):
            # LookupError covers charset names that are not known
            # codecs (assuming _u decodes via the codec registry).
            continue
    return text
def processInputs(request, charsets=None):
    """Process the values in request.form to decode strings to unicode,
    using the passed-in list of charsets. If none are passed in, look up
    the user's preferred charsets. The default is to use utf-8.

    Deprecated: every call emits a DeprecationWarning attributed to
    the caller.
    """
    message = (
        u'processInputs() is deprecated and will be removed in Zope 2.16. If '
        u'your view implements IBrowserPage, similar processing is now '
        u'executed automatically.'
    )
    warn(message, DeprecationWarning, stacklevel=2)

    if charsets is None:
        adapter = IUserPreferredCharsets(request, None)
        preferred = None if adapter is None else adapter.getPreferredCharsets()
        charsets = preferred or ['utf-8']

    for key, raw in request.form.items():
        if key in isCGI_NAMEs or key.startswith('HTTP_'):
            continue
        request.form[key] = processInputValue(raw, charsets)
def processInputs(request, charsets=None):
    """Process the values in request.form to decode binary_type to
    text_type, using the passed-in list of charsets. If none are passed
    in, look up the user's preferred charsets. The default is to use
    utf-8.

    Deprecated: every call emits a DeprecationWarning attributed to
    the caller.
    """
    deprecation_note = u''.join([
        u'processInputs() is deprecated and will be removed in Zope 5.0. If ',
        u'your view implements IBrowserPage, similar processing is now ',
        u'executed automatically.',
    ])
    warn(deprecation_note, DeprecationWarning, stacklevel=2)

    if charsets is None:
        adapter = IUserPreferredCharsets(request, None)
        if adapter is not None:
            charsets = adapter.getPreferredCharsets() or ['utf-8']
        else:
            charsets = ['utf-8']

    # Iterate over a snapshot of the items while the form is updated.
    snapshot = list(request.form.items())
    for key, raw in snapshot:
        skip = key in isCGI_NAMEs or key.startswith('HTTP_')
        if not skip:
            request.form[key] = processInputValue(raw, charsets)
def resolve(self, context, text, expression):
    """Decode ``text`` with the first usable charset for the request.

    The candidate list is built once per request: the user's preferred
    charsets, then the context's ``management_page_charset`` (if set),
    then ``default_encoding``.  It is cached on the request under the
    attribute name ``__zpt_available_charsets``.

    Returns the decoded text, or ``text`` unchanged when no candidate
    can decode it.  ``expression`` is not used here.
    """
    request = context.REQUEST
    charsets = getattr(request, '__zpt_available_charsets', None)
    if charsets is None:
        # Copy the list so the fallbacks appended below do not end up in
        # whatever list the adapter handed out.
        charsets = list(
            IUserPreferredCharsets(request).getPreferredCharsets())
        # add management_page_charset as one fallback
        management_charset = getattr(context, 'management_page_charset',
                                     None)
        if management_charset:
            charsets.append(management_charset)
        # add Python's default encoding as last fallback
        charsets.append(default_encoding)
        # cache list of charsets; setattr() with a string keeps the
        # attribute name exactly as the getattr() lookup above expects
        # (a plain ``request.__name = ...`` inside a class is subject to
        # private name mangling and would never be found again)
        setattr(request, '__zpt_available_charsets', charsets)
    for enc in charsets:
        if enc == '*':
            # wildcard entry, not a codec name
            continue
        try:
            return unicode(text, enc)
        except (LookupError, UnicodeDecodeError):
            pass
    return text
def processInputs(request, charsets=None):
    """Process the values in request.form to decode strings to unicode,
    using the passed-in list of charsets. If none are passed in, look up
    the user's preferred charsets. The default is to use utf-8.

    Deprecated: every call emits a DeprecationWarning attributed to
    the caller.
    """
    notice = (
        u"processInputs() is deprecated and will be removed in Zope 2.16. If "
        u"your view implements IBrowserPage, similar processing is now "
        u"executed automatically."
    )
    warn(notice, DeprecationWarning, stacklevel=2)

    if charsets is None:
        adapter = IUserPreferredCharsets(request, None)
        charsets = ["utf-8"]
        if adapter is not None:
            charsets = adapter.getPreferredCharsets() or ["utf-8"]

    for key, raw in request.form.items():
        is_server_variable = key in isCGI_NAMEs or key.startswith("HTTP_")
        if is_server_variable:
            continue
        request.form[key] = processInputValue(raw, charsets)
def resolve(self, context, text, expression):
    """Decode ``text`` with the first usable charset.

    Text that is already ``text_type`` is returned as it is.  With a
    request available, the candidates are the user's preferred charsets
    followed by ``default_encoding``; the list is cached on the request
    under the attribute name ``__zpt_available_charsets``.  Without a
    request the candidates are ZPublisher's default encoding and
    ``default_encoding``.

    Returns the decoded text, or ``text`` unchanged when no candidate
    can decode it.  ``expression`` is not used here.
    """
    if isinstance(text, text_type):
        return text

    request = aq_get(context, 'REQUEST', None)

    # Deal with the fact that a REQUEST is not always available.
    # In this case fall back to the encoding of the ZMI and the
    # Python default encoding.
    if request is None:
        charsets = [
            ZPublisher.HTTPRequest.default_encoding,
            default_encoding,
        ]
    else:
        # charsets might be cached within the request
        charsets = getattr(request, '__zpt_available_charsets', None)

    # Nothing cached yet: investigate the HTTP_ACCEPT_CHARSET header.
    # 'charsets' can only be None here when a request is available,
    # because the request-less branch above always builds a list.
    if charsets is None:
        preferred = IUserPreferredCharsets(request).getPreferredCharsets()
        # Python's default encoding is the last fallback
        charsets = list(preferred) + [default_encoding]
        # cache list of charsets; setattr() with a string keeps the
        # attribute name exactly as the getattr() lookup above expects
        # (a plain ``request.__name = ...`` inside a class is subject to
        # private name mangling and would never be found again)
        setattr(request, '__zpt_available_charsets', charsets)

    for enc in charsets:
        if enc == '*':
            # wildcard entry, not a codec name
            continue
        try:
            return text.decode(enc)
        except (LookupError, UnicodeDecodeError):
            pass
    # FIXME: Shouldn't this raise an Exception or signal an error somehow?
    return text
def resolve(self, context, text, expression):
    """Decode ``text`` with the first usable charset.

    With a request available, the candidates are the user's preferred
    charsets followed by ``default_encoding``; the list is cached on
    the request under the attribute name ``__zpt_available_charsets``.
    Without a request the candidates are the context's
    ``management_page_charset`` (if set) and ``default_encoding``.

    Returns the decoded text, or ``text`` unchanged when no candidate
    can decode it.  ``expression`` is not used here.
    """
    request = aq_get(context, 'REQUEST', None)

    # Deal with the fact that a REQUEST is not always available.
    # In this case fall back to the encoding of the ZMI and the
    # Python default encoding.
    if request is None:
        charsets = [default_encoding]
        management_charset = getattr(context, 'management_page_charset',
                                     None)
        if management_charset:
            charsets.insert(0, management_charset)
    else:
        # charsets might be cached within the request
        charsets = getattr(request, '__zpt_available_charsets', None)

    # Nothing cached yet: investigate the HTTP_ACCEPT_CHARSET header.
    # 'charsets' can only be None here when a request is available,
    # because the request-less branch above always builds a list.
    if charsets is None:
        preferred = IUserPreferredCharsets(request).getPreferredCharsets()
        # Python's default encoding is the last fallback
        charsets = list(preferred) + [default_encoding]
        # cache list of charsets; setattr() with a string keeps the
        # attribute name exactly as the getattr() lookup above expects
        # (a plain ``request.__name = ...`` inside a class is subject to
        # private name mangling and would never be found again)
        setattr(request, '__zpt_available_charsets', charsets)

    for enc in charsets:
        if enc == '*':
            # wildcard entry, not a codec name
            continue
        try:
            return unicode(text, enc)
        except (LookupError, UnicodeDecodeError):
            pass
    return text
def getBrowserCharset(request):
    """Return the first charset preferred by the browser.

    The list comes from the request's IUserPreferredCharsets adapter;
    'utf-8' is returned when that list is empty.
    """
    preferred = IUserPreferredCharsets(request).getPreferredCharsets()
    if preferred:
        return preferred[0]
    return 'utf-8'