def sanitize_illegal_chars_for_xml(s):
    """Strip characters that are not legal in XML documents.

    Characters outside the XML 1.0 character range (often introduced by
    copy/paste) are removed so that the XML parser won't choke on them.

    This code is courtesy of the XmlRpcPlugin developers, as documented
    here: http://stackoverflow.com/a/22273639
    """
    global ILLEGAL_XML_CHARS_RE

    if ILLEGAL_XML_CHARS_RE is None:
        # Lazily build the regular expression on first use and cache it
        # at module level.
        bad_ranges = [
            (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F),
            (0x7F, 0x84), (0x86, 0x9F),
            (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF),
        ]

        if sys.maxunicode > 0x10000:
            # Wide Unicode builds also exclude the two non-characters
            # (U+xFFFE/U+xFFFF) at the end of every supplementary plane.
            bad_ranges.extend(
                (plane + 0xFFFE, plane + 0xFFFF)
                for plane in range(0x10000, 0x110000, 0x10000)
            )

        ILLEGAL_XML_CHARS_RE = re.compile('[%s]' % ''.join(
            '%s-%s' % (six.unichr(lo), six.unichr(hi))
            for lo, hi in bad_ranges
        ))

    if isinstance(s, bytes):
        s = s.decode('utf-8')

    return ILLEGAL_XML_CHARS_RE.sub('', s)
def test_unicode_header_checks(self):
    """Test header encoding for basic objects and non-ASCII unicode."""
    token_expiry = str(datetime.datetime.utcnow())
    credentials = OAuth2Credentials(
        u'foo',                        # access_token
        u'some_client_id',
        u'cOuDdkfjxxnv+',              # client_secret
        u'1/0/a.df219fjls0',           # refresh_token
        token_expiry,
        str(GOOGLE_TOKEN_URI),
        u'refresh_checker/1.0',        # user_agent
        revoke_uri=str(GOOGLE_REVOKE_URI))

    # First, test that we correctly encode basic objects, making sure
    # to include a bytes object. Note that oauth2client will normalize
    # everything to bytes, no matter what python version we're in.
    http = credentials.authorize(HttpMock(headers={'status': '200'}))
    request_headers = {u'foo': 3, b'bar': True, 'baz': b'abc'}
    expected_headers = {b'foo': b'3', b'bar': b'True', b'baz': b'abc'}
    http.request(u'http://example.com', method=u'GET',
                 headers=request_headers)

    for key, value in expected_headers.items():
        self.assertTrue(key in http.headers)
        self.assertEqual(value, http.headers[key])

    # Next, test that we do fail on unicode.
    non_ascii_str = six.unichr(40960) + 'abcd'
    self.assertRaises(
        NonAsciiHeaderError,
        http.request,
        u'http://example.com', method=u'GET',
        headers={u'foo': non_ascii_str})
def _replace_entity(match):
    """Resolve a single HTML entity reference match to its character.

    Returns the original matched text unchanged when the entity is
    unknown or the numeric value is out of range.
    """
    text = match.group(1)

    if text[0] != '#':
        # Named entity (e.g. "amp", "lt").
        try:
            return six.unichr(html_entities.name2codepoint[text])
        except (ValueError, KeyError):
            return match.group(0)

    # Numeric character reference: decimal "#123" or hex "#x7B".
    ref = text[1:]

    try:
        codepoint = int(ref[1:], 16) if ref[0] in 'xX' else int(ref)
        return six.unichr(codepoint)
    except ValueError:
        return match.group(0)
class PageForm(forms.ModelForm):
    """Admin form for Page objects, with SEO meta fields, tree-aware
    parent/redirect selectors and an optional template chooser."""

    meta_description = forms.CharField(widget=forms.Textarea,
                                       required=False)
    meta_keywords = forms.CharField(widget=forms.Textarea, required=False)

    # Tree levels are indented with three non-breaking spaces (U+00A0).
    parent = TreeNodeChoiceField(queryset=Page.tree.all(),
                                 level_indicator=unichr(160) * 3,
                                 empty_label='---------',
                                 required=False)
    redirect_page = TreeNodeChoiceField(
        label=_('Redirect page'),
        queryset=Page.objects.filter(redirect_page__isnull=True),
        level_indicator=unichr(160) * 3,
        empty_label='---------',
        required=False)

    class Meta:
        model = Page
        exclude = []

    def __init__(self, *args, **kwargs):
        super(PageForm, self).__init__(*args, **kwargs)

        # Only expose the template chooser when templates are configured.
        if len(TEMPLATE_CHOICES) > 0:
            self.fields['template_name'] = forms.ChoiceField(
                choices=TEMPLATE_CHOICES,
                required=False,
                label=_('Template'))

    def clean_title(self):
        """Strips extra whitespace"""
        return self.cleaned_data.get('title', '').strip()

    def clean_redirect_page(self):
        """Reject a redirect page combined with a named url."""
        redirect_page = self.cleaned_data['redirect_page']

        if redirect_page:
            try:
                url = self.cleaned_data['url']
                if url and is_quoted_url(url):
                    raise forms.ValidationError(
                        _("A named url can't be combined with a redirect "
                          "page"))
            except KeyError:
                # 'url' didn't survive earlier validation; nothing to check.
                pass

        return redirect_page
def set_row_color(self, sheet, row, header, color='FF000000',
                  bgcolor='FFFFFFFF'):
    """Apply font and background colors to the cells of one sheet row.

    One cell is styled per *header* entry, starting at column 'A'.
    Colors are ARGB hex strings.  Returns *row* unchanged.

    NOTE(review): assumes len(header) <= 26 -- offsets past 'Z' would
    produce non-letter column references; confirm with callers.
    """
    first_column = ord('A')

    for offset, _column in enumerate(header):
        cell_ref = str(six.unichr(first_column + offset)) + str(row)
        sheet[cell_ref].style = self.get_cell_style(color=color,
                                                    bgcolor=bgcolor)

    return row
def sanitize_illegal_chars_for_xml(s):
    """Return *s* with characters that XML 1.0 forbids removed.

    Such characters (usually the result of a copy/paste) would
    otherwise break the XML parser.

    This code is courtesy of the XmlRpcPlugin developers, as documented
    here: http://stackoverflow.com/a/22273639
    """
    global ILLEGAL_XML_CHARS_RE

    if ILLEGAL_XML_CHARS_RE is None:
        # Compile the pattern once and reuse it on later calls.
        illegal_ranges = [
            (0x00, 0x08),
            (0x0B, 0x0C),
            (0x0E, 0x1F),
            (0x7F, 0x84),
            (0x86, 0x9F),
            (0xFDD0, 0xFDDF),
            (0xFFFE, 0xFFFF),
        ]

        if sys.maxunicode > 0x10000:
            # Wide Unicode builds: include the two non-characters at
            # the end of every supplementary plane.
            illegal_ranges += [
                (0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF),
                (0x3FFFE, 0x3FFFF), (0x4FFFE, 0x4FFFF),
                (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF),
                (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF),
                (0x9FFFE, 0x9FFFF), (0xAFFFE, 0xAFFFF),
                (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF),
                (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF),
                (0xFFFFE, 0xFFFFF), (0x10FFFE, 0x10FFFF),
            ]

        char_classes = [
            '%s-%s' % (six.unichr(start), six.unichr(end))
            for start, end in illegal_ranges
        ]
        ILLEGAL_XML_CHARS_RE = re.compile('[%s]' % ''.join(char_classes))

    if isinstance(s, bytes):
        s = s.decode('utf-8')

    return ILLEGAL_XML_CHARS_RE.sub('', s)
def valid_javascript_identifier(identifier, escape='\\u', ucd_cat=category):
    """Return whether the given ``id`` is a valid Javascript identifier."""
    if not identifier:
        return False

    if not isinstance(identifier, text_type):
        try:
            identifier = text_type(identifier, 'utf-8')
        except UnicodeDecodeError:
            return False

    if escape in identifier:
        # Decode \uXXXX escape sequences before validating.
        pieces = identifier.split(escape)
        decoded = [pieces.pop(0)]

        for chunk in pieces:
            if len(chunk) < 4:
                # Not enough hex digits to form an escape.
                return False

            try:
                decoded.append(unichr(int('0x' + chunk[:4], 16)))
            except Exception:
                return False

            decoded.append(chunk[4:])

        identifier = ''.join(decoded)

    if is_reserved_js_word(identifier):
        return False

    head, tail = identifier[0], identifier[1:]

    # The first character has its own (wider) set of valid categories.
    if head not in valid_jsid_chars and \
       ucd_cat(head) not in valid_jsid_categories_start:
        return False

    for ch in tail:
        if ch not in valid_jsid_chars and \
           ucd_cat(ch) not in valid_jsid_categories:
            return False

    return True
def is_valid_javascript_identifier(identifier, escape='\\u', ucd_cat=category):
    """Return whether the given ``identifier`` is a valid Javascript identifier.

    ``identifier`` may be bytes (decoded as UTF-8) or text.  Any
    ``\\uXXXX`` escape sequences are decoded before validation.  The name
    must not be a reserved word, its first character must belong to the
    identifier-start character set, and every following character to the
    identifier-part set.

    Bug fix: the ``escape`` default was previously ``r'\\u'`` -- a raw
    string containing TWO backslashes plus ``u`` -- which can never match
    the single-backslash ``\\uXXXX`` escapes Javascript actually uses
    (compare ``valid_javascript_identifier`` elsewhere in this file).
    It is now the two-character string backslash + ``u``.
    """
    if not identifier:
        return False

    if not isinstance(identifier, six.text_type):
        try:
            identifier = six.text_type(identifier, 'utf-8')
        except UnicodeDecodeError:
            return False

    if escape in identifier:
        # Decode each \uXXXX escape into the character it names.
        new = []
        add_char = new.append
        split_id = identifier.split(escape)
        add_char(split_id.pop(0))

        for segment in split_id:
            if len(segment) < 4:
                # Not enough hex digits to form an escape sequence.
                return False
            try:
                add_char(six.unichr(int('0x' + segment[:4], 16)))
            except Exception:
                return False
            add_char(segment[4:])

        identifier = u''.join(new)

    if is_reserved_js_word(identifier):
        return False

    first_char = identifier[0]

    # The first character draws from a wider category set than the rest.
    if not ((first_char in valid_jsid_chars) or
            (ucd_cat(first_char) in valid_jsid_categories_start)):
        return False

    for char in identifier[1:]:
        if not ((char in valid_jsid_chars) or
                (ucd_cat(char) in valid_jsid_categories)):
            return False

    return True
def test_get_markdown_element_tree_with_named_entities(self):
    """Testing get_markdown_element_tree with named entities"""
    rendered_html_entities = ['&fooooo;']

    # NOTE(review): '?' assumes the parser substitutes a replacement
    # character for the unknown entity above -- confirm against
    # get_markdown_element_tree()'s parser behavior.
    expected_html_entities = ['?']

    # toxml() will convert a select list of characters into named
    # entities when generating the string. We need to account for this.
    # These aren't expanded when text is processed in
    # get_markdown_element_tree(). They'll be &#...; entities.
    #
    # The values must be the escaped forms toxml() emits; an identity
    # mapping would make the expectations wrong for these four
    # characters.
    toxml_expanded_chars = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
    }

    for char_code, entity_name in six.iteritems(codepoint2name):
        rendered_html_entities.append('&%s;' % entity_name)

        char = six.unichr(char_code)
        expected_html_entities.append(
            toxml_expanded_chars.get(char, char))

    node = get_markdown_element_tree(''.join(rendered_html_entities))

    self.assertEqual(node[0].toxml(),
                     ''.join(expected_html_entities))
def _callback(matches):
    """Return the character for the numeric code captured in group 1."""
    codepoint = int(matches.group(1))
    return six.unichr(codepoint)
from math import sqrt

from django.utils import six

from django.core.cache import caches
from django.utils.html import strip_tags
from django.contrib.sites.models import Site
from django.utils.functional import cached_property
from django.core.cache import InvalidCacheBackendError

from zinnia.models.entry import Entry
from zinnia.settings import STOP_WORDS
from zinnia.settings import COMPARISON_FIELDS

# Map every Unicode punctuation codepoint (category 'P*') to None, for
# use with str.translate() to strip punctuation.
# NOTE(review): relies on `sys` and `unicodedata` being imported
# elsewhere in this module -- neither import is visible here.
PUNCTUATION = dict.fromkeys(
    i for i in range(sys.maxunicode)
    if unicodedata.category(six.unichr(i)).startswith('P'))


def pearson_score(list1, list2):
    """
    Compute the Pearson' score between 2 lists of vectors.
    """
    # NOTE(review): this function appears truncated -- it computes only
    # the numerator of the Pearson correlation and falls off the end
    # (returning None). Confirm against the original source before
    # relying on it.
    size = len(list1)
    sum1 = sum(list1)
    sum2 = sum(list2)
    # Sums of squares and the sum of pairwise products.
    sum_sq1 = sum([pow(l, 2) for l in list1])
    sum_sq2 = sum([pow(l, 2) for l in list2])
    prod_sum = sum([list1[i] * list2[i] for i in range(size)])
    num = prod_sum - (sum1 * sum2 / float(size))
import re

from django.utils.six import unichr
from django.utils.six.moves import html_entities

# Entity-name lookup table; &apos; is valid in XML/HTML5 but missing
# from the HTML 4 name2codepoint table, so add it explicitly.
name2codepoint = html_entities.name2codepoint.copy()
name2codepoint['apos'] = ord("'")

# Matches decimal (&#65;), hexadecimal (&#x41;) and named (&amp;)
# entity references; exactly one capture group is non-None per match.
_ENTITY_REF = re.compile(r'&(?:#(\d+)|(?:#x([\da-fA-F]+))|([a-zA-Z]+));')

# One decoder per capture group, in the same order as the pattern.
_ENTITY_REPLACE = [
    lambda code: unichr(int(code, 10)) if code else None,
    lambda code: unichr(int(code, 16)) if code else None,
    lambda code: (unichr(name2codepoint[code])
                  if code in name2codepoint else None),
]


def htmlentitydecode(s):
    """Replace every HTML entity reference in *s* with its character.

    Unknown named entities are left untouched.
    """
    def unescape(match):
        for group_index, decode in enumerate(_ENTITY_REPLACE, start=1):
            replaced = decode(match.group(group_index))
            if replaced is not None:
                return replaced
        return match.group(0)

    return _ENTITY_REF.sub(unescape, s)
import datetime import json from django.db import connection from django.test import override_settings from django.utils import six from rest_framework.test import APITestCase from tests.models import Cat, Group, Location, Profile, User from tests.serializers import NestedEphemeralSerializer from tests.setup import create_fixture UNICODE_STRING = six.unichr(9629) # unicode heart # UNICODE_URL_STRING = urllib.quote(UNICODE_STRING.encode('utf-8')) UNICODE_URL_STRING = '%E2%96%9D' @override_settings( DYNAMIC_REST={ 'ENABLE_LINKS': False } ) class TestUsersAPI(APITestCase): def setUp(self): self.fixture = create_fixture() self.maxDiff = None def test_get(self): with self.assertNumQueries(1): # 1 for User, 0 for Location
from __future__ import unicode_literals
import unicodedata
from math import sqrt

from django.utils import six

from django.core.cache import caches
from django.utils.html import strip_tags
from django.contrib.sites.models import Site
from django.utils.functional import cached_property
from django.core.cache import InvalidCacheBackendError

from zinnia.models.entry import Entry
from zinnia.settings import STOP_WORDS
from zinnia.settings import COMPARISON_FIELDS

# Map every Unicode punctuation codepoint (category 'P*') to None, for
# use with str.translate() to strip punctuation.
# NOTE(review): relies on `sys` being imported elsewhere in this
# module -- the import is not visible here.
PUNCTUATION = dict.fromkeys(i for i in range(sys.maxunicode)
                            if unicodedata.category(six.unichr(i)).startswith("P"))


def pearson_score(list1, list2):
    """
    Compute the Pearson' score between 2 lists of vectors.
    """
    # NOTE(review): this function appears truncated -- it computes only
    # the numerator of the Pearson correlation and falls off the end
    # (returning None). Confirm against the original source before
    # relying on it.
    size = len(list1)
    sum1 = sum(list1)
    sum2 = sum(list2)
    # Sums of squares and the sum of pairwise products.
    sum_sq1 = sum([pow(l, 2) for l in list1])
    sum_sq2 = sum([pow(l, 2) for l in list2])
    prod_sum = sum([list1[i] * list2[i] for i in range(size)])
    num = prod_sum - (sum1 * sum2 / float(size))