def u(x):
    """Return ``x`` as a text string on both Python 2 and Python 3.

    On Python 3 the argument is handed back untouched.  On Python 2 it is
    run through the ``unicode_escape`` codec and the decoded text is
    returned.
    """
    running_python2 = sys.version_info[0] < 3
    if not running_python2:
        return x

    import codecs
    decoded, _consumed = codecs.unicode_escape_decode(x)
    return decoded
def u(s):
    """Return ``s`` as a unicode string regardless of the Python version.

    Python 3 releases older than 3.3 reject the ``u"..."`` prefix, so
    callers wrap their literals in this helper instead.
    """
    if not _IS_PYTHON_3:
        # not well documented but seems to work
        decoded, _consumed = codecs.unicode_escape_decode(s)
        return decoded
    return s
def parse_value(lexer, symbol=None):
    """Yield the ``(event, value)`` pairs describing one JSON value.

    :param lexer: iterator producing lexical symbols; it is only advanced
        here when ``symbol`` is not supplied, and is otherwise passed on to
        ``parse_array`` / ``parse_object``.
    :param symbol: the first symbol of the value when the caller has
        already consumed it; ``None`` means "read it from ``lexer``".
    :raises UnexpectedSymbol: when the symbol is not a literal, container
        opener, string or number.
    :raises common.IncompleteJSONError: when the lexer is exhausted before
        the value is complete.
    """
    # Local import: Decimal signals malformed input with InvalidOperation,
    # which is an ArithmeticError rather than a ValueError.
    from decimal import InvalidOperation

    try:
        if symbol is None:
            symbol = next(lexer)
        if symbol == 'null':
            yield ('null', None)
        elif symbol == 'true':
            yield ('boolean', True)
        elif symbol == 'false':
            yield ('boolean', False)
        elif symbol == '[':
            for event in parse_array(lexer):
                yield event
        elif symbol == '{':
            for event in parse_object(lexer):
                yield event
        elif symbol[0] == '"':
            # Strip the surrounding quotes and resolve backslash escapes.
            yield ('string', unicode_escape_decode(symbol[1:-1])[0])
        else:
            # JSON allows an exponent without a fraction ("1e5"); int()
            # rejects that form, so it has to go through Decimal as well.
            is_real = '.' in symbol or 'e' in symbol or 'E' in symbol
            try:
                number = Decimal(symbol) if is_real else int(symbol)
            except (ValueError, InvalidOperation):
                raise UnexpectedSymbol(symbol, lexer)
            yield ('number', number)
    except StopIteration:
        # The lexer ran dry in the middle of a value.
        raise common.IncompleteJSONError()
def get_ros_root_from_setupfile(path):
    """
    Return the ROS_ROOT if the path is a setup.sh file with an env.sh next
    to it which sets the ROS_ROOT

    The env.sh wrapper is executed through the shell with ROS_ROOT removed
    from the child's environment, so the value reported is the one the
    wrapper itself sets. The environment of the calling process is left
    untouched.

    :param path: path to a candidate ``setup.sh`` file
    :returns: path to ROS_ROOT or None (an empty string when env.sh runs
        but prints nothing)
    """
    # For groovy, we rely on setup.sh setting ROS_ROOT, as no more
    # rosbuild stack 'ros' exists
    dirpath, basename = os.path.split(path)
    if basename != 'setup.sh':
        return None

    # env.sh exists since fuerte
    setupfilename = os.path.join(dirpath, 'env.sh')
    if not os.path.isfile(setupfilename):
        return None

    cmd = "%s sh -c 'echo $ROS_ROOT'" % setupfilename

    # Work on a copy: os.environ is the live process environment, and
    # popping from it directly would unset ROS_ROOT for the caller too.
    local_env = os.environ.copy()
    local_env.pop('ROS_ROOT', None)

    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               env=local_env,
                               shell=True)
    out = process.communicate()[0]
    if sys.version_info[0] < 3:
        out_str = codecs.unicode_escape_decode(out)[0]
    else:
        out_str = out.decode('utf-8')
    return out_str.strip()
def unicode(string):
    """
    Fake unicode function

    Returns ``None`` for any falsy input; otherwise returns the input
    decoded with the ``unicode_escape`` codec.
    """
    import codecs

    if string:
        text, _consumed = codecs.unicode_escape_decode(string)
        return text
    return None
def get_mounts(self):
    """Read /proc/mounts and map each first field to its second field.

    Only lines that start with "/" and contain a space are considered.
    The second field is passed through the ``unicode_escape`` codec, which
    resolves backslash escapes in it. A later line with the same first
    field overwrites an earlier one.
    """
    table = {}
    with open("/proc/mounts") as mounts_file:
        for entry in mounts_file:
            if not entry.startswith("/") or " " not in entry:
                continue
            device, mount_point = entry.split()[:2]
            table[device] = codecs.unicode_escape_decode(mount_point)[0]
    return table
def to_str(x):
    """Coerce ``x`` to text.

    Text values are returned unchanged. Numbers are converted with
    ``str()``, ``None`` becomes the empty string, and the result (or any
    other input as-is) is decoded with the ``unicode_escape`` codec.
    """
    if isinstance(x, six.text_type):
        return x

    if x is None:
        raw = ''
    elif isinstance(x, numbers.Number):
        raw = str(x)
    else:
        raw = x

    decoded, _consumed = codecs.unicode_escape_decode(raw)
    return decoded
def u(s):
    """
    Decode ``s`` with the ``unicode_escape`` codec and return the text.

    In Python 2 the result is a unicode object; in Python 3 it is a str
    object.

    :param s: The string to unicode-ify.
    :type s: str
    :returns: str or unicode
    """
    decoded, _consumed = codecs.unicode_escape_decode(s)
    return decoded
def safe_unicode(s):
    """Return ``s`` as unicode on Python 2; hand it back as-is otherwise.

    Falsy values are always returned unchanged.
    """
    # workaround for unicode requirements
    needs_decoding = sys.version < '3' and s
    if not needs_decoding:
        return s

    import codecs
    decoded, _consumed = codecs.unicode_escape_decode(s)
    return decoded
def stringToUnicode(x):
    """
    Return a unicode string for all Python versions.

    Python 3 input is returned unchanged; Python 2 input is decoded with
    the ``unicode_escape`` codec.
    """
    if not sys.version < '3':
        return x

    import codecs
    decoded, _consumed = codecs.unicode_escape_decode(x)
    return decoded
def fix_reason_field(cls, reason):
    """Normalise a "reason" string.

    Falsy input is returned unchanged. Otherwise the text is stripped, a
    ``DESC: "..."`` wrapper loses its surrounding double quotes, and
    backslash escapes are resolved with the ``unicode_escape`` codec.
    """
    if not reason:
        return reason

    text = reason.strip()
    quoted_prefix = u'DESC: "'
    if text.startswith(quoted_prefix) and text.endswith(u'"'):
        # Drop the opening and closing quote but keep the DESC: label.
        text = u'DESC: ' + text[len(quoted_prefix):-1]

    # see https://forums.eveonline.com/default.aspx?g=posts&t=53350
    decoded, _consumed = unicode_escape_decode(text)
    return decoded
def u(text):
    """Decode ``text`` with the ``unicode_escape`` codec (Python 2 helper).

    @param text: The text to convert.
    @type text: str

    @return: The text converted to unicode.
    @rtype: unicode
    """
    decoded, _consumed = unicode_escape_decode(text)
    return decoded
def getXmlAttribute(dom, attribute, default=None):
    """
    Look up an attribute on a DOM node.

    :param dom: DOM object.
    :param attribute: Attribute name.
    :param default: Value returned when ``attribute`` is not present on
        ``dom``.
    :return: The attribute's value decoded with the ``unicode_escape``
        codec, or ``default``.
    """
    if attribute not in dom.attributes.keys():
        return default

    raw_value = dom.attributes[attribute].value
    decoded, _consumed = unicode_escape_decode(raw_value)
    return decoded
def u(x):
    """Convert input to a unicode object.

    This helper exists for porting code to Python 3.

    :param x: a stream of text
    :rtype: unicode
    """
    if not sys.version < '3':
        # for python 3, a string is always a unicode object!
        return x

    # for python 2, string needs to be converted to unicode object!
    import codecs
    decoded, _consumed = codecs.unicode_escape_decode(x)
    return decoded
def _prepare_parameter(self, parameter_name, parameter_value):
    '''Prepare the given parameter value.

    The parent implementation always runs first. For the 'message'
    parameter its result is returned with the decoded message prefix put
    in front; every other parameter gets the parent's result unchanged.
    '''
    prepared = super(SayText, self)._prepare_parameter(
        parameter_name, parameter_value)

    # Only the 'message' parameter receives the prefix.
    if parameter_name != 'message':
        return prepared

    # Adding ESCSOH to the start of the message seems to fix colors
    # passed at the beginning.
    prefix, _consumed = unicode_escape_decode(self._message_prefix)
    return prefix + prepared
def endElement(self, name):
    """Handle the closing tag of an element.

    Pops the attributes recorded for the element from ``self._stack`` and
    acts on two element names:

    * ``name-alias``: remembers the alias, and flags the pending character
      as a control character when the alias type is 'control' or 'figment'.
    * ``char``: unless the character was flagged as a control character or
      has no 'cp' attribute, stores ``(num, entity, char)`` in
      ``self._characters`` under its name and every collected alias, then
      clears the per-character state.

    NOTE(review): the source was whitespace-collapsed; the nesting below
    (in particular the position of the final "reset" statements) is a
    reconstruction and should be confirmed against the original file.
    """
    _, attrs = self._stack.pop()
    if name == 'name-alias':
        self._aliases.append(attrs['alias'])
        if attrs['type'] in ('control', 'figment'):
            self._control = True
    elif name == 'char':
        if self._control:
            log.debug('Ignoring control character : {}'.format(
                self._aliases[0]))
        elif 'cp' not in attrs:
            log.warning('No codepoint : {}'.format(attrs['na']))
        else:
            # Progress report each time the stored count is a multiple
            # of 1000.
            if len(self._characters) % 1000 == 0:
                log.debug('{:6d} characters in {:0.3f} seconds'.format(
                    len(self._characters), time() - self.start))
            num = attrs['cp']
            # Ignore control characters (code points up to hex 20)
            i = int(num, 16)
            if i > int('20', 16):
                # Zero-pad to eight hex digits, the width a \U escape needs.
                num = '{:0>8s}'.format(num)
                s = '\\U{}'.format(num)
                char = unicode_escape_decode(s)[0]
                # Looked up with the padded code point as key.
                entity = self.entities.get(num, '')
                name = attrs['na']
                names = self._aliases[:]
                if name:
                    names.append(name)
                # Register the character under each alias and its own name.
                for name in names:
                    # if name == 'CJK UNIFIED IDEOGRAPH-#':
                    #     name = 'CJK UNIFIED IDEOGRAPH-{}'.format(num)
                    if not ignore(name):
                        # A duplicate is logged and then overwritten.
                        if name in self._characters:
                            log.warning('Duplicate character : {}'.format(
                                name))
                        self._characters[name] = (num, entity, char)
                    # print('{}\t{}'.format(name, num))
        # reset
        self._aliases = []
        self._control = False
def test_unicode_urlquote(self):
    """urlquote must percent-encode unicode and byte input alike."""
    # Regression tests for LIBCLOUD-429
    # Note: on Python 3 the plain literal is already a unicode literal;
    # Python 2 needs it decoded first.
    val = '\xe9' if PY3 else codecs.unicode_escape_decode('\xe9')[0]
    self.assertEqual(b(urlquote(val)), b('%C3%A9'))

    # Unicode without unicode characters
    self.assertEqual(b(urlquote('~abc')), b('%7Eabc'))

    # Already-encoded bytestring without unicode characters
    self.assertEqual(b(urlquote(b('~abc'))), b('%7Eabc'))
def parse_html_for_urls(url):
    """Fetch ``url`` and pick a googleusercontent.com URL out of the reply.

    The response body is decoded as UTF-8, backslash escapes are resolved,
    and the text is split on commas; pieces matching the
    googleusercontent.com pattern are kept as candidates.

    :param url: address to fetch (requested with a 1 second timeout).
    :returns: a one-element list when exactly one distinct candidate was
        found; otherwise the first candidate (in sorted order) that does
        not contain '=', provided ``request_status_code`` reports 200 for
        it, or else a message string saying the URL is not valid.
    :raises IndexError: when there is no candidate without '='.
    """
    import codecs
    import re

    import requests

    # NOTE(review): Python's ``re`` has no POSIX classes, so ``[:xdigit:]``
    # below is a plain set of the characters ':', 'x', 'd', 'i', 'g', 't'.
    # The pattern is left untouched because existing behaviour depends on it.
    renoequal = re.compile(r'^(.+)[=].*$')
    regex_hex = re.compile(r'(https://.+?\.googleusercontent\.com/[a-zA-Z0-9-_]+?)[^=]\\?(?![:xdigit:].*?)',re.U)

    # bytes.decode works on Python 2 and 3 alike; the unicode() builtin
    # only exists on Python 2.
    raw_response = requests.get(url, stream=True, timeout=1).content.decode('utf8')
    response_list = [
        r.strip('"')
        for r in codecs.unicode_escape_decode(raw_response)[0].split(',')
        if r
    ]
    res = [r for r in response_list if regex_hex.findall(r)]

    # De-duplicate first, then sort, so the candidate picked below is
    # deterministic (sorting before building the set had no effect).
    ret = sorted(set(res))
    if len(ret) == 1:
        return ret

    valid = [r for r in ret if not renoequal.findall(r)]
    imgurl = valid[0]
    if request_status_code(imgurl) == 200:
        return imgurl
    return '{} is not a valid URL'.format(imgurl)
def main():
    """Generate the icon HTML page from the TSV file.

    Each row of ``TSV_FILE`` is read as ``(name, hex code point, entity)``.
    The code point is turned into its character, rendered through
    ``ICON_TPL`` and collected; the collected items are wrapped in
    ``PAGE_TPL`` and written to ``HTML_FILE`` as UTF-8. Processing stops
    early once ``LIMIT`` rows have been handled (when ``LIMIT`` is truthy).
    """
    start = time()
    count = 0
    items = []
    with open(TSV_FILE, 'rb') as fp:
        reader = csv.reader(fp, delimiter=b'\t')
        for row in reader:
            name, h, entity = [v.decode('utf-8') for v in row]
            # A \u escape takes exactly four hex digits, so code points
            # above U+FFFF (or written with fewer digits) would decode
            # wrongly; \U with zero-padding to eight digits covers them all.
            s = '\\U{:0>8s}'.format(h)
            char = unicode_escape_decode(s)[0]
            log.info(char)
            count += 1
            items.append(ICON_TPL.format(char=char, info='U+{}'.format(h)))
            if LIMIT and count == LIMIT:
                break

    html = PAGE_TPL.format(content='\n'.join(items), style=STYLE_TPL)
    with open(HTML_FILE, 'wb') as fp:
        fp.write(html.encode('utf-8'))

    log.info('{:d} icons generated in {:0.2f} seconds'.format(
        count, time() - start))
def _replace_escaped_sequences(given_string):
    '''Fixes all double escaped strings.

    Every match of ``_double_escaped_pattern`` in ``given_string`` is
    replaced in a single left-to-right pass:

    * an ``octal`` group becomes the character with that octal code,
    * a ``hexadecimal`` group becomes the ASCII character for that byte,
    * anything else is resolved with the ``unicode_escape`` codec.

    Substituting per match (rather than calling ``str.replace`` for each
    matched text in arbitrary order) keeps a short escape from corrupting
    a longer one that starts with the same characters.
    '''

    def _unescape(escaped_match):
        '''Return the replacement text for one escaped sequence.'''
        matching_groups = escaped_match.groupdict()

        # Are we matching any octal sequences?
        if matching_groups['octal']:
            # The digits are octal, so they must be parsed in base 8.
            return chr(int(matching_groups['octal'], 8))

        # Otherwise, are we matching any hexadecimal sequences?
        if matching_groups['hexadecimal']:
            return str(
                unhexlify(matching_groups['hexadecimal']), encoding='ascii')

        # Otherwise, that means we are matching a notation
        return unicode_escape_decode(escaped_match.group())[0]

    # Return the replaced string
    return _double_escaped_pattern.sub(_unescape, given_string)
def u(obj):
    """Return ``obj`` decoded with the ``unicode_escape`` codec."""
    decoded, _consumed = codecs.unicode_escape_decode(obj)
    return decoded
def u(x):
    """Decode ``x`` with the ``unicode_escape`` codec.

    Any falsy input (``None``, ``''``, ``0``, ...) yields the empty string.
    """
    if x:
        decoded, _consumed = codecs.unicode_escape_decode(x)
        return decoded
    return ''
from mozilla.format.dtd import ast
import re
import sys


class ParserError(Exception):
    """Error type for DTD parsing problems."""
    pass


# Character ranges used to build the entity-name pattern. These literals
# deliberately stay non-raw: on Python 3 the \x and \u escapes are resolved
# by the compiler, while on Python 2 the \u escapes survive as text and are
# resolved by the unicode_escape decoding step below.
name_start_char = ':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
    '\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF' + \
    '\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
# The regex-escaped '-' and '.' are written raw so that Python does not see
# invalid string escapes.
name_char = name_start_char + r'\-\.0-9' + '\xB7\u0300-\u036F\u203F-\u2040'
name = '[' + name_start_char + '][' + name_char + ']*'

if sys.version < '3':
    import codecs
    name = codecs.unicode_escape_decode(name)[0]


class Parser():
    """Parser that turns DTD source text into an ``ast.DTD`` object."""

    # Compiled patterns. The fragments are raw strings carrying exactly the
    # same characters as before; only the invalid string escapes are gone.
    patterns = {
        # <!ENTITY name "value"> with whitespace and the quote captured.
        'entity': re.compile(
            r'<!ENTITY(\s+)(' + name +
            r''')(\s+)(?P<op>["'])(.*?)(?<!\\)(?P=op)(\s*)>''',
            re.S|re.U),
        # Just the name of an entity declaration.
        'id': re.compile(r'<!ENTITY\s+(' + name + ')', re.S|re.U),
        # <!-- comment --> body.
        'comment': re.compile(r'\<!\s*--(.*?)(?:--\s*\>)', re.M|re.S),
    }

    @classmethod
    def parse(cls, text):
        """Parse ``text`` and return the populated ``ast.DTD``.

        The work is delegated to ``cls.split_comments`` with ``struct=True``.
        """
        dtd = ast.DTD()
        dtd._struct = []
        cls.split_comments(text, dtd, struct=True)
        return dtd
def u(x):
    """Round-trip ``x`` through the ``unicode_escape`` codec.

    The text is first encoded to its escaped byte form and then decoded
    back, and the decoded text is returned.
    """
    escaped_bytes = codecs.unicode_escape_encode(x)[0]
    return codecs.unicode_escape_decode(escaped_bytes)[0]
def unicodify(x):
    """Return ``x`` decoded with the ``unicode_escape`` codec."""
    decoded, _consumed = unicode_escape_decode(x)
    return decoded
def _decode_css_url(url): url = re.sub(r'\\(..) ', r'\\x\g<1>', url) url, _ = codecs.unicode_escape_decode(url) return url
def u(string):
    """Return ``string`` decoded with the ``unicode_escape`` codec."""
    decoded, _consumed = codecs.unicode_escape_decode(string)
    return decoded
def u(x):
    """Return ``x`` decoded with the ``unicode_escape`` codec."""
    decoded, _consumed = codecs.unicode_escape_decode(x)
    return decoded
def u(self, string):
    """Build a unicode string in a way that works on every Python version.

    The input is decoded with the ``unicode_escape`` codec.
    """
    # NOTE(cbro): Python 3-3.2 does not have the u'' syntax.
    decoded, _consumed = codecs.unicode_escape_decode(string)
    return decoded
def _str(s, encoding="UTF-8"):
    """Replace every ``unichr_escape`` match in ``s`` with its decoded form.

    Each matched text is decoded with the ``unicode_escape`` codec. The
    ``encoding`` argument is accepted but not used.
    """
    def _decode_match(match):
        decoded, _consumed = unicode_escape_decode(match.group(0))
        return decoded

    return unichr_escape.sub(_decode_match, s)
def u(x):
    """Return ``x`` as unicode.

    Objects whose type is named ``unicode`` are returned unchanged; anything
    else is decoded with the ``unicode_escape`` codec.
    """
    already_unicode = type(x).__name__ == 'unicode'
    if already_unicode:
        return x
    decoded, _consumed = codecs.unicode_escape_decode(x)
    return decoded
def u(x):
    """Return ``x``, decoded with ``unicode_escape`` when ``PY3K`` is set.

    When ``PY3K`` is falsy the argument is returned unchanged.
    """
    if not PY3K:
        return x
    decoded, _consumed = codecs.unicode_escape_decode(x)
    return decoded
# Register psycopg2 compatibility hooks.
# The package is spelled "psycopg2cffi"; with the previous misspelling the
# import always failed and the hook was never registered.
try:
    from psycopg2cffi import compat
    compat.register()
except ImportError:
    pass
else:
    try:
        import psycopg2
    except ImportError:
        pass

# Python 2/3 compatibility.
if sys.version_info[0] < 3:
    import codecs
    # Turn an escaped byte-string literal into a unicode object.
    ulit = lambda s: codecs.unicode_escape_decode(s)[0]
    binary_construct = buffer
    binary_types = buffer
else:
    # Python 3 string literals are already text.
    ulit = lambda s: s
    binary_construct = lambda s: bytes(s.encode('raw_unicode_escape'))
    binary_types = (bytes, memoryview)

# Test configuration, overridable through environment variables.
TEST_BACKEND = os.environ.get('PEEWEE_TEST_BACKEND') or 'sqlite'
TEST_DATABASE = os.environ.get('PEEWEE_TEST_DATABASE') or 'peewee_test'
TEST_VERBOSITY = int(os.environ.get('PEEWEE_TEST_VERBOSITY') or 1)

# At verbosity above 1, attach a stream handler (level ERROR) to the logger.
if TEST_VERBOSITY > 1:
    handler = logging.StreamHandler()
    handler.setLevel(logging.ERROR)
    logger.addHandler(handler)
def uniescape(value):
    """Return ``value`` decoded with the ``unicode_escape`` codec."""
    decoded, _consumed = codecs.unicode_escape_decode(value)
    return decoded
def u(x):
    """Decode ``x`` with ``unicode_escape`` and re-encode it for the locale.

    The decoded text is encoded with the encoding reported by
    ``locale.getpreferredencoding()`` and the resulting bytes are returned.
    """
    text = codecs.unicode_escape_decode(x)[0]
    target_encoding = locale.getpreferredencoding()
    return text.encode(target_encoding)