def _unicodePreDecode(self, code=None):
    r"""Split ``code`` into runs of decoded ``\uXXXX`` escapes and raw text.

    The text is scanned left to right.  A backslash followed by ``u`` (or
    ``U``) and exactly four hexadecimal digits is decoded to one character;
    everything else is copied through unchanged.  Consecutive pieces of the
    same type are merged into a single token.

    A ``\u`` that is not followed by four hex digits (truncated or
    malformed) is kept as raw text instead of raising from the codec.

    :param code: text to tokenise; when falsy, ``self.code`` is used.
    :returns: list of ``[type, text]`` pairs, ``type`` being ``"unicode"``
        or ``"raw"``.  The list always holds at least one token, so empty
        input yields ``[["raw", ""]]``.
    """
    code = code if code else self.code
    hex_digits = "0123456789abcdefABCDEF"

    def escape_digits(pos):
        """Return the four hex digits of an escape starting at ``pos``, else None."""
        if code[pos:pos + 2].lower() != "\\u":
            return None
        digits = code[pos + 2:pos + 6]
        if len(digits) == 4 and all(ch in hex_digits for ch in digits):
            return digits
        return None

    current = "unicode" if escape_digits(0) is not None else "raw"
    tokens = [[current, ""]]
    i = 0
    while i < len(code):
        digits = escape_digits(i)
        if digits is not None:
            # Always hand the codec a lower-case "\u": it reads "\U" as an
            # eight-digit escape and would reject this six-character form.
            text = codecs.raw_unicode_escape_decode("\\u" + digits)[0]
            kind, step = "unicode", 6
        else:
            text, kind, step = code[i], "raw", 1

        if kind == current:
            tokens[-1][1] += text
        else:
            current = kind
            tokens.append([current, text])
        i += step
    return tokens
def retrieve_json(url, method='GET', data=None):
    """
    Utility: fetch a URL and parse its body as JSON.

    The response text has its HTML entities unescaped and its literal
    ``\\uXXXX`` sequences expanded before it is handed to ``json.loads``.

    :rtype : dict()
    :param url: URL to fetch
    :param method: the method to use for the request (``'GET'`` or ``'POST'``)
    :param data: if method is POST, the data for the request's body
    :return: dictionary from the response's body
    :raises ValueError: if ``method`` is not GET/POST, or POST has no data
    :raises json.JSONDecodeError: if the processed body is not valid JSON
    """
    print(url)
    if method == 'GET':
        response = requests.get(url)
    elif method == 'POST':
        if data is None:
            raise ValueError('retrieve_json(): data for POST cannot be None')
        response = requests.post(url, data=data)
    else:
        raise ValueError('retrieve_json(): Wrong Method')

    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # the supported equivalent.
    text = html.unescape(response.text)

    # avoid unicode escaping problems (double backslash encoding).
    # Encode with the matching codec first: handing a str straight to
    # raw_unicode_escape_decode() makes it read the UTF-8 bytes as Latin-1,
    # which garbles every non-ASCII character.
    expanded = raw_unicode_escape_decode(text.encode('raw_unicode_escape'))[0]
    return json.loads(expanded)
def _unicodePreDecode(self, code=None):
    r"""Pre-process ``code`` for unicode decoding.

    Identifies the unicode-escaped substrings and the raw (ASCII)
    substrings in the original text.  A backslash followed by ``u``/``U``
    and four characters accepted pairwise by ``self._isHex`` is decoded to
    one character; anything else is kept as raw text.  Consecutive pieces
    of the same type are merged.

    :param code: text to tokenise; when falsy, ``self.code`` is used.
    :returns: ``[[type, token]]`` -- a list of tokens, each holding a type
        (``"unicode"`` or ``"raw"``) and the token string.
    """
    code = code if code else self.code

    def escape_digits(pos):
        """Return the four digits of an escape starting at ``pos``, else None."""
        if code[pos:pos + 2].lower() != "\\u":
            return None
        digits = code[pos + 2:pos + 6]
        # The explicit length check keeps a short tail such as "\u00f"
        # away from the codec no matter how _isHex treats short strings.
        if (len(digits) == 4
                and self._isHex(digits[:2])
                and self._isHex(digits[2:])):
            return digits
        return None

    current = "unicode" if escape_digits(0) is not None else "raw"
    tokens = [[current, ""]]
    i = 0
    while i < len(code):
        digits = escape_digits(i)
        if digits is None:
            kind, text, step = "raw", code[i], 1
        else:
            # Normalise "\U" to "\u": the codec treats "\U" as an
            # eight-digit escape and rejects the six-character form.
            text = codecs.raw_unicode_escape_decode("\\u" + digits)[0]
            kind, step = "unicode", 6

        if kind == current:
            tokens[-1][1] += text
        else:
            current = kind
            tokens.append([current, text])
        i += step
    return tokens
def _autoPreDecode(self, code=None):
    r"""Pre-process ``code`` for decoding.

    Identifies URL-encoded (``%HH``), hex-encoded (``\xHH`` / ``0xHH``),
    unicode-escaped (``\uHHHH``) and raw (ASCII) substrings in the original
    text.  Consecutive pieces of the same type are merged into one token.

    An escape prefix that is not followed by the required number of hex
    digits (for example the ``%`` in ``"100%"``) is kept as raw text rather
    than raising.

    :param code: text to tokenise; when falsy, ``self.code`` is used.
    :returns: a list of tokens, each ``[type, substring]``; the supported
        types are ``"urlcode"``, ``"hexcode"``, ``"unicode"`` and ``"raw"``.
        The list always starts with a token whose type is chosen from the
        first characters of ``code``; that token may be empty.
    """
    code = code if code else self.code
    hex_digits = "0123456789abcdefABCDEF"

    def hex_run(start, count):
        """Return ``count`` hex digits found at ``start``, or None."""
        digits = code[start:start + count]
        if len(digits) == count and all(ch in hex_digits for ch in digits):
            return digits
        return None

    def escape_at(pos):
        """Describe the escape at ``pos`` as (type, text, width), or None."""
        pair = code[pos:pos + 2]
        if code[pos:pos + 1] == "%":
            digits = hex_run(pos + 1, 2)
            if digits is not None:
                return "urlcode", chr(int(digits, 16)), 3
        elif pair.lower() == "\\x" or pair == "0x":
            digits = hex_run(pos + 2, 2)
            if digits is not None:
                return "hexcode", chr(int(digits, 16)), 4
        elif pair.lower() == "\\u":
            digits = hex_run(pos + 2, 4)
            if digits is not None:
                # Normalise "\U" to "\u": the codec reads "\U" as an
                # eight-digit escape and rejects the six-character form.
                text = codecs.raw_unicode_escape_decode("\\u" + digits)[0]
                return "unicode", text, 6
        return None

    # Type of the leading token.  Only these exact prefixes are recognised
    # here (not "0x" or upper-case variants), so such input starts with an
    # empty "raw" token.
    current = "raw"
    if escape_at(0) is not None:
        for prefix, kind in (("%", "urlcode"),
                             ("\\x", "hexcode"),
                             ("\\u", "unicode")):
            if code.startswith(prefix):
                current = kind

    tokens = [[current, ""]]
    i = 0
    while i < len(code):
        found = escape_at(i)
        if found is None:
            kind, text, width = "raw", code[i], 1
        else:
            kind, text, width = found

        if kind == current:
            tokens[-1][1] += text
        else:
            current = kind
            tokens.append([current, text])
        i += width
    return tokens
def page_through_results(query, limit, **kwargs):
    """Yield successive pages of result bindings for ``query``.

    Each iteration builds a URL with ``make_url`` for the current offset,
    fetches it, parses the body as JSON and yields
    ``data['results']['bindings']``.  Iteration stops after the first page
    that holds fewer than ``limit`` entries.

    Side effect: the module-level ``cols`` is set to
    ``data['head']['vars']`` of the most recently fetched page.

    :param query: query passed through to ``make_url``
    :param limit: page size; also the step by which the offset advances
    :param kwargs: extra keyword arguments forwarded to ``make_url``
    :raises urllib.error.HTTPError: re-raised after the error body is printed
    """
    from codecs import raw_unicode_escape_decode

    global cols
    offset = 0
    while True:
        url = make_url(query, limit=limit, offset=offset, **kwargs)
        try:
            response = urllib.request.urlopen(url)
        except urllib.error.HTTPError as e:
            print(e.read())
            raise
        # Close the connection as soon as the body has been read.
        with response:
            raw_body = response.read()

        # This should be
        #   data = json.load(response)
        # but virtuoso generates invalid json, so we have to work around it.
        # See https://github.com/dbpedia/extraction-framework/issues/318
        # NOTE(review): this codec maps bytes >= 0x80 as Latin-1, so raw
        # UTF-8 in the body would be garbled -- confirm the endpoint only
        # sends ASCII plus escapes.
        json_data = raw_unicode_escape_decode(raw_body)[0]
        data = json.loads(json_data)

        cols = data['head']['vars']
        result = data['results']['bindings']
        yield result

        if len(result) < limit:
            break
        offset += limit
        print('.')
def _autoPreDecode(self, code=None):
    r"""Pre-process ``code`` for decoding.

    Identifies URL-encoded (``%HH``), hex-encoded (``\xHH`` / ``0xHH``),
    unicode-escaped (``\uHHHH``) and raw (ASCII) substrings in the original
    text and returns them as a token list.  Adjacent pieces of the same
    type are merged.

    Only complete escapes are decoded: a prefix without the required hex
    digits (a trailing ``%``, ``0xzz``, a truncated ``\u12`` ...) stays in
    the raw text instead of raising.

    :param code: text to tokenise; when falsy, ``self.code`` is used.
    :returns: a list of tokens, each ``[type, substring]``; the supported
        types are ``"urlcode"``, ``"hexcode"``, ``"unicode"`` and ``"raw"``.
        The first token's type is chosen from the first characters of
        ``code`` and its text may be empty.
    """
    import re  # local import: nothing else in view needs it

    code = code if code else self.code

    # One capture group per escape family; the group number says which.
    escape = re.compile(
        r"%([0-9a-fA-F]{2})"
        r"|(?:\\[xX]|0x)([0-9a-fA-F]{2})"
        r"|\\[uU]([0-9a-fA-F]{4})"
    )
    kind_of_group = {1: "urlcode", 2: "hexcode", 3: "unicode"}

    # Type of the leading token.  Only these exact prefixes are recognised
    # here, so input starting with "0x" or an upper-case escape begins with
    # an empty "raw" token.
    lead = "raw"
    if escape.match(code) is not None:
        for prefix, kind in (("%", "urlcode"),
                             ("\\x", "hexcode"),
                             ("\\u", "unicode")):
            if code.startswith(prefix):
                lead = kind
    tokens = [[lead, ""]]

    def add(kind, text):
        """Extend the last token if it has the same type, else start a new one."""
        if tokens[-1][0] == kind:
            tokens[-1][1] += text
        else:
            tokens.append([kind, text])

    pos = 0
    for found in escape.finditer(code):
        if found.start() > pos:
            add("raw", code[pos:found.start()])
        kind = kind_of_group[found.lastindex]
        digits = found.group(found.lastindex)
        if kind == "unicode":
            # Always feed the codec a lower-case "\u"; it would read "\U"
            # as an eight-digit escape and reject it.
            add(kind, codecs.raw_unicode_escape_decode("\\u" + digits)[0])
        else:
            add(kind, chr(int(digits, 16)))
        pos = found.end()
    if pos < len(code):
        add("raw", code[pos:])
    return tokens
def unescape_unicode(raw_str: str) -> str:
    r"""Expand literal ``\uXXXX`` / ``\UXXXXXXXX`` sequences in a string.

    The text is first encoded with the raw-unicode-escape codec, which
    leaves existing backslash sequences untouched and escapes characters
    above U+00FF, and then decoded with the same codec, which turns every
    escape into the character it names.

    >>> unescape_unicode(r'\u0041\u0042c')
    'ABc'

    :param raw_str: a raw string with unicode escapes
    :return: an unescaped unicode string
    """
    escaped_bytes = codecs.raw_unicode_escape_encode(raw_str)[0]
    return codecs.raw_unicode_escape_decode(escaped_bytes)[0]
def setUnit(self, unit):
    """Store the unit and show it, with any prefix, on the unit label.

    ``unit`` is run through the raw-unicode-escape decoder so literal
    ``\\uXXXX`` sequences become the characters they name.  The resulting
    text, preceded by ``self.prefix`` when that is set, is written to
    ``self.unitLabel`` and, if ``self.valueSlider`` supports it, to the
    slider as well.

    :param unit: unit text, or ``None`` to clear the stored unit without
        touching the widgets.
    """
    if unit is None:
        # Decoding None would raise TypeError before the None check below
        # could ever take effect, so handle it up front.
        self.unit = None
        return

    self.unit = codecs.raw_unicode_escape_decode(unit)[0]
    if self.prefix is not None:
        unit_str = "{0}{1}".format(self.prefix, self.unit)
    else:
        unit_str = self.unit
    logger.info("Setting unit {0}".format(unit_str))
    self.unitLabel.setText(unit_str)
    try:
        self.valueSlider.setUnit(unit_str)
    except AttributeError:
        # Slider had no unit
        pass
def _unicodePreDecode(self, code=None):
    r"""Tokenise ``code`` into decoded unicode escapes and raw text.

    Every backslash + ``u``/``U`` + four hex digits is decoded to a single
    character; all other text is passed through untouched.  Neighbouring
    pieces of the same type are joined into one token.

    Incomplete or malformed escapes (for example a trailing ``\u12``) are
    left in the raw text rather than raising from the codec.

    :param code: text to tokenise; when falsy, ``self.code`` is used.
    :returns: list of ``[type, text]`` pairs with ``type`` either
        ``"unicode"`` or ``"raw"``; never empty (empty input gives
        ``[["raw", ""]]``).
    """
    import re  # local import: nothing else in view needs it

    code = code if code else self.code
    escape = re.compile(r"\\[uU]([0-9a-fA-F]{4})")

    lead = "unicode" if escape.match(code) is not None else "raw"
    tokens = [[lead, ""]]

    def add(kind, text):
        """Extend the last token if it has the same type, else start a new one."""
        if tokens[-1][0] == kind:
            tokens[-1][1] += text
        else:
            tokens.append([kind, text])

    pos = 0
    for found in escape.finditer(code):
        if found.start() > pos:
            add("raw", code[pos:found.start()])
        # Re-prefix with a lower-case "\u": the codec treats "\U" as an
        # eight-digit escape and would reject the four digits we have.
        add("unicode",
            codecs.raw_unicode_escape_decode("\\u" + found.group(1))[0])
        pos = found.end()
    if pos < len(code):
        add("raw", code[pos:])
    return tokens
def parse_data(data):
    """Handle one received message and trigger payment for a finished task.

    ``data`` is decoded with the raw-unicode-escape codec.  If the decoded
    text contains ``"success"`` it is parsed with
    ``parse_task_finish_data``; when the parsed ``success`` field equals 1
    and ``get_task_wing_num`` reports a positive number for the task,
    ``pay`` is called for the account returned by
    ``get_player_account_id``.

    Any exception is reported on stdout and appended to ``errlog.log`` in
    the current working directory, one entry per line; nothing is re-raised.

    :param data: the received payload (bytes-like, or str of escapes)
    """
    try:
        # Decode the received bytes.
        decoded = codecs.raw_unicode_escape_decode(data)[0]
        if "success" in decoded:
            task = parse_task_finish_data(decoded)
            print(task)
            task_done = int(task['success']) == 1
            if task_done and int(get_task_wing_num(task['taskid'])) > 0:
                account_id = get_player_account_id(int(task['roleid']))
                pay(account_id, task['taskid'])
    except Exception as err:
        message = "【ERROR】{}".format(err)
        print(message)
        with open('errlog.log', 'a', encoding='utf8') as errlog:
            # Terminate each entry so successive errors do not run together.
            errlog.write(message + "\n")
def test_codecs_builtins(self): s = "abc" encoded = codecs.utf_8_encode(s) self.assertEqual(s, codecs.utf_8_decode(encoded[0])[0]) encoded = codecs.utf_7_encode(s) self.assertEqual(s, codecs.utf_7_decode(encoded[0])[0]) encoded = codecs.utf_16_encode(s) self.assertEqual(s, codecs.utf_16_decode(encoded[0])[0]) encoded = codecs.utf_16_le_encode(s) self.assertEqual(s, codecs.utf_16_le_decode(encoded[0])[0]) encoded = codecs.utf_16_be_encode(s) self.assertEqual(s, codecs.utf_16_be_decode(encoded[0])[0]) encoded = codecs.utf_32_encode(s) self.assertEqual(s, codecs.utf_32_decode(encoded[0])[0]) encoded = codecs.utf_32_le_encode(s) self.assertEqual(s, codecs.utf_32_le_decode(encoded[0])[0]) encoded = codecs.utf_32_be_encode(s) self.assertEqual(s, codecs.utf_32_be_decode(encoded[0])[0]) encoded = codecs.utf_32_be_encode(s) self.assertEqual(s, codecs.utf_32_be_decode(encoded[0])[0]) encoded = codecs.raw_unicode_escape_encode(s) self.assertEqual(s, codecs.raw_unicode_escape_decode(encoded[0])[0]) encoded = codecs.unicode_escape_encode(s) self.assertEqual(s, codecs.unicode_escape_decode(encoded[0])[0]) encoded = codecs.latin_1_encode(s) self.assertEqual(s, codecs.latin_1_decode(encoded[0])[0]) encoded = codecs.ascii_encode(s) self.assertEqual(s, codecs.ascii_decode(encoded[0])[0])
def test_raw_unicode_escape_decode(self): #sanity new_str, num_processed = codecs.raw_unicode_escape_decode("abc") self.assertEqual(new_str, 'abc') self.assertEqual(num_processed, 3)
def decode(self, input, errors='strict'): return codecs.raw_unicode_escape_decode(input, errors, False)
def test_raw_unicode_escape_decode(self): #sanity new_str, size = codecs.raw_unicode_escape_decode("abc") self.assertEqual(new_str, u'abc') self.assertEqual(size, 3)
def _buffer_decode(self, input, errors, final): return codecs.raw_unicode_escape_decode(input, errors, final)
def decode(self, input, final=False):
    """Return ``input`` decoded with the raw-unicode-escape codec.

    The codec is called with ``self.errors`` as its error-handling scheme
    and only the decoded text (the first element of its result) is
    returned.

    :param input: the data to decode
    :param final: accepted but not used by this method
    """
    text, _consumed = codecs.raw_unicode_escape_decode(input, self.errors)
    return text
def update_event(self, inp=-1):
    """Decode input 0 with the raw-unicode-escape codec and publish it.

    The values read from ``self.input(0)`` and ``self.input(1)`` are passed,
    in that order, to ``codecs.raw_unicode_escape_decode``; the result is
    stored on output 0 through ``self.set_output_val``.

    :param inp: not used by this method
    """
    data = self.input(0)
    error_mode = self.input(1)
    decoded = codecs.raw_unicode_escape_decode(data, error_mode)
    self.set_output_val(0, decoded)