예제 #1
0
    def _unicodePreDecode(self, code=None):
        r"""Tokenize ``code`` into decoded ``\uXXXX`` runs and raw-text runs.

        Consecutive escapes are decoded and merged into a single "unicode"
        token; all other characters are collected verbatim into "raw" tokens.
        A ``\u`` (or ``\U``) that is not followed by exactly four hex digits,
        including one cut off by the end of the input, is kept as raw text
        instead of being passed to the codec, which would raise
        UnicodeDecodeError.

        :param code: text to tokenize; ``self.code`` is used when it is falsy
        :return: list of ``[type, text]`` pairs where type is "unicode" or
            "raw"; for empty input the single pair holds an empty string
        """
        code = code if code else self.code
        hex_digits = '0123456789abcdefABCDEF'

        def escape_digits(pos):
            # The four hex digits of an escape starting at pos, or None.
            if code[pos:pos + 2].lower() != '\\u':
                return None
            digits = code[pos + 2:pos + 6]
            if len(digits) == 4 and all(ch in hex_digits for ch in digits):
                return digits
            return None

        # Seed the list with a token of the type the input starts with, so
        # the first character is appended to it rather than opening a new one.
        current = 'unicode' if escape_digits(0) else 'raw'
        tokens = [[current, ""]]
        i = 0
        while i < len(code):
            digits = escape_digits(i)
            if digits:
                # Rebuild the escape with a lowercase "u": the codec treats an
                # uppercase "\U" as an eight-digit escape and would reject it.
                char = codecs.raw_unicode_escape_decode('\\u' + digits)[0]
                if current == 'unicode':
                    tokens[-1][1] += char
                else:
                    current = 'unicode'
                    tokens.append([current, char])
                i += 6
            else:
                if current == 'raw':
                    tokens[-1][1] += code[i]
                else:
                    current = 'raw'
                    tokens.append([current, code[i]])
                i += 1

        return tokens
def retrieve_json(url, method='GET', data=None):
    """
    Utility: fetch a URL and parse its body as JSON.

    The response text has its HTML character references unescaped and is then
    passed through ``raw_unicode_escape_decode`` before ``json.loads``, to
    avoid unicode escaping problems (double backslash encoding).

    :param url: URL to fetch
    :param method: the method to use for the request ('GET' or 'POST')
    :param data: if method is POST, the data for the request's body
    :return: the parsed JSON value from the response's body
    :raises Exception: if method is neither 'GET' nor 'POST', or if method
        is 'POST' and data is None
    :raises json.JSONDecodeError: if the processed body is not valid JSON
    """
    print(url)
    if method == 'GET':
        response = requests.get(url)
    elif method == 'POST':
        if data is None:
            raise Exception('retrieve_json(): data for POST cannot be None')
        response = requests.post(url, data=data)
    else:
        raise Exception('retrieve_json(): Wrong Method')

    # html.unescape replaces HTMLParser().unescape, which no longer exists
    # on Python 3.9 and later.
    text = html.unescape(response.text)
    # NOTE(review): the codec function is given a str here; confirm that
    # non-ASCII characters in the body survive this step as intended.
    return json.loads(raw_unicode_escape_decode(text)[0])
예제 #3
0
    def _unicodePreDecode(self, code=None):
        r"""Pre-process text for unicode decoding.

        Splits the original ``code`` into unicode-escaped (``\uXXXX``)
        substrings, which are decoded, and raw (ASCII) substrings, which are
        kept verbatim. A ``\u`` that lacks four following characters or
        fails the ``self._isHex`` checks is treated as raw text.

        NOTE(review): ``self._isHex`` is defined elsewhere; it is presumably
        truthy only for a string of hex digits -- confirm.

        :param code: text to tokenize; ``self.code`` is used when it is falsy
        :return: ``[[type, token]]`` -- a list whose elements pair a type
            ("unicode" or "raw") with the token string
        """
        code = code if code else self.code

        def is_escape(pos):
            # True when a complete escape (prefix plus four characters that
            # pass the hex checks) starts at pos. The length test keeps a
            # truncated escape at the end of the input away from the codec.
            return (pos + 6 <= len(code)
                    and code[pos:pos + 2].lower() == '\\u'
                    and self._isHex(code[pos + 2:pos + 4])
                    and self._isHex(code[pos + 4:pos + 6]))

        # Seed with the type the input really starts with, so an invalid
        # leading "\u" does not leave an empty "unicode" token behind.
        current = 'unicode' if is_escape(0) else 'raw'
        tokens = [[current, ""]]
        i = 0
        while i < len(code):
            if is_escape(i):
                # Always hand the codec a lowercase "\u": it reads "\U" as an
                # eight-digit escape and would raise on four digits.
                char = codecs.raw_unicode_escape_decode(
                    '\\u' + code[i + 2:i + 6])[0]
                if current == "unicode":
                    tokens[-1][1] += char
                else:
                    current = 'unicode'
                    tokens.append([current, char])
                i += 6
            else:
                if current != 'unicode':
                    tokens[-1][1] += code[i]
                else:
                    current = 'raw'
                    tokens.append([current, code[i:i + 1]])
                i += 1

        return tokens
예제 #4
0
    def _autoPreDecode(self, code=None):
        r"""Pre-process text for decoding by splitting it into typed tokens.

        Recognizes URL-encoded (``%XX``), hex-encoded (``\xXX`` or ``0xXX``),
        unicode-escaped (``\uXXXX``) and raw (ASCII) substrings in the
        original ``code``. Encoded units are decoded to characters and
        neighbouring units of the same type are merged into one token.

        A marker that is not followed by the required number of hex digits
        (for example a literal ``%`` at the end of the text) is kept as raw
        text instead of raising, and empty input yields one empty raw token.

        :param code: text to tokenize; ``self.code`` is used when it is falsy
        :return: list of ``[type, substring]`` pairs; type is one of
            "urlcode", "hexcode", "unicode" or "raw"
        """
        code = code if code else self.code
        if not code:
            return [['raw', ""]]

        hex_digits = '0123456789abcdefABCDEF'

        def hex_at(start, count):
            # The `count` hex digits beginning at `start`, or None when the
            # input ends early or holds a non-hex character there.
            digits = code[start:start + count]
            if len(digits) == count and all(ch in hex_digits for ch in digits):
                return digits
            return None

        def decode_at(pos):
            # (type, decoded character, consumed length) for an encoded unit
            # starting at pos, or None when pos holds plain text.
            head = code[pos:pos + 2]
            if code[pos] == '%':
                digits = hex_at(pos + 1, 2)
                if digits:
                    return 'urlcode', chr(int(digits, 16)), 3
            elif head.lower() == '\\x' or head == '0x':
                digits = hex_at(pos + 2, 2)
                if digits:
                    return 'hexcode', chr(int(digits, 16)), 4
            elif head.lower() == '\\u':
                digits = hex_at(pos + 2, 4)
                if digits:
                    # Lowercase "u" on purpose: the codec reads "\U" as an
                    # eight-digit escape and would reject four digits.
                    char = codecs.raw_unicode_escape_decode(
                        '\\u' + digits)[0]
                    return 'unicode', char, 6
            return None

        # Seed with the type of the first unit so it extends this token.
        first = decode_at(0)
        current = first[0] if first else 'raw'
        tokens = [[current, ""]]
        i = 0
        while i < len(code):
            unit = decode_at(i)
            if unit:
                kind, text, width = unit
            else:
                kind, text, width = 'raw', code[i], 1
            if kind == current:
                tokens[-1][1] += text
            else:
                current = kind
                tokens.append([current, text])
            i += width

        return tokens
예제 #5
0
def page_through_results(query, limit, **kwargs):
    """Yield the result bindings for ``query`` one page at a time.

    Each iteration builds a URL with ``make_url`` for the current offset,
    fetches it and yields ``data['results']['bindings']``. Paging stops after
    the first page that holds fewer than ``limit`` entries. As a side effect
    the module-level ``cols`` is set to ``data['head']['vars']`` on every
    page.

    :param query: query passed through to ``make_url``
    :param limit: page size; also the step by which the offset advances
    :param kwargs: extra keyword arguments passed through to ``make_url``
    :raises urllib.error.HTTPError: re-raised after the error body is printed
    """
    # Function-scope import, as before, but done once instead of per page.
    from codecs import raw_unicode_escape_decode

    global cols
    offset = 0
    while True:
        url = make_url(query, limit=limit, offset=offset, **kwargs)
        try:
            response = urllib.request.urlopen(url)
        except urllib.error.HTTPError as e:
            print(e.read())
            raise
        # Close the response as soon as the body is read so connections do
        # not accumulate while the consumer works through the pages.
        with response:
            raw_body = response.read()

        # This should be
        #    data = json.load(response)
        # but virtuoso generates invalid json, so we have to work around it.
        # See https://github.com/dbpedia/extraction-framework/issues/318
        json_data = raw_unicode_escape_decode(raw_body)[0]
        data = json.loads(json_data)

        cols = data['head']['vars']
        result = data['results']['bindings']
        yield result
        if len(result) < limit:
            break
        offset += limit
        print('.')
예제 #6
0
파일: coder.py 프로젝트: wooluo/pentestdb
    def _autoPreDecode(self, code=None):
        r"""Pre-process text for decoding by splitting it into typed tokens.

        Identifies URL-encoded (``%XX``), hex-encoded (``\xXX`` or ``0xXX``),
        unicode-escaped (``\uXXXX``) and raw (ASCII) substrings in the
        original ``code``. Each encoded unit is decoded to one character and
        adjacent units of the same type share a token.

        Markers without the required hex digits after them (a lone ``%``, a
        ``0x`` inside ordinary text, a truncated ``\u``) stay in the raw
        text instead of raising; empty input yields one empty raw token.

        :param code: text to tokenize; ``self.code`` is used when it is falsy
        :return: token list; each element is ``[type, substring]`` with type
            "urlcode", "hexcode", "unicode" or "raw"
        """
        code = code if code else self.code
        if not code:
            return [['raw', ""]]

        hex_digits = frozenset('0123456789abcdefABCDEF')

        def take_hex(start, count):
            # Exactly `count` hex digits found at `start`, otherwise None.
            piece = code[start:start + count]
            if len(piece) == count and hex_digits.issuperset(piece):
                return piece
            return None

        def read_unit(pos):
            # Decode the encoded unit at pos into (type, char, length);
            # None means the character at pos is plain text.
            marker = code[pos:pos + 2]
            if code[pos] == '%':
                piece = take_hex(pos + 1, 2)
                return ('urlcode', chr(int(piece, 16)), 3) if piece else None
            if marker.lower() == '\\x' or marker == '0x':
                piece = take_hex(pos + 2, 2)
                return ('hexcode', chr(int(piece, 16)), 4) if piece else None
            if marker.lower() == '\\u':
                piece = take_hex(pos + 2, 4)
                if piece is None:
                    return None
                # The codec needs a lowercase "u"; "\U" means eight digits.
                char = codecs.raw_unicode_escape_decode('\\u' + piece)[0]
                return ('unicode', char, 6)
            return None

        # The seed token takes the type of whatever the input starts with.
        leading = read_unit(0)
        current = leading[0] if leading else 'raw'
        tokens = [[current, ""]]
        i = 0
        while i < len(code):
            kind, text, width = read_unit(i) or ('raw', code[i], 1)
            if kind == current:
                tokens[-1][1] += text
            else:
                current = kind
                tokens.append([current, text])
            i += width

        return tokens
예제 #7
0
def unescape_unicode(raw_str: str):
    r"""Turn literal unicode escapes inside ``raw_str`` into real characters.

    The text is encoded with the raw-unicode-escape codec and the resulting
    bytes are decoded again with the same codec.

    >>> unescape_unicode(r'\u0442\u0435\u0441\u0442')
    'тест'

    :param raw_str: a raw string with unicode escapes
    :return: an unescaped unicode string
    """
    # Both codec functions return a (result, length) pair; only the result
    # is of interest here.
    escaped_bytes = codecs.raw_unicode_escape_encode(raw_str)[0]
    return codecs.raw_unicode_escape_decode(escaped_bytes)[0]
    def setUnit(self, unit):
        """Store the unit and show it, with the prefix if any, in the UI.

        ``unit`` is decoded with the raw-unicode-escape codec before it is
        stored in ``self.unit``. The label text, and the slider unit when the
        slider supports one, are set to the prefix followed by the unit.

        :param unit: unit text, possibly holding unicode escapes; ``None``
            clears ``self.unit`` and leaves the label and slider untouched
        """
        # The None check has to come before decoding: the codec function
        # raises TypeError on None, which made the later guard unreachable.
        if unit is None:
            self.unit = None
            return

        self.unit = codecs.raw_unicode_escape_decode(unit)[0]
        if self.prefix is not None:
            unit_str = "{0}{1}".format(self.prefix, self.unit)
        else:
            unit_str = self.unit
        logger.info("Setting unit {0}".format(unit_str))
        self.unitLabel.setText(unit_str)

        try:
            self.valueSlider.setUnit(unit_str)
        except AttributeError:
            # Slider had no unit
            pass
예제 #9
0
파일: coder.py 프로젝트: wooluo/pentestdb
    def _unicodePreDecode(self, code=None):
        r"""Split ``code`` into "unicode" and "raw" tokens.

        Each ``\uXXXX`` escape is decoded to its character; runs of escapes
        share one "unicode" token and runs of other characters share one
        "raw" token. An escape marker without four hex digits after it, for
        instance one truncated by the end of the text, is left in the raw
        text rather than raising UnicodeDecodeError.

        :param code: text to tokenize; ``self.code`` is used when it is falsy
        :return: list of ``[type, text]`` pairs with type "unicode" or "raw"
        """
        code = code if code else self.code
        hex_digits = frozenset('0123456789abcdefABCDEF')

        def decoded_at(pos):
            # Character encoded by a complete escape at pos, otherwise None.
            chunk = code[pos:pos + 6]
            if len(chunk) != 6 or chunk[:2].lower() != '\\u':
                return None
            if not hex_digits.issuperset(chunk[2:]):
                return None
            # Lowercase the marker: "\U" means an eight-digit escape to the
            # codec, so passing it through unchanged would fail.
            return codecs.raw_unicode_escape_decode('\\u' + chunk[2:])[0]

        # The seed token has the type of the first unit so that unit extends
        # it instead of starting a second token.
        current = 'unicode' if decoded_at(0) is not None else 'raw'
        tokens = [[current, ""]]
        i = 0
        while i < len(code):
            char = decoded_at(i)
            if char is not None:
                kind, text, width = 'unicode', char, 6
            else:
                kind, text, width = 'raw', code[i], 1
            if kind == current:
                tokens[-1][1] += text
            else:
                current = kind
                tokens.append([current, text])
            i += width

        return tokens
예제 #10
0
def parse_data(data):
    """Process one received message and trigger a payment when it qualifies.

    The data is decoded with the raw-unicode-escape codec. If the text
    contains "success" it is parsed with ``parse_task_finish_data`` and the
    parsed fields are printed. ``pay`` is called for the account of the
    ``roleid`` when the ``success`` field equals 1 and
    ``get_task_wing_num`` for the ``taskid`` is positive.

    Any exception is printed and appended to ``errlog.log`` instead of being
    propagated.
    """
    try:
        # Decode the received bytes.
        text = codecs.raw_unicode_escape_decode(data)[0]
        if "success" not in text:
            return
        fields = parse_task_finish_data(text)
        print(fields)
        if int(fields['success']) != 1:
            return
        if int(get_task_wing_num(fields['taskid'])) <= 0:
            return
        account_id = get_player_account_id(int(fields['roleid']))
        pay(account_id, fields['taskid'])
    except Exception as err:
        message = "【ERROR】{}".format(err)
        print(message)
        with open('errlog.log', 'a+', encoding='utf8') as errlog:
            errlog.write(message)
예제 #11
0
    def test_codecs_builtins(self):
        s = "abc"

        encoded = codecs.utf_8_encode(s)
        self.assertEqual(s, codecs.utf_8_decode(encoded[0])[0])

        encoded = codecs.utf_7_encode(s)
        self.assertEqual(s, codecs.utf_7_decode(encoded[0])[0])

        encoded = codecs.utf_16_encode(s)
        self.assertEqual(s, codecs.utf_16_decode(encoded[0])[0])

        encoded = codecs.utf_16_le_encode(s)
        self.assertEqual(s, codecs.utf_16_le_decode(encoded[0])[0])

        encoded = codecs.utf_16_be_encode(s)
        self.assertEqual(s, codecs.utf_16_be_decode(encoded[0])[0])

        encoded = codecs.utf_32_encode(s)
        self.assertEqual(s, codecs.utf_32_decode(encoded[0])[0])

        encoded = codecs.utf_32_le_encode(s)
        self.assertEqual(s, codecs.utf_32_le_decode(encoded[0])[0])

        encoded = codecs.utf_32_be_encode(s)
        self.assertEqual(s, codecs.utf_32_be_decode(encoded[0])[0])

        encoded = codecs.utf_32_be_encode(s)
        self.assertEqual(s, codecs.utf_32_be_decode(encoded[0])[0])

        encoded = codecs.raw_unicode_escape_encode(s)
        self.assertEqual(s, codecs.raw_unicode_escape_decode(encoded[0])[0])

        encoded = codecs.unicode_escape_encode(s)
        self.assertEqual(s, codecs.unicode_escape_decode(encoded[0])[0])

        encoded = codecs.latin_1_encode(s)
        self.assertEqual(s, codecs.latin_1_decode(encoded[0])[0])

        encoded = codecs.ascii_encode(s)
        self.assertEqual(s, codecs.ascii_decode(encoded[0])[0])
예제 #12
0
 def test_raw_unicode_escape_decode(self):
     #sanity
     new_str, num_processed = codecs.raw_unicode_escape_decode("abc")
     self.assertEqual(new_str, 'abc')
     self.assertEqual(num_processed, 3)
예제 #13
0
 def decode(self, input, errors='strict'):
     """Decode ``input`` with ``codecs.raw_unicode_escape_decode``.

     ``errors`` is forwarded unchanged and the call's return value is
     returned as-is. The literal ``False`` is passed as the third positional
     argument (presumably the codec's ``final`` flag -- TODO confirm).
     """
     return codecs.raw_unicode_escape_decode(input, errors, False)
예제 #14
0
 def test_raw_unicode_escape_decode(self):
     #sanity
     new_str, size = codecs.raw_unicode_escape_decode("abc")
     self.assertEqual(new_str, u'abc')
     self.assertEqual(size, 3)
예제 #15
0
 def test_raw_unicode_escape_decode(self):
     #sanity
     new_str, size = codecs.raw_unicode_escape_decode("abc")
     self.assertEqual(new_str, u'abc')
     self.assertEqual(size, 3)
예제 #16
0
 def _buffer_decode(self, input, errors, final):
     """Decode ``input`` with ``codecs.raw_unicode_escape_decode``.

     ``errors`` and ``final`` are forwarded unchanged and the call's return
     value is returned as-is.

     NOTE(review): the name and signature look like the hook used by
     ``codecs.BufferedIncrementalDecoder``; the enclosing class is not
     visible here -- confirm.
     """
     return codecs.raw_unicode_escape_decode(input, errors, final)
예제 #17
0
 def decode(self, input, final=False):
     """Return ``input`` decoded with ``codecs.raw_unicode_escape_decode``.

     The error mode is taken from ``self.errors``. Only the first element of
     the codec function's result is returned. ``final`` is accepted but not
     used by this implementation.
     """
     return codecs.raw_unicode_escape_decode(input, self.errors)[0]
예제 #18
0
파일: nodes.py 프로젝트: xxoolm/Ryven
 def update_event(self, inp=-1):
     """Decode input 0 with the raw-unicode-escape codec and set output 0.

     ``self.input(0)`` is passed as the data and ``self.input(1)`` as the
     error mode. Output 0 receives the complete return value of
     ``codecs.raw_unicode_escape_decode``, not only its first element.
     ``inp`` is not used in this method.
     """
     self.set_output_val(
         0, codecs.raw_unicode_escape_decode(self.input(0), self.input(1)))