Ejemplo n.º 1
0
    def __init__(self):
        """Install hooked object/array/string parsers and a patched scanner.

        ``JSONObject``, ``JSONArray`` and ``scanstring`` from ``json.decoder``
        are each passed through ``_create_object_hook`` and installed as the
        ``parse_*`` attributes.  A pure-Python scanner is then built and the
        closure cell holding its inner ``_scan_once`` function is overwritten
        with the result of ``_create_scanner_wrapper``.

        Raises:
            ValueError: if the scanner returned by
                ``json.scanner.py_make_scanner`` has no callable closure
                cell named ``_scan_once``.
        """
        from json.decoder import JSONArray, JSONObject, scanstring

        super().__init__()

        self.parse_object = _create_object_hook(JSONObject)
        self.parse_array = _create_object_hook(JSONArray)
        # The hook machinery is called as ``(s_with_end, strict)``, so
        # scanstring is first adapted to that shape ...
        str_parser_wrapper = _create_object_hook(
            lambda s_with_end, strict: scanstring(*s_with_end, strict))
        # ... and then exposed again with a ``(s, end, strict)`` signature.
        self.parse_string = lambda s, end, strict: str_parser_wrapper(
            (s, end), strict)

        # The scanner closure is patched here because it refers to itself
        # internally and offers no configuration point.
        # Layout: 'py_make_scanner' defines '_scan_once', which is referenced
        # by 'scan_once', the function that 'py_make_scanner()' returns.
        orig_scanner = copy.deepcopy(json.scanner.py_make_scanner(self))
        try:
            # Find the closure cell that holds the inner '_scan_once'.
            cell = next(cell for cell in orig_scanner.__closure__
                        if callable(cell.cell_contents)
                        and cell.cell_contents.__name__ == '_scan_once')
        except StopIteration:
            raise ValueError(f'Failed to path {orig_scanner.__name__}, '
                             f'probably the internals has been changed')

        self.scan_once = _create_scanner_wrapper(cell.cell_contents)
        # Function closure cells are read-only before Python 3.7, so the
        # cell is rewritten through the '_cell_set' helper instead of by
        # plain assignment.
        _cell_set(cell, self.scan_once)
Ejemplo n.º 2
0
def custom_parse_string(string, idx, encoding, strict):
   """Scan a JSON string and turn ISO-formatted timestamps into datetimes.

   The arguments are forwarded unchanged to ``scanstring``.

   Returns:
      ``[datetime, end_index]`` when the scanned text matches
      ``ISO_TIMESTAMP_FORMAT``; otherwise the untouched
      ``(text, end_index)`` result of ``scanstring``.
   """
   obj = scanstring(string, idx, encoding, strict)
   text = obj[0]
   if isinstance(text, (str, unicode)):
      try:
         dt = datetime.datetime.strptime(text, ISO_TIMESTAMP_FORMAT)
      except (ValueError, TypeError):
         # Not a timestamp in the expected format (or not acceptable to
         # strptime at all): keep the plain string.  Only these two
         # exceptions are caught so KeyboardInterrupt/SystemExit still
         # propagate.
         return obj
      return [dt, obj[1]]
   return obj
Ejemplo n.º 3
0
 def _parse_string(*args, **kwargs):
     """Scan a JSON string and decode it to binary when it carries a marker.

     With ``self._use_binary_hex_encoding`` set, a leading ``0x`` marks a
     hex payload; otherwise a leading NUL character marks a base64 payload.
     Returns ``(value, end_index)``.
     """
     text, next_idx = scanstring(*args, **kwargs)
     if self._use_binary_hex_encoding:
         if text[:2] == '0x':
             text = a2b_hex(text[2:])
     elif text[:1] == '\x00':
         text = base64.b64decode(text[1:])
     return text, next_idx
Ejemplo n.º 4
0
def custom_parse_string(string, idx, encoding, strict):
    """Scan a JSON string and turn ISO-formatted timestamps into datetimes.

    The arguments are forwarded unchanged to ``scanstring``.

    Returns:
        ``[datetime, end_index]`` when the scanned text matches
        ``ISO_TIMESTAMP_FORMAT``; otherwise the untouched
        ``(text, end_index)`` result of ``scanstring``.
    """
    obj = scanstring(string, idx, encoding, strict)
    text = obj[0]
    if isinstance(text, (str, unicode)):
        try:
            dt = datetime.datetime.strptime(text, ISO_TIMESTAMP_FORMAT)
        except (ValueError, TypeError):
            # Not a timestamp in the expected format (or not acceptable to
            # strptime at all): keep the plain string.  Only these two
            # exceptions are caught so KeyboardInterrupt/SystemExit still
            # propagate.
            return obj
        return [dt, obj[1]]
    return obj
Ejemplo n.º 5
0
def parse_object(data):
    """Yield parse events for a JSON object read from a token stream.

    ``data`` is an iterator of ``(pos, symbol)`` pairs positioned just after
    the opening brace.  Emits ``start_map``, then a ``map_key`` event and
    the value's events for every member, then ``end_map``.
    """
    yield ('start_map', None)
    _, token = next(data)
    if token == '}':
        # Empty object: nothing between the braces.
        yield ('end_map', None)
        return
    while True:
        yield ('map_key', scanstring(token, 1)[0])
        # The token after the key only contributes its position.
        value_pos, _ = next(data)
        for event in parse_value(data, None, value_pos):
            yield event
        _, separator = next(data)
        if separator == '}':
            break
        # Anything else is treated as a member separator; fetch the next key.
        _, token = next(data)
    yield ('end_map', None)
Ejemplo n.º 6
0
 def _parse_string(*args, **kwargs):
     """Scan a JSON string, decoding binary or decimal payloads if marked.

     With ``self._use_binary_hex_encoding`` set, a leading ``0x`` marks a
     hex payload; otherwise a leading NUL character marks a base64 payload.
     Strings that are not binary are converted to ``decimal.Decimal`` when
     ``self._use_decimal_from_str`` is set and ``_DEC_MATCH`` accepts them.
     Returns ``(value, end_index)``.
     """
     text, next_idx = scanstring(*args, **kwargs)
     if self._use_binary_hex_encoding:
         if text[:2] == '0x':
             return a2b_hex(text[2:]), next_idx
     elif text[:1] == '\x00':
         return base64.b64decode(text[1:]), next_idx
     if self._use_decimal_from_str and _DEC_MATCH.match(text):
         try:
             return decimal.Decimal(text), next_idx
         except decimal.InvalidOperation:
             # The pattern matched but Decimal rejected it: keep the string.
             pass
     return text, next_idx
Ejemplo n.º 7
0
def parse_value(data, sym=None, pos=0):
    """Yield parse events for one JSON value.

    ``data`` is an iterator of ``(pos, symbol)`` pairs.  When ``sym`` is
    ``None`` the next pair is pulled from ``data``; otherwise ``sym`` is
    taken as the value's first symbol.  Containers are delegated to
    ``parse_array`` / ``parse_object``; scalars produce a single event.
    """
    if sym is None:
        pos, sym = next(data)

    if sym == 'null':
        yield ('null', None)
        return
    if sym in ('true', 'false'):
        yield ('boolean', sym == 'true')
        return
    if sym == '[':
        for item in parse_array(data):
            yield item
        return
    if sym == '{':
        for item in parse_object(data):
            yield item
        return
    if sym[0] == '"':
        yield ('string', scanstring(sym, 1)[0])
        return
    # Anything left is handed to Decimal as a number.
    yield ('number', decimal.Decimal(sym))
Ejemplo n.º 8
0
def _get_loads(strict=STRICT):
    """Pick and return a ``loads`` callable according to ``strict``.

    Non-strict: ``simplejson.loads`` when importable, else ``json.loads``.

    Strict: if ``json`` cannot be imported, the module-level ``STRICT`` flag
    is set to ``False`` and ``simplejson.loads`` is returned.  Otherwise the
    scanstring implementation is probed with a plain string; when it yields
    ``unicode`` the stock ``json.loads`` is returned, and when it does not,
    a ``loads`` wrapper is returned that defaults ``cls`` to a decoder built
    on ``decoder.py_scanstring`` and ``scanner.py_make_scanner``.
    """
    if not strict:
        try:
            from simplejson import loads
        except ImportError:
            from json import loads
        return loads

    # If we don't have json, we can only fall back to simplejson, non-strict
    try:
        from json import decoder
    except ImportError:
        global STRICT
        STRICT = False
        from simplejson import loads
        return loads
    # Probe which type the string scanner returns for a plain string.
    try:
        res = decoder.c_scanstring('"str"', 1)
    except TypeError:
        # github issue #33: pypy may not have c_scanstring
        res = decoder.scanstring('"str"', 1)

    # The scanner already returns unicode: the stock loader can be used.
    if type(res[0]) is unicode:
        from json import loads
        return loads

    import json as _myjson
    from json import scanner

    class MyJSONDecoder(_myjson.JSONDecoder):
        # JSONDecoder whose string parsing and scanning use the Python
        # implementations instead of whatever the base class selected.

        def __init__(self, *args, **kwargs):
            _myjson.JSONDecoder.__init__(self, *args, **kwargs)

            # reset scanner to python-based one using python scanstring
            self.parse_string = decoder.py_scanstring
            self.scan_once = scanner.py_make_scanner(self)

    def loads(s, *args, **kwargs):
        # Same call shape as json.loads; only supplies a default for 'cls'.
        if 'cls' not in kwargs:
            kwargs['cls'] = MyJSONDecoder
        return _myjson.loads(s, *args, **kwargs)

    return loads
Ejemplo n.º 9
0
def _get_loads(strict=STRICT):
    """Pick and return a ``loads`` callable according to ``strict``.

    Non-strict: ``simplejson.loads`` when importable, else ``json.loads``.

    Strict: if ``json`` cannot be imported, the module-level ``STRICT`` flag
    is set to ``False`` and ``simplejson.loads`` is returned.  Otherwise the
    scanstring implementation is probed with a plain string; when it yields
    ``unicode`` the stock ``json.loads`` is returned, and when it does not,
    a ``loads`` wrapper is returned that defaults ``cls`` to a decoder built
    on ``decoder.py_scanstring`` and ``scanner.py_make_scanner``.
    """
    if not strict:
        try:
            from simplejson import loads
        except ImportError:
            from json import loads
        return loads

    # If we don't have json, we can only fall back to simplejson, non-strict
    try:
        from json import decoder
    except ImportError:
        global STRICT
        STRICT = False
        from simplejson import loads
        return loads
    # Probe which type the string scanner returns for a plain string.
    try:
        res = decoder.c_scanstring('"str"', 1)
    except TypeError:
        # github issue #33: pypy may not have c_scanstring
        res = decoder.scanstring('"str"', 1)

    # The scanner already returns unicode: the stock loader can be used.
    if type(res[0]) is unicode:
        from json import loads
        return loads

    import json as _myjson
    from json import scanner

    class MyJSONDecoder(_myjson.JSONDecoder):
        # JSONDecoder whose string parsing and scanning use the Python
        # implementations instead of whatever the base class selected.
        def __init__(self, *args, **kwargs):
            _myjson.JSONDecoder.__init__(self, *args, **kwargs)

            # reset scanner to python-based one using python scanstring
            self.parse_string = decoder.py_scanstring
            self.scan_once = scanner.py_make_scanner(self)

    def loads(s, *args, **kwargs):
        # Same call shape as json.loads; only supplies a default for 'cls'.
        if 'cls' not in kwargs:
            kwargs['cls'] = MyJSONDecoder
        return _myjson.loads(s, *args, **kwargs)

    return loads
Ejemplo n.º 10
0
def JSONObject(match, context, _w=decoder.WHITESPACE.match):
    """Parse a JSON object into an ``OrderedDict`` so member order is kept.

    Args:
        match: regex match object; parsing starts at ``match.end()`` inside
            ``match.string`` (presumably just past the opening brace).
        context: object optionally exposing ``encoding``, ``strict`` and
            ``object_hook`` attributes; missing ones default to ``None``,
            ``True`` and ``None``.
        _w: whitespace matcher used to skip blanks between tokens.

    Returns:
        ``(pairs, end)`` where ``pairs`` is the ``OrderedDict`` (or whatever
        ``object_hook`` returned for it) and ``end`` is the index after the
        closing brace.  Note the empty-object path returns before
        ``object_hook`` is consulted.

    Raises:
        ValueError: on a missing property name, ``:`` or ``,`` delimiter, or
            a missing value.
    """
    pairs = OrderedDict(); # Change to an ordered dict
    s = match.string
    end = _w(s, match.end()).end()
    # Slicing (not indexing) yields '' at end of input instead of raising.
    nextchar = s[end:end + 1]
    # Trivial empty object
    if nextchar == '}':
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(decoder.errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        # 'end' points just past the key's opening quote here.
        key, end = decoder.scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(decoder.errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            # Python 2 iterator protocol: take the first scanned value.
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(decoder.errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            raise ValueError(decoder.errmsg("Expecting , delimiter", s, end - 1))
        # After a comma the next token must open another property name.
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(decoder.errmsg("Expecting property name", s, end - 1))
    object_hook = getattr(context, 'object_hook', None)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
Ejemplo n.º 11
0
def _TokenizingJSONObject(s_and_end, strict, scan_once,
                          memo, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose keys are wrapped in ``ScalarToken``.

    Args:
        s_and_end: ``(s, end)`` pair, the source text and the index at which
            to start (presumably just past the opening brace).
        strict: forwarded to ``scanstring``.
        scan_once: callable ``(s, idx) -> (value, end)`` used for member
            values; it signals "no value" by raising ``StopIteration``.
        memo: dict used to share equal key strings.
        _w, _ws: whitespace matcher and whitespace character set.

    Returns:
        ``(dict, end)`` with ``end`` the index after the closing brace.

    Raises:
        JSONDecodeError: on a missing property name, delimiter or value.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    memo_get = memo.setdefault
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            return {}, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        # Index of the key's opening quote.
        start = end - 1
        key, end = scanstring(s, end, strict)
        key = memo_get(key, key)
        # The token records 'start' and 'end - 1' (presumably the index of
        # the closing quote, if scanstring returns the index just past it).
        key = ScalarToken(memo_get(key, key), start, end - 1)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace before the value; running off the end of the
        # input is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        pairs_append((key, value))
        # Fetch the delimiter after the value; '' stands for end of input.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        # After a comma the next token must open another property name.
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)
    return dict(pairs), end
Ejemplo n.º 12
0
 def _parse_string(*args, **kwargs):
     s, idx = scanstring(*args, **kwargs)
     if s and s[0] == u'\x00':
         s = base64.b64decode(s[1:])
     return s, idx
Ejemplo n.º 13
0
def JSONObject(s_and_end,
               strict,
               scan_once,
               object_hook,
               object_pairs_hook,
               memo=None,
               _w=WHITESPACE.match,
               _ws=WHITESPACE_STR):
    """Parse a JSON object and stamp each value with its key's position.

    Every value returned by ``scan_once`` gets ``key_start`` and ``key_end``
    attributes, so values must accept attribute assignment (presumably
    ``scan_once`` yields token objects; verify against the caller).

    Args:
        s_and_end: ``(s, end)`` pair, the source text and the index at which
            to start (presumably just past the opening brace).
        strict: forwarded to ``scanstring``.
        scan_once: callable ``(s, idx) -> (value, end)``; it signals "no
            value" by raising ``StopIteration``.
        object_hook: optional callable applied to the resulting dict.
        object_pairs_hook: optional callable applied to the list of
            ``(key, value)`` pairs; takes precedence over ``object_hook``.
        memo: optional dict used to share equal key strings.
        _w, _ws: whitespace matcher and whitespace character set.

    Returns:
        ``(result, end)`` with ``end`` the index after the closing brace.

    Raises:
        JSONDecodeError: on a missing property name, delimiter or value.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        # Index of the key's opening quote.
        key_start = end - 1
        key, end = scanstring(s, end, strict)
        # Index returned by scanstring once the key has been consumed.
        key_end = end
        key = memo_get(key, key)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace before the value; running off the end of the
        # input is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        # Remember where this value's key sits in the source text.
        value.key_start = key_start
        value.key_end = key_end
        pairs_append((key, value))
        # Fetch the delimiter after the value; '' stands for end of input.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        # After a comma the next token must open another property name.
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s,
                end - 1)
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
Ejemplo n.º 14
0
 def unquote_parse_string(*args, **kwargs):
     """Scan a JSON string and pass its text through ``unquote``.

     Returns ``(unquoted_text, end_index)``.
     """
     text, end_index = scanstring(*args, **kwargs)
     return unquote(text), end_index
Ejemplo n.º 15
0
def parse_string(pinfo, string, index):
    """Scan a JSON string literal that starts at ``string[index]``.

    Returns the ``scanstring`` result ``(text, end_index)`` when a double
    quote sits at ``index``; otherwise ``(NotImplemented, index)``.
    ``pinfo`` is not used.
    """
    opens_with_quote = index < len(string) and string[index] == '"'
    if not opens_with_quote:
        return NotImplemented, index
    return scanstring(string, index + 1)
Ejemplo n.º 16
0
 def unquote_parse_string(*args, **kwargs):
     """Scan a JSON string and pass its text through ``unquote``.

     Returns ``(unquoted_text, end_index)``.
     """
     text, end_index = scanstring(*args, **kwargs)
     return unquote(text), end_index
Ejemplo n.º 17
0
def parse_string(symbol):
    """Return the decoded text of a quoted JSON string token.

    The leading quote is dropped by slicing, and scanning starts at the
    beginning of what remains.
    """
    unquoted_head = symbol[1:]
    text, _end = scanstring(unquoted_head, 0)
    return text
Ejemplo n.º 18
0
def parse_object(s_and_end,
                 strict,
                 scan_once,
                 object_hook,
                 object_pairs_hook,
                 memo=None,
                 _w=WHITESPACE.match,
                 _ws=WHITESPACE_STR,
                 _ilcs=INLINE_COMMENT_STRING_START,
                 _ilc=INLINE_COMMENT.match,
                 _mlcs=MULTILINE_COMMENT_STRING_START,
                 _mlc=MULTILINE_COMMENT.match):
    '''
    Parse a JSON object, skipping comments as well as whitespace between
    tokens.

    Modified json.decoder.JSONObject function from standard json module
    (python 3.7.7).

    Args:
        s_and_end: ``(s, end)`` pair, the source text and the index at which
            to start (just past the opening brace in the stdlib original).
        strict: forwarded to ``scanstring``.
        scan_once: callable ``(s, idx) -> (value, end)``; it signals "no
            value" by raising ``StopIteration``.
        object_hook: optional callable applied to the resulting dict.
        object_pairs_hook: optional callable applied to the list of
            ``(key, value)`` pairs; takes precedence over ``object_hook``.
        memo: optional dict used to share equal key strings.
        _w, _ws: whitespace matcher and whitespace character set.
        _ilcs, _ilc: inline-comment start string and matcher.
        _mlcs, _mlc: multi-line-comment start string and matcher.

    Returns:
        ``(result, end)`` with ``end`` the index after the closing brace.

    Raises:
        JSONDecodeError: on a missing property name, delimiter or value,
            including when the input ends early.
    '''
    # pylint: disable=invalid-name
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault

    def skip_ignorable(pos):
        '''Return the first index at or after ``pos`` that starts neither
        whitespace nor a comment (``len(s)`` at most).'''
        while True:
            char = s[pos:pos + 1]
            # '' is "in" every str, so end of input must be excluded
            # explicitly or the loop would never terminate.
            if char and char in _ws:
                match = _w(s, pos)
            elif s.startswith(_ilcs, pos):
                match = _ilc(s, pos)
            elif s.startswith(_mlcs, pos):
                match = _mlc(s, pos)
            else:
                return pos
            # A matcher that fails or makes no progress (presumably an
            # unterminated comment) ends the skip; the caller then reports
            # the offending position as a JSONDecodeError.
            if match is None or match.end() <= pos:
                return pos
            pos = match.end()

    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        end = skip_ignorable(end)
        nextchar = s[end:end + 1]

        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = memo_get(key, key)
        # To skip some function call overhead we optimize the fast path where
        # the key is immediately followed by ":".
        if s[end:end + 1] != ':':
            end = skip_ignorable(end)
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Whitespace/comments between ':' and the value.
        end = skip_ignorable(end)

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        pairs_append((key, value))

        # Delimiter after the value; '' stands for end of input.
        end = skip_ignorable(end)
        nextchar = s[end:end + 1]
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        # After a comma the next token must open another property name.
        # Slicing keeps truncated input from raising IndexError here.
        end = skip_ignorable(end)
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s,
                end - 1)
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
Ejemplo n.º 19
0
def parse_string(symbol):
    """Return the decoded text of a quoted JSON string token.

    Scanning starts at index 1, i.e. right after the opening quote.
    """
    text, _end = scanstring(symbol, 1)
    return text
Ejemplo n.º 20
0
def _parse_string(*args, **kwargs):
    s, idx = scanstring(*args, **kwargs)
    if s and s[0] == u'\x00':
        s = base64.b64decode(s[1:])
    return s, idx
Ejemplo n.º 21
0
def parse_string(symbol):
    """Return the decoded text of a quoted JSON string token.

    The leading quote is dropped by slicing, and scanning starts at the
    beginning of what remains.
    """
    unquoted_head = symbol[1:]
    text, _end = scanstring(unquoted_head, 0)
    return text
Ejemplo n.º 22
0
def parse_string(symbol):
    """Return the decoded text of a quoted JSON string token.

    Scanning starts at index 1, i.e. right after the opening quote.
    """
    text, _end = scanstring(symbol, 1)
    return text
Ejemplo n.º 23
0
def parse_escape_sequences(string):
    """Decode the backslash escape sequences found in a string.

    Sample usage:
    >>> parse_escape_sequences('foo\\nbar')
    'foo\nbar'
    >>> parse_escape_sequences('foo\\\\u0256')
    'foo\\u0256'

    :param string:
        Any string.
    :type string:
        `basestring`
    :raises:
        :class:`ValueError` when a backslash and the character(s) after it
        do not form a valid escape sequence.
    :return:
        The string with standard escape sequences and basic \\uxxxx
        sequences decoded.
        \\uxxxxxxxxxx escape sequences are not currently supported.
    :rtype:
        `unicode`
    """
    string = safe_unicode(string)
    pieces = []
    total = len(string)
    cursor = 0
    while cursor < total:
        current = string[cursor]
        if current != '\\':
            # Ordinary character: copy it through untouched.
            pieces.append(current)
            cursor += 1
            continue

        # Escape sequences span two characters (backslash plus one more),
        # except \uxxxx which spans six.
        width = 6 if string[(cursor + 1):(cursor + 2)] == 'u' else 2

        # `json.decoder.scanstring()` decodes escapes but also interprets
        # quote characters, so it is only ever handed one isolated escape
        # sequence wrapped in quotes, e.g. scanstring('"\n"', 1). The 1 is
        # the index of the first character after the opening quote.
        quoted = '"' + string[cursor:(cursor + width)] + '"'
        try:
            decoded = scanstring(quoted, 1)[0]
        except ValueError:
            # A fresh `ValueError` is raised rather than re-raising: on
            # Python 3 the original is a JSON-specific subclass with a
            # message that means nothing to callers of this non-JSON
            # function.
            raise_from(ValueError(string), None)
        else:
            pieces.append(decoded)
            cursor += width
    return ''.join(pieces)