Esempio n. 1
0
def entity_decode_hex(input, errors='strict'):
    """
    Decode two-digit hex HTML entities (``&#xHH;``) in a string.

    Every ``&#xHH;`` whose two digits ``HH`` are a key of the
    ``_hextochr`` lookup table (defined outside this function) is
    replaced by the mapped character.  Anything else that merely starts
    with ``&#x`` -- a different digit count, a missing ``;``, digits the
    table does not know -- is copied to the output exactly as written.

    :param input: text to decode.
    :param errors: accepted so the signature matches the other codec
        functions; it is not used here.
    :returns: a ``(decoded_text, len(input))`` tuple.
    """
    # The capturing group makes split() return the preamble as it was
    # actually spelled ("&#x" or "&#X"), so rejected entities can be put
    # back verbatim instead of being normalised to lower case.
    preamble_regex = re.compile(r"(&#x)", flags=re.I)
    bits = preamble_regex.split(input)
    # fastpath: nothing that even looks like a hex entity
    if len(bits) == 1:
        return input, len(input)
    res = [bits[0]]
    append = res.append
    # bits is laid out as [text, preamble, tail, preamble, tail, ...]
    for i in range(1, len(bits), 2):
        preamble, item = bits[i], bits[i + 1]
        decoded = None
        # Only consult the table when the entity is terminated right
        # after two digits; otherwise item[3:] would silently eat a
        # character that is not the ';' terminator.
        if item[2:3] == ';':
            try:
                decoded = _hextochr[item[:2]]
            except KeyError:
                # Unknown digits: fall through and restore the text.
                pass
        if decoded is None:
            append(preamble)
            append(item)
        else:
            append(decoded)
            append(item[3:])

    return (''.join(res), len(input))
Esempio n. 2
0
def ascii85_encode(input, errors='strict'):
    """
    Encode a string as Ascii85 (Adobe flavour, not btoa).

    The input is zero-padded to a multiple of four characters, each
    four-character block is read as a big-endian unsigned 32-bit integer
    and written as five base-85 digits offset by 33.  An all-zero block
    is written as the single character ``z``.  As many trailing output
    characters as padding characters were added are removed again.

    :param input: data to encode; must not end with a NUL character.
    :param errors: accepted for signature compatibility; not used.
    :returns: ``(encoded, length)`` where ``length`` is the length of
        the input *after* padding.
    """
    assert not input.endswith('\0'), "Trailing nulls unsupported"
    if _is_unicode(input):
        # Round-trip through the "bin" helpers; per the original author's
        # note this converts multibyte text to codepoint form.
        encoded, _ = bin_encode(input)
        input, _ = bin_decode(encoded)
    # encoding is adobe not btoa
    bs = 4
    padding = -len(input) % bs
    input += '\0' * padding
    pieces = []
    for block in blocks(input, bs):
        value = unpack(">I", block)[0]
        if not value:
            pieces.append("z")
            continue
        # Five base-85 digits, produced least significant first.
        digits = []
        for _ in range(bs + 1):
            value, rem = divmod(value, 85)
            digits.append(chr(rem + 33))
        pieces.append(''.join(reversed(digits)))
    output = ''.join(pieces)
    if padding:
        output = output[:-padding]
    return output, len(input)
Esempio n. 3
0
def entity_decode_hex(input, errors='strict'):
    """
    Decode two-digit hex HTML entities (``&#xHH;``) in a string.

    Byte strings and unicode strings take the same path: each
    ``&#xHH;`` whose digits are a key of the ``_hextochr`` lookup table
    (defined outside this function) is replaced by the mapped character.
    Text that starts with ``&#x`` but is not such an entity is copied
    through unchanged.

    :param input: text to decode.
    :param errors: accepted so the signature matches the other codec
        functions; it is not used here.
    :returns: always a ``(decoded_text, len(input))`` tuple, including
        when the input contains no entity at all.
    """
    # Capture the preamble so that its original spelling ("&#x"/"&#X")
    # is available when an entity has to be restored untouched.
    preamble_regex = re.compile(r"(&#x)", flags=re.I)
    bits = preamble_regex.split(input)
    # fastpath: no entity preamble anywhere in the input
    if len(bits) == 1:
        return input, len(input)
    res = [bits[0]]
    append = res.append
    # split() with a capturing group yields
    # [text, preamble, tail, preamble, tail, ...]
    for i in range(1, len(bits), 2):
        preamble, item = bits[i], bits[i + 1]
        decoded = None
        # Require the ';' terminator directly after the two digits so
        # that no unrelated character is dropped by item[3:] below.
        if item[2:3] == ';':
            try:
                decoded = _hextochr[item[:2]]
            except KeyError:
                # Digits not in the table: leave the text as it was.
                pass
        if decoded is None:
            append(preamble)
            append(item)
        else:
            append(decoded)
            append(item[3:])

    return (''.join(res), len(input))
Esempio n. 4
0
def _foursquare_urlencode(query, doseq=0, safe_chars="&/,+"):
    """Gnarly hack because Foursquare doesn't properly handle standard url encoding"""
    # Original doc: http://docs.python.org/2/library/urllib.html#urllib.urlencode
    # Works the same way as urllib.urlencode except two differences -
    # 1. it uses `quote()` instead of `quote_plus()`
    # 2. it takes an extra parameter called `safe_chars` which is a string
    #    having the characters which should not be encoded.
    #
    # Courtesy of github.com/iambibhas
    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = urllib.quote(str(k), safe=safe_chars)
            v = urllib.quote(str(v), safe=safe_chars)
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = urllib.quote(str(k), safe=safe_chars)
            if isinstance(v, str):
                v = urllib.quote(v, safe=safe_chars)
                l.append(k + '=' + v)
            elif urllib._is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = urllib.quote(v.encode("ASCII","replace"), safe=safe_chars)
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    v = urllib.quote(str(v), safe=safe_chars)
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + urllib.quote(str(elt)))
    return '&'.join(l)
Esempio n. 5
0
def ascii85_encode(input, errors='strict'):
    """
    Ascii85-encode ``input`` using the Adobe variant (not btoa).

    Processing steps, as implemented below:

    * pad the input with NUL characters up to a multiple of four;
    * unpack every four-character block as a big-endian unsigned
      32-bit integer;
    * emit ``z`` for a zero block, otherwise five base-85 digits, each
      shifted by 33;
    * cut as many characters off the end of the output as NULs were
      added to the input.

    :param input: data to encode; a trailing NUL trips the assertion.
    :param errors: present for signature compatibility; ignored.
    :returns: ``(encoded, length)``; ``length`` is measured on the
        padded input.
    """
    assert not input.endswith('\0'), "Trailing nulls unsupported"
    if _is_unicode(input):
        # The original author describes this bin_encode/bin_decode
        # round trip as converting from multibyte to codepoint form.
        raw, _consumed = bin_encode(input)
        input, _consumed = bin_decode(raw)
    # encoding is adobe not btoa
    bs = 4
    remainder = len(input) % bs
    padding = bs - remainder if remainder else 0
    input += '\0' * padding
    output = ""
    for block in blocks(input, bs):
        (word,) = unpack(">I", block)
        if word == 0:
            output += "z"
        else:
            # Prepending each digit yields most-significant-first order.
            encoded = ""
            for _ in range(bs + 1):
                word, digit = divmod(word, 85)
                encoded = chr(digit + 33) + encoded
            output += encoded
    if padding:
        output = output[:-padding]
    return output, len(input)
Esempio n. 6
0
def urlencode(query,doseq=0):
    """
        Hack of urllib's urlencode function, which can handle
        utf-8, but for unknown reasons, chooses not to by 
        trying to encode everything as ascii
    """
    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb


    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("utf8","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
Esempio n. 7
0
def urlencode(query, doseq=0):
    """
	Hack of urllib's urlencode function, which can handle
	utf-8, but for unknown reasons, chooses not to by 
	trying to encode everything as ascii
    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("utf8", "replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
Esempio n. 8
0
def urlencode(query, doseq=0, safe='/<>"\'=:()'):
    '''
    This is my version of urllib.urlencode , that adds "/" as a safe character and also adds support
    for "repeated parameter names".
    
    Note:
        This function is EXPERIMENTAL and should be used with care ;)
    
    Maybe this is the place to fix this bug:
        http://sourceforge.net/tracker2/?func=detail&aid=2675634&group_id=170274&atid=853652
        
    Original documentation:
        Encode a sequence of two-element tuples or dictionary into a URL query string.

        If any values in the query arg are sequences and doseq is true, each
        sequence element is converted to a separate parameter.

        If the query arg is a sequence of two-element tuples, the order of the
        parameters in the output will match the order of parameters in the
        input.
    '''

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            
            # keys are easy
            k = urllib.quote_plus(str(k), safe)
            
            # Check for [] in the value
            if isinstance(v, list):
                for v_item in v:
                    v_item = urllib.quote_plus(str(v_item), safe)
                    l.append(k + '=' + v_item)
            else:
                v = urllib.quote_plus(str(v), safe)
                l.append(k + '=' + v)
    else:
        for k, v in query:
            # keys are easy...
            k = urllib.quote_plus(str(k), safe)
            
            # now the value...
            # is string
            if isinstance(v, str):
                v = urllib.quote_plus(v, safe)
                l.append(k + '=' + v)
    
            # is unicode...
            elif urllib._is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = urllib.quote_plus(v.encode("ASCII","replace"), safe)
                l.append(k + '=' + v)
                
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = urllib.quote_plus(str(v), safe)
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + urllib.quote_plus(str(elt), safe))
                        
    return '&'.join(l)
def urlencode(query, doseq=0, safe='/<>"\'=:()'):
    '''
    This is my version of urllib.urlencode , that adds "/" as a safe character and also adds support
    for "repeated parameter names".
    
    Note:
        This function is EXPERIMENTAL and should be used with care ;)
    
    Maybe this is the place to fix this bug:
        http://sourceforge.net/tracker2/?func=detail&aid=2675634&group_id=170274&atid=853652
        
    Original documentation:
        Encode a sequence of two-element tuples or dictionary into a URL query string.

        If any values in the query arg are sequences and doseq is true, each
        sequence element is converted to a separate parameter.

        If the query arg is a sequence of two-element tuples, the order of the
        parameters in the output will match the order of parameters in the
        input.


    >>> import cgi
    >>> urlencode( cgi.parse_qs('a=1&a=c') )
    'a=1&a=c'
    >>> urlencode( cgi.parse_qs('a=1&b=c') )
    'a=1&b=c'
    >>> urlencode( cgi.parse_qs('a=á&a=2') )
    'a=%C3%A1&a=2'
    >>> urlencode( 'a=b&c=d' )
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    TypeError: not a valid non-string sequence or mapping object
    '''

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            
            # keys are easy
            k = urllib.quote_plus(str(k), safe)
            
            # Check for [] in the value
            if isinstance(v, list):
                for v_item in v:
                    v_item = urllib.quote_plus(str(v_item), safe)
                    l.append(k + '=' + v_item)
            else:
                v = urllib.quote_plus(str(v), safe)
                l.append(k + '=' + v)
    else:
        for k, v in query:
            # keys are easy...
            k = urllib.quote_plus(str(k), safe)
            
            # now the value...
            # is string
            if isinstance(v, str):
                v = urllib.quote_plus(v, safe)
                l.append(k + '=' + v)
    
            # is unicode...
            elif urllib._is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = urllib.quote_plus(v.encode("ASCII","replace"), safe)
                l.append(k + '=' + v)
                
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = urllib.quote_plus(str(v), safe)
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + urllib.quote_plus(str(elt), safe))
                        
    return '&'.join(l)
Esempio n. 10
0
def ebs_urlencode(query,doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    This is different from the Python version in urllib as it uses quote instead
    of quote_plus for compatibility with the EBS payment gateway.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
Esempio n. 11
0
def urlencode(query):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    This version is adapted from the standard library to understand operators in the
    pyesgf.search.constraints module.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb


    def append(k, v, tag, l):
        from .search.consts import OPERATOR_NEQ

        if tag == OPERATOR_NEQ:
            l.append('%s!=%s' % (k, v))
        elif tag is None:
            l.append('%s=%s' % (k, v))
        else:
            raise ValueError('Unknown operator tag %s' % tag)

    def strip_tag(v):
        if type(v) == tuple:
            tag, v = v
        else:
            tag = None
            
        return tag, v

    l = []
    for k, v in query:
        tag, v = strip_tag(v)
        k = quote_plus(str(k))
        if isinstance(v, str):
            v = quote_plus(v)
            append(k, v, tag, l)
        elif _is_unicode(v):
            # is there a reasonable way to convert to ASCII?
            # encode generates a string, but "replace" or "ignore"
            # lose information and "strict" can raise UnicodeError
            v = quote_plus(v.encode("ASCII","replace"))
            append(k, v, tag, l)
        else:
            try:
                # is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                v = quote_plus(str(v))
                append(k, v, tag, l)
            else:
                # loop over the sequence
                for elt in v:
                    append(k, quote_plus(str(elt)), tag, l)
    return '&'.join(l)