def _stringprep(s, check_unassigned, mapping, normalization, prohibited, bidi):
    """Run a stringprep profile (RFC 3454) over ``s`` and return the result.

    Every step is skipped when its controlling argument is falsy:

    * ``check_unassigned`` -- reject any character found in table A.1.
    * ``mapping`` -- callable applied to the whole string.
    * ``normalization`` -- form name handed to ``unicodedata.normalize``.
    * ``prohibited`` -- iterable of per-character predicates; the first
      character for which any predicate is true is rejected.
    * ``bidi`` -- hand the prepared string to ``_check_bidi``.

    Raises ``TypeError`` when ``s`` is not a ``str`` and ``SASLPrepError``
    for an unassigned or prohibited character.
    """
    if not isinstance(s, str):
        raise TypeError('argument 0 must be str, not %s' % type(s).__name__)

    if check_unassigned:
        # The check runs on the raw input, before mapping/normalization.
        unassigned = next(
            (char for char in s if stringprep.in_table_a1(char)), None)
        if unassigned is not None:
            raise SASLPrepError('Unassigned character: %r' % unassigned)

    prepared = mapping(s) if mapping else s
    if normalization:
        prepared = unicodedata.normalize(normalization, prepared)

    if prohibited:
        for char in prepared:
            if any(lookup(char) for lookup in prohibited):
                raise SASLPrepError('Prohibited character: %r' % char)

    if bidi:
        _check_bidi(prepared)
    return prepared
def _stringprep(s, check_unassigned, mapping, normalization, prohibited, bidi):
    """Apply the steps of an RFC 3454 stringprep profile to ``s``.

    A step only runs when its argument is truthy: ``check_unassigned``
    rejects table A.1 characters, ``mapping`` is called on the whole string,
    ``normalization`` is the form passed to ``unicodedata.normalize``,
    ``prohibited`` is an iterable of character predicates, and ``bidi``
    passes the result to ``_check_bidi``.

    Returns the prepared string.  Raises ``TypeError`` for non-``str`` input
    and ``SASLPrepError`` for an unassigned or prohibited character.
    """
    if not isinstance(s, str):
        raise TypeError('argument 0 must be str, not %s' % type(s).__name__)

    if check_unassigned:  # pragma: no branch
        # Look at the untouched input; the first table A.1 hit is reported.
        offender = next(filter(stringprep.in_table_a1, s), None)
        if offender is not None:
            raise SASLPrepError('Unassigned character: %r' % offender)

    result = s
    if mapping:  # pragma: no branch
        result = mapping(result)

    if normalization:  # pragma: no branch
        result = unicodedata.normalize(normalization, result)

    if prohibited:  # pragma: no branch
        for ch in result:
            if any(is_prohibited(ch) for is_prohibited in prohibited):
                raise SASLPrepError('Prohibited character: %r' % ch)

    if bidi:  # pragma: no branch
        _check_bidi(result)

    return result
def saslprep(s, allow_unassigned = False):
    '''
    Prepare the Unicode string s with SASLprep, the stringprep profile for
    user names and passwords (RFC 4013).

    When allow_unassigned is True the final unassigned-code-point check is
    skipped; this is recommended for query terms and other non-storing
    situations only.

    Returns the prepared Unicode string.  Raises TypeError when s is not a
    Unicode string and ValueError when the input is disallowed.
    '''
    if type(s) != type(u''):
        raise TypeError("input must be a Unicode string")

    # phase 1: mapping -- drop table B.1 characters, turn C.1.2 spaces into
    # a plain space
    mapped = []
    for ch in unichars(s):
        if stringprep.in_table_b1(ch):
            continue
        mapped.append(u' ' if stringprep.in_table_c12(ch) else ch)
    s = u''.join(mapped)

    # phase 2: normalization
    s = unicodedata.normalize('NFKC', s)

    # phase 3: prohibition -- tables are consulted in this order for each
    # character; the first hit decides the message
    prohibitions = (
        (stringprep.in_table_c12, "prohibited non-ASCII space character"),
        (stringprep.in_table_c21, "prohibited ASCII control character"),
        (stringprep.in_table_c22, "prohibited non-ASCII control character"),
        (stringprep.in_table_c3, "prohibited private use character"),
        (stringprep.in_table_c4, "prohibited non-character code point"),
        (stringprep.in_table_c5, "prohibited surrogate code point"),
        (stringprep.in_table_c6,
         "prohibited character inappropriate for plain text"),
        (stringprep.in_table_c7,
         "prohibited character inappropriate for canonical representation"),
        (stringprep.in_table_c8,
         "prohibited character changing display properties, or a deprecated character"),
        (stringprep.in_table_c9, "prohibited tagging character"),
    )
    for ch in unichars(s):
        for in_table, message in prohibitions:
            if in_table(ch):
                raise ValueError(message)

    # phase 4: bidi check -- one class letter per character:
    # 'r' = table D.1, 'l' = table D.2, 'x' = neither
    bidi_map = ''.join(
        'r' if stringprep.in_table_d1(ch)
        else ('l' if stringprep.in_table_d2(ch) else 'x')
        for ch in unichars(s)
    )
    if 'r' in bidi_map:
        if 'l' in bidi_map:
            raise ValueError("prohibited mixture of strong left-to-right and right-to-left text")
        starts_rtl = bidi_map[0] == 'r'
        ends_rtl = bidi_map[-1] == 'r'
        if not (starts_rtl and ends_rtl):
            raise ValueError("string containing right-to-left text must start and end with right-to-left text")

    # phase 5: unassigned check
    if not allow_unassigned:
        for ch in unichars(s):
            if stringprep.in_table_a1(ch):
                raise ValueError("prohibited unassigned code point")

    return s
def nodeprep(foo, errors='strict'):
    """Prepare ``foo`` with a nodeprep-style stringprep profile.

    Steps, in order: drop table B.1 characters, map the rest through table
    B.2, normalize with NFKC, then validate every character against the
    prohibited tables, the extra banned list ``"&'/;<>@`` and the
    bidirectional rules of RFC 3454 section 6.

    :param foo: Text to prepare.  ``None`` is returned unchanged; a byte
        string is decoded as UTF-8 first.
    :param errors: When equal to ``'strict'`` (the default), characters in
        table A.1 (unassigned code points) are rejected as well.
    :return: The prepared text, or ``None`` when ``foo`` is ``None``.
    :raises UnicodeError: If the text contains a rejected character or
        breaks the bidirectional rules.
    """
    if foo is None:
        return None
    # ``bytes`` is ``str`` on Python 2, so this keeps the old behaviour there
    # while also handling byte strings on Python 3.
    if isinstance(foo, bytes):
        foo = foo.decode('utf-8')

    mapped = u''.join(
        stringprep.map_table_b2(ch)
        for ch in foo
        if not stringprep.in_table_b1(ch)
    )
    foo = unicodedata.normalize('NFKC', mapped)

    first_is_randal = bool(foo) and stringprep.in_table_d1(foo[0])
    if first_is_randal and not stringprep.in_table_d1(foo[-1]):
        raise UnicodeError("Section 6.3 [end]")

    # Checked in this order for every character; the first hit is reported.
    prohibited = (
        (stringprep.in_table_c11, "In table C.1.1"),
        (stringprep.in_table_c12, "In table C.1.2"),
        (stringprep.in_table_c21, "In table C.2.1"),
        (stringprep.in_table_c22, "In table C.2.2"),
        (stringprep.in_table_c3, "In table C.3"),
        (stringprep.in_table_c4, "In table C.4"),
        (stringprep.in_table_c5, "In table C.5"),
        (stringprep.in_table_c6, "In table C.6"),
        (stringprep.in_table_c7, "In table C.7"),
        (stringprep.in_table_c8, "In table C.8"),
        (stringprep.in_table_c9, "In table C.9"),
    )
    check_unassigned = errors == 'strict'

    for index, ch in enumerate(foo):
        if check_unassigned and stringprep.in_table_a1(ch):
            raise UnicodeError("Unassigned Codepoint")
        for in_table, message in prohibited:
            if in_table(ch):
                raise UnicodeError(message)
        if ch in "\"&'/;<>@":
            raise UnicodeError("In nodeprep banned list")
        if index:
            if first_is_randal and stringprep.in_table_d2(ch):
                raise UnicodeError("Section 6.2")
            # A RandALCat character anywhere -- including the last position --
            # requires the string to *start* with one, so there is no
            # exemption for the final character.
            if not first_is_randal and stringprep.in_table_d1(ch):
                raise UnicodeError("Section 6.3")
    return foo
def is_valid_rfc3491(domainname):
    """
    Report whether the domain name is free of unassigned code points.

    The check is a scan for characters in stringprep table A.1; the first
    such character makes the result False.

    :param domainname: The unicode string of the domain name.
    :return: True if no character of the name is in table A.1.
    """
    return not any(stringprep.in_table_a1(char) for char in domainname)
def canonicaliseUsername(username, ignoreSpaces = False, throws = True):
    """Return ``username`` in canonical form using the stringprep tables.

    Processing order:

    1. Characters in table A.1 are rejected (or dropped), characters in
       table B.1 are dropped and everything else goes through table B.2.
    2. The result is NFKC-normalized with ``stringprep.unicodedata``.
    3. Characters in tables C.1.1/C.1.2 (unless ``ignoreSpaces``), C.2.1/
       C.2.2 and C.3 to C.9, or with general category Ps, Pe, Pi, Pf or Po,
       are rejected (or dropped).
    4. Only when ``throws`` is true: a name containing a table D.1
       character must not contain a table D.2 character and must start and
       end with a table D.1 character.

    :param username: The name to canonicalise.
    :param ignoreSpaces: If true, space characters are allowed.
    :param throws: If true, offending input raises ``ValueError``; if
        false, offending characters are removed and the bidirectional
        check is skipped.
    :return: The canonicalised name.
    :raises ValueError: For offending input when ``throws`` is true.
    """
    # Read stringprep documentation for the meaning of the tables
    mapped = []
    for char in username:
        if stringprep.in_table_a1(char):
            if throws:
                raise ValueError
        elif not stringprep.in_table_b1(char):
            mapped.append(stringprep.map_table_b2(char))

    normalized = stringprep.unicodedata.normalize("NFKC", u"".join(mapped))

    kept = []
    for char in normalized:
        if ((not ignoreSpaces and stringprep.in_table_c11_c12(char))
                or stringprep.in_table_c21_c22(char)
                or stringprep.in_table_c3(char)
                or stringprep.in_table_c4(char)
                or stringprep.in_table_c5(char)
                or stringprep.in_table_c6(char)
                or stringprep.in_table_c7(char)
                or stringprep.in_table_c8(char)
                or stringprep.in_table_c9(char)
                or stringprep.unicodedata.category(char)
                in ("Ps", "Pe", "Pi", "Pf", "Po")):
            if throws:
                raise ValueError
        else:
            kept.append(char)
    chars = u"".join(kept)

    if throws:
        # Materialize the flags: on Python 3 map()/filter() return lazy
        # objects that are always truthy and cannot be indexed.
        rand_al = [stringprep.in_table_d1(char) for char in chars]
        if any(rand_al):
            if any(stringprep.in_table_d2(char) for char in chars):
                raise ValueError
            if not rand_al[0] or not rand_al[-1]:
                raise ValueError
    return chars
def saslprep(text, strict=True):
    """
    Return a processed version of the given string, using the
    SASLPrep profile of stringprep.

    :param text: The string to process, in UTF-8.
    :param strict: If ``True``, prevent the use of unassigned
                   code points.
    :raises SASLPrepFailure: If the normalized string contains a
                   prohibited character or breaks the bidirectional
                   rules of RFC 3454 section 6.
    """

    if sys.version_info < (3, 0):
        if isinstance(text, str):
            text = text.decode('utf-8')

    # Mapping:
    #
    # -  non-ASCII space characters [StringPrep, C.1.2] that can be
    #    mapped to SPACE (U+0020), and
    #
    # -  the 'commonly mapped to nothing' characters [StringPrep, B.1]
    #    that can be mapped to nothing.
    mapped = []
    for char in text:
        if stringprep.in_table_c12(char):
            mapped.append(' ')
        elif not stringprep.in_table_b1(char):
            mapped.append(char)

    # Normalization using form KC
    text = unicodedata.normalize('NFKC', ''.join(mapped))

    # Check for bidirectional string
    first_is_randal = bool(text) and stringprep.in_table_d1(text[0])
    if first_is_randal and not stringprep.in_table_d1(text[-1]):
        raise SASLPrepFailure('Section 6.3 [end]')

    # Check for prohibited characters; tables are consulted in this order
    # and the first hit is reported.
    prohibited = (
        (stringprep.in_table_c12, 'In table C.1.2'),
        (stringprep.in_table_c21, 'In table C.2.1'),
        (stringprep.in_table_c22, 'In table C.2.2'),
        (stringprep.in_table_c3, 'In table C.3'),
        (stringprep.in_table_c4, 'In table C.4'),
        (stringprep.in_table_c5, 'In table C.5'),
        (stringprep.in_table_c6, 'In table C.6'),
        (stringprep.in_table_c7, 'In table C.7'),
        (stringprep.in_table_c8, 'In table C.8'),
        (stringprep.in_table_c9, 'In table C.9'),
    )
    for index, char in enumerate(text):
        if strict and stringprep.in_table_a1(char):
            raise SASLPrepFailure('Unassigned Codepoint')
        for in_table, message in prohibited:
            if in_table(char):
                raise SASLPrepFailure(message)
        if index:
            if first_is_randal and stringprep.in_table_d2(char):
                raise SASLPrepFailure('Section 6.2')
            # Any RandALCat character requires the string to start with
            # one, so the last position gets no exemption.
            if not first_is_randal and stringprep.in_table_d1(char):
                raise SASLPrepFailure('Section 6.3')

    return text
def check_unassigneds(self, string):
    """Reject ``string`` if it contains a character from table A.1.

    :param string: The text to scan, character by character.
    :raises UnicodeError: On the first character found in stringprep
        table A.1; the message carries the ``repr`` of that character.
    """
    for c in string:
        if stringprep.in_table_a1(c):
            # Call form of raise: valid on both Python 2 and Python 3.
            raise UnicodeError("Unassigned code point %s" % repr(c))
def prohibited_output_profile(string):
    """RFC4013 Prohibited output profile implementation.

    Every character of ``string`` is tested, in this order, against:

    - RFC4013, 2.3. Prohibited Output:
        - Non-ASCII space characters [StringPrep, C.1.2]
        - ASCII and non-ASCII control characters [StringPrep, C.2.1, C.2.2]
        - Private Use characters [StringPrep, C.3]
        - Non-character code points [StringPrep, C.4]
        - Surrogate code points [StringPrep, C.5]
        - Inappropriate for plain text characters [StringPrep, C.6]
        - Inappropriate for canonical representation characters
          [StringPrep, C.7]
        - Change display properties or deprecated characters
          [StringPrep, C.8]
        - Tagging characters [StringPrep, C.9]
    - RFC4013, 2.4. Bidirectional Characters.
    - RFC4013, 2.5. Unassigned Code Points.

    Returns nothing; raises ValueError on the first failing check.
    """
    RFC = 'RFC4013'

    # Determine how to handle bidirectional characters (RFC3454):
    # when is_ral_string() reports a RandALCat string, table D.2 (LCat)
    # characters are forbidden; otherwise table D.1 (RandALCat) ones are.
    if is_ral_string(string):
        bidi_table, is_prohibited_bidi_ch = 'D.2', in_table_d2
    else:
        bidi_table, is_prohibited_bidi_ch = 'D.1', in_table_d1

    checks = (
        # RFC4013 2.3. Prohibited Output:
        (in_table_c12,
         '%s: prohibited non-ASCII space characters '
         'that cannot be replaced (C.1.2).' % RFC),
        (in_table_c21_c22,
         '%s: prohibited control characters (C.2.1).' % RFC),
        (in_table_c3,
         '%s: prohibited private Use characters (C.3).' % RFC),
        (in_table_c4,
         '%s: prohibited non-character code points (C.4).' % RFC),
        (in_table_c5,
         '%s: prohibited surrogate code points (C.5).' % RFC),
        (in_table_c6,
         '%s: prohibited inappropriate for plain text '
         'characters (C.6).' % RFC),
        (in_table_c7,
         '%s: prohibited inappropriate for canonical '
         'representation characters (C.7).' % RFC),
        (in_table_c8,
         '%s: prohibited change display properties / '
         'deprecated characters (C.8).' % RFC),
        (in_table_c9,
         '%s: prohibited tagging characters (C.9).' % RFC),
        # RFC4013, 2.4. Bidirectional Characters:
        (is_prohibited_bidi_ch,
         '%s: prohibited bidi characters (%s).' % (RFC, bidi_table)),
        # RFC4013, 2.5. Unassigned Code Points:
        (in_table_a1,
         '%s: prohibited unassigned code points (A.1).' % RFC),
    )

    for c in string:
        for is_prohibited, message in checks:
            if is_prohibited(c):
                raise ValueError(message)
def saslprep(text, strict=True):
    """
    Return a processed version of the given string, using the
    SASLPrep profile of stringprep.

    :param text: The string to process, in UTF-8.
    :param strict: If ``True``, prevent the use of unassigned
                   code points.
    :raises SASLPrepFailure: If the normalized string contains a
                   prohibited character or breaks the bidirectional
                   rules of RFC 3454 section 6.
    """

    if sys.version_info < (3, 0):
        if isinstance(text, str):
            text = text.decode("utf-8")

    # Mapping:
    #
    # -  non-ASCII space characters [StringPrep, C.1.2] that can be
    #    mapped to SPACE (U+0020), and
    #
    # -  the 'commonly mapped to nothing' characters [StringPrep, B.1]
    #    that can be mapped to nothing.
    pieces = []
    for char in text:
        if stringprep.in_table_c12(char):
            pieces.append(" ")
        elif not stringprep.in_table_b1(char):
            pieces.append(char)

    # Normalization using form KC
    text = unicodedata.normalize("NFKC", "".join(pieces))

    # Check for bidirectional string
    first_is_randal = bool(text) and stringprep.in_table_d1(text[0])
    if first_is_randal and not stringprep.in_table_d1(text[-1]):
        raise SASLPrepFailure("Section 6.3 [end]")

    # Check for prohibited characters; tables are consulted in this order
    # and the first hit is reported.
    prohibited_tables = (
        (stringprep.in_table_c12, "In table C.1.2"),
        (stringprep.in_table_c21, "In table C.2.1"),
        (stringprep.in_table_c22, "In table C.2.2"),
        (stringprep.in_table_c3, "In table C.3"),
        (stringprep.in_table_c4, "In table C.4"),
        (stringprep.in_table_c5, "In table C.5"),
        (stringprep.in_table_c6, "In table C.6"),
        (stringprep.in_table_c7, "In table C.7"),
        (stringprep.in_table_c8, "In table C.8"),
        (stringprep.in_table_c9, "In table C.9"),
    )
    for position, char in enumerate(text):
        if strict and stringprep.in_table_a1(char):
            raise SASLPrepFailure("Unassigned Codepoint")
        for in_table, message in prohibited_tables:
            if in_table(char):
                raise SASLPrepFailure(message)
        if position:
            if first_is_randal and stringprep.in_table_d2(char):
                raise SASLPrepFailure("Section 6.2")
            # Any RandALCat character requires the string to start with
            # one, so the last position gets no exemption.
            if not first_is_randal and stringprep.in_table_d1(char):
                raise SASLPrepFailure("Section 6.3")

    return text
def update_event(self, inp=-1):
    """Set output 0 to the table A.1 lookup result for the value on input 0.

    ``inp`` is accepted but not used here.
    """
    value = self.input(0)
    is_unassigned = stringprep.in_table_a1(value)
    self.set_output_val(0, is_unassigned)
import stringprep
import unicodedata

# Scratch script: prints a few code points together with what unicodedata
# and stringprep table A.1 report about them.

print(u'\ua62b')
print(unicodedata.category(u"\ua62b"))  # Lo
print(stringprep.in_table_a1(u'\ua62b'))
print(stringprep.in_table_a1(chr(0x1F475)))

print(u'\ua62c')
# U+A62C is not defined, so it has no name: unicodedata.name() raises
# ValueError for such a character unless a default is supplied.
print(unicodedata.name(u"\ua62c", "<no name>"))
print(stringprep.in_table_a1(u'\ua62c'))
print(unicodedata.category(u"\ua62c"))  # Cn

# https://baike.baidu.com/item/Unicode/750500?fr=aladdin
print(u'\u4e00')

# https://home.unicode.org/#
print(u'\uA755')

print(u'\u03c0')
print(chr(0x03c0))
print(chr(0x1F4AF))  # 💯
def check_unassigned(unistr):
    """Raise IdentifierError if any character of ``unistr`` is in table A.1.

    The error is built from ``IdentifierErrors.restriction_unassigned``.
    Returns nothing when no such character is found.
    """
    if any(stringprep.in_table_a1(char) for char in unistr):
        raise IdentifierError(IdentifierErrors.restriction_unassigned)
def __is_unassigned_code_point(char: str):
    """Return the stringprep table A.1 lookup result for ``char``."""
    in_a1 = stringprep.in_table_a1(char)
    return in_a1
def check_unassigneds(self, string):
    """Raise UnicodeError for the first character of ``string`` in table A.1.

    The message includes the ``repr`` of the offending character.  Returns
    nothing when no character is in the table.
    """
    offender = next(
        (char for char in string if stringprep.in_table_a1(char)), None)
    if offender is not None:
        raise UnicodeError("Unassigned code point %s" % repr(offender))
def saslprep(s, allow_unassigned=False):
    '''
    Run the SASLprep stringprep profile for user names and passwords
    (RFC 4013) on the Unicode string s.

    Setting allow_unassigned to True disables the closing check for
    unassigned code points; this is recommended for query terms and other
    non-storing situations only.

    The prepared Unicode string is returned.  A non-Unicode argument raises
    TypeError; disallowed input raises ValueError.
    '''
    if type(s) != type(u''):
        raise TypeError("input must be a Unicode string")

    # phase 1: mapping (B.1 characters vanish, C.1.2 spaces become u' ')
    kept = [ch for ch in unichars(s) if not stringprep.in_table_b1(ch)]
    s = u''.join([
        u' ' if stringprep.in_table_c12(ch) else ch for ch in kept
    ])

    # phase 2: normalization
    s = unicodedata.normalize('NFKC', s)

    # phase 3: prohibition (first matching table, in this order, wins)
    rules = [
        (stringprep.in_table_c12, "prohibited non-ASCII space character"),
        (stringprep.in_table_c21, "prohibited ASCII control character"),
        (stringprep.in_table_c22, "prohibited non-ASCII control character"),
        (stringprep.in_table_c3, "prohibited private use character"),
        (stringprep.in_table_c4, "prohibited non-character code point"),
        (stringprep.in_table_c5, "prohibited surrogate code point"),
        (stringprep.in_table_c6,
         "prohibited character inappropriate for plain text"),
        (stringprep.in_table_c7,
         "prohibited character inappropriate for canonical representation"),
        (stringprep.in_table_c8,
         "prohibited character changing display properties, or a deprecated character"),
        (stringprep.in_table_c9, "prohibited tagging character"),
    ]
    for ch in unichars(s):
        for matches, reason in rules:
            if matches(ch):
                raise ValueError(reason)

    # phase 4: bidi check; classify each character as 'r' (table D.1),
    # 'l' (table D.2) or 'x' (neither)
    classes = []
    for ch in unichars(s):
        if stringprep.in_table_d1(ch):
            classes.append('r')
        elif stringprep.in_table_d2(ch):
            classes.append('l')
        else:
            classes.append('x')
    bidi_map = ''.join(classes)
    if 'r' in bidi_map:
        if 'l' in bidi_map:
            raise ValueError(
                "prohibited mixture of strong left-to-right and right-to-left text"
            )
        if not (bidi_map[0] == 'r' and bidi_map[-1] == 'r'):
            raise ValueError(
                "string containing right-to-left text must start and end with right-to-left text"
            )

    # phase 5: unassigned check
    if allow_unassigned:
        return s
    for ch in unichars(s):
        if stringprep.in_table_a1(ch):
            raise ValueError("prohibited unassigned code point")
    return s