Python TextEncoderの例、otp.otpbase.OTPModules.TextEncoder Pythonの例

コード例 #1

0

ファイルを表示

 def fontHasCharacters(name, font=font):
     """Report the first character of ``name`` that ``font`` cannot provide.

     A throw-away TextNode is given the font and asked, character by
     character, whether it has that character. Returns None when no font
     was supplied or when every character is available; otherwise returns
     the OTPLocalizer.NCBadCharacter message filled in with the offending
     character.
     """
     if not font:
         return None

     probe = TextNode('NameCheck')
     probe.setFont(font)
     for glyph in name:
         if probe.hasCharacter(str(glyph)):
             continue
         notify.info('name contains bad char: %s' %
                     TextEncoder().encodeWtext(glyph))
         return OTPLocalizer.NCBadCharacter % TextEncoder().encodeWtext(glyph)
     return None

コード例 #2

0

ファイルを表示

 def badCharacters(name, _validCharacter=_validCharacter):
     """Return an error string for the first character rejected by
     ``_validCharacter``, or None if every character is accepted.

     Digits get their own message (NCNoDigits); any other rejected
     character is reported through NCBadCharacter.
     """
     for ch in name:
         if _validCharacter(ch):
             continue
         if ch in string.digits:
             notify.info('name contains digits')
             return OTPLocalizer.NCNoDigits
         notify.info('name contains bad char: %s' %
                     TextEncoder().encodeWtext(ch))
         return OTPLocalizer.NCBadCharacter % TextEncoder().encodeWtext(ch)
     return None

コード例 #3

0

ファイルを表示

 def perWord(word):
     """Return NCGeneric if ``word`` is built from a single repeated letter.

     Words with two letters or fewer are never flagged. Returns None when
     the word is acceptable.
     """
     letters = justLetters(word)
     if len(letters) <= 2:
         return None

     # Lower-case the letters so the comparison is case-insensitive.
     encoded = TextEncoder().encodeWtext(letters)
     lowered = TextEncoder.lower(encoded.decode('utf-8'))
     letters = TextEncoder().decodeText(lowered.encode('utf-8'))

     # Filtering by the first letter leaves the string unchanged only if
     # that letter is the only one used.
     if filterString(letters, letters[0]) != letters:
         return None

     notify.info('word "%s" uses only one letter' %
                 TextEncoder().encodeWtext(word))
     return OTPLocalizer.NCGeneric

コード例 #4

0

ファイルを表示

    def checkJapanese(name):
        """Check ``name`` against the Japanese character-set and width rules.

        Only the ASCII space, hiragana, katakana and half-width katakana are
        accepted; every other code point (other ASCII, kanji, ...) is
        rejected. The name must also occupy between two and eight display
        cells, where a half-width character (including the ASCII space)
        counts as half a cell and every other allowed character as one cell.

        Returns None if the name is OK, otherwise an error string.
        """
        # Code point ranges; reference: http://unicode.org/charts/
        asciiSpace = {0x20}
        # '0'..'9' only. 0x3A-0x3F is punctuation (':' through '?') and must
        # be reported as a bad character rather than as a digit.
        asciiDigits = set(range(0x30, 0x3A))
        hiragana = set(range(0x3041, 0x30A0))
        katakana = set(range(0x30A1, 0x3100))
        halfwidthKatakana = set(range(0xFF65, 0xFFA0))
        halfwidthCharacter = asciiSpace | halfwidthKatakana
        allowedUtf8 = asciiSpace | hiragana | katakana | halfwidthKatakana
        te = TextEncoder()
        dc = 0.0  # display cells occupied so far

        for code in map(ord, te.decodeText(name)):
            if code not in allowedUtf8:
                if code in asciiDigits:
                    notify.info('name contains not allowed ascii digits')
                    return OTPLocalizer.NCNoDigits
                notify.info('name contains not allowed utf8 char: 0x%04x' %
                            code)
                return OTPLocalizer.NCBadCharacter % te.encodeWtext(
                    chr(code))

            # A full-width character fills one display cell; two half-width
            # characters share a single cell.
            if code in halfwidthCharacter:
                dc += 0.5
            else:
                dc += 1

        # The name must occupy at least two and at most eight display cells.
        if dc < 2:
            notify.info('name is too short: %0.1f' % dc)
            return OTPLocalizer.NCTooShort
        if dc > 8:
            notify.info(
                'name has been occupied more than eight display cells: %0.1f' %
                dc)
            return OTPLocalizer.NCGeneric
        return None

コード例 #5

0

ファイルを表示

 def allCaps(name):
     """Return NCAllCaps if every letter of ``name`` is upper-case.

     Names with two letters or fewer (e.g. 'J.T.') are allowed to be fully
     capitalised. Returns None when the name is acceptable.
     """
     # e.g. MICKEY MOUSE
     letters = justLetters(name)
     if len(letters) <= 2:
         return None

     upperLetters = TextEncoder().decodeText(
         TextEncoder.upper(TextEncoder().encodeWtext(letters).decode(
             'utf-8')).encode('utf-8'))

     # Some unicode characters can't be capitalized. If any letter is still
     # not upper-case after upper(), the name cannot be all-caps; excessive
     # capitalization will be caught by mixedCase(). Every letter is
     # inspected here (the previous code only ever looked at index 0).
     if not all(letter.isupper() for letter in upperLetters):
         return None

     if upperLetters == letters:
         notify.info('name is all caps')
         return OTPLocalizer.NCAllCaps
     return None

コード例 #6

0

ファイルを表示

    def hasLetters(name):
        """Return NCNeedLetters if any word of ``name`` contains no letters
        (e.g. ',...,'); return None otherwise.
        """
        for word in wordList(name):
            if len(justLetters(word)) != 0:
                continue
            notify.info('word "%s" has no letters' %
                        TextEncoder().encodeWtext(word))
            return OTPLocalizer.NCNeedLetters
        return None

コード例 #7

0

ファイルを表示

    def repeatedChars(name):
        """Return an error string if any character appears more than twice
        in a row in ``name``; return None otherwise.
        """
        runLength = 1
        previous = None
        for current in name:
            # Extend the current run, or start a new one on a change.
            runLength = runLength + 1 if current == previous else 1
            previous = current

            if runLength > 2:
                notify.info('character %s is repeated too many times' %
                            TextEncoder().encodeWtext(current))
                return OTPLocalizer.NCRepeatedChar % TextEncoder().encodeWtext(
                    current)
        return None

コード例 #8

0

ファイルを表示

 def checkApostrophes(name):
     """Return NCApostrophes if a single word has more than two apostrophes
     or the whole name has more than three; return None otherwise.
     """
     # Per-word limit.
     for word in wordList(name):
         if word.count("'") > 2:
             notify.info('word "%s" has too many apostrophes.' %
                         TextEncoder().encodeWtext(word))
             return OTPLocalizer.NCApostrophes

     # Limit for the name as a whole.
     if name.count("'") > 3:
         notify.info('name has too many apostrophes.')
         return OTPLocalizer.NCApostrophes
     return None

コード例 #9

0

ファイルを表示

    def checkPeriods(name):
        """ periods are allowed at the end of words, or in two-letter
        words, like 'J.T.'

        Returns None if every word uses periods acceptably, otherwise
        OTPLocalizer.NCPeriodUsage.
        """
        for word in wordList(name):
            # strip off any trailing comma
            if word.endswith(','):
                word = word[:-1]

            numPeriods = word.count('.')
            if not numPeriods:
                continue

            # word must end in '.'
            if word[-1] != '.':
                notify.info('word "%s" does not end in a period' %
                            TextEncoder().encodeWtext(word))
                return OTPLocalizer.NCPeriodUsage

            # max periods is 2
            if numPeriods > 2:
                notify.info('word "%s" has too many periods' %
                            TextEncoder().encodeWtext(word))
                return OTPLocalizer.NCPeriodUsage

            if numPeriods == 2:
                # 2nd and 4th characters should be periods. The length guard
                # keeps short words such as 'a..' from raising IndexError on
                # word[3]; they simply fail the pattern.
                if len(word) < 4 or not ((word[1] == '.') and
                                         (word[3] == '.')):
                    notify.info('word "%s" does not fit the J.T. pattern' %
                                TextEncoder().encodeWtext(word))
                    return OTPLocalizer.NCPeriodUsage

        return None

コード例 #10

0

ファイルを表示

        def perWord(word):
            """Return NCNeedVowels if ``word`` has more than two ASCII
            letters and none of them is a vowel; return None otherwise.
            """
            # A period suggests an abbreviation, so skip the word.
            if '.' in word:
                return None

            # An extended (non-ASCII) character might be a vowel, so give
            # the word the benefit of the doubt.
            if any(ord(ch) >= 0x80 for ch in word):
                return None

            letters = filterString(word, string.ascii_letters)
            # things like 'MD' are ok without periods
            if len(letters) <= 2:
                return None

            if len(filterString(letters, 'aeiouyAEIOUY')) != 0:
                return None

            notify.info('word "%s" has no vowels' %
                        TextEncoder().encodeWtext(word))
            return OTPLocalizer.NCNeedVowels

コード例 #11

0

ファイルを表示

def checkName(name, otherCheckFuncs=None, font=None):
    """Run the name-approval checks on ``name`` and report the first failure.

    Args:
        name: the candidate name. It is encoded to UTF-8 and decoded through
            TextEncoder before the checks run, so it must support
            ``.encode('utf-8')``.
        otherCheckFuncs: optional iterable of extra check callables that run
            after the built-in checks. Each receives the name and returns
            None if it is OK or an error string if it is not.
        font: optional font; when given, every character of the name must be
            available in it.

    Returns:
        None if every check passes, otherwise the error string returned by
        the first failing check.
    """
    print("CheckName:", name, "type:", type(name))

    # Each nested check function receives the decoded name and returns None
    # if the name is OK, or an error string if it is not.

    def longEnough(name):
        if len(name) < 2:
            notify.info('name is too short')
            return OTPLocalizer.NCTooShort

    def emptyName(name):
        if name.strip() == '':
            notify.info('name is empty')
            return OTPLocalizer.NCTooShort

    def printableChars(name):
        for char in name:
            # If it is an extended character, we cannot test it for
            # printability here (but presumably it is some printable
            # character.)
            if ord(char) < 0x80 and char not in string.printable:
                notify.info('name contains non-printable char #%s' % ord(char))
                return OTPLocalizer.NCGeneric

    validAsciiChars = set(".,'-" + string.ascii_letters + string.whitespace)

    def _validCharacter(c, validAsciiChars=validAsciiChars, font=font):
        if c in validAsciiChars:
            return True
        # check for Unicode alphabetic characters and whitespace
        if c.isalpha() or c.isspace():
            return True
        return False

    def badCharacters(name, _validCharacter=_validCharacter):
        for char in name:
            if not _validCharacter(char):
                if char in string.digits:
                    notify.info('name contains digits')
                    return OTPLocalizer.NCNoDigits
                else:
                    notify.info('name contains bad char: %s' %
                                TextEncoder().encodeWtext(char))
                    return OTPLocalizer.NCBadCharacter % TextEncoder(
                    ).encodeWtext(char)

    def fontHasCharacters(name, font=font):
        # Only meaningful when a font was supplied to checkName().
        if font:
            tn = TextNode('NameCheck')
            tn.setFont(font)
            for c in name:
                if not tn.hasCharacter(str(c)):
                    notify.info('name contains bad char: %s' %
                                TextEncoder().encodeWtext(c))
                    return OTPLocalizer.NCBadCharacter % TextEncoder(
                    ).encodeWtext(c)

    def hasLetters(name):
        # e.g. ',...,'
        words = wordList(name)
        for word in words:
            letters = justLetters(word)

            if len(letters) == 0:
                notify.info('word "%s" has no letters' %
                            TextEncoder().encodeWtext(word))
                return OTPLocalizer.NCNeedLetters

    def hasVowels(name):
        # e.g. 'ndssmvwls'
        def perWord(word):
            # if there's a period, assume it's an abbreviation
            if '.' in word:
                return None

            # Check if there's an extended character; if
            # so, it might be a vowel.
            for char in word:
                if ord(char) >= 0x80:
                    return None

            letters = filterString(word, string.ascii_letters)
            # things like 'MD' are ok without periods
            if len(letters) > 2:
                vowels = filterString(letters, 'aeiouyAEIOUY')
                if len(vowels) == 0:
                    notify.info('word "%s" has no vowels' %
                                TextEncoder().encodeWtext(word))
                    return OTPLocalizer.NCNeedVowels

        for word in wordList(name):
            problem = perWord(word)
            if problem:
                return problem

    def monoLetter(name):
        # e.g. 'eeeeeeeee'
        def perWord(word):
            letters = justLetters(word)
            if len(letters) > 2:
                # make case-insensitive
                letters = TextEncoder().decodeText(
                    TextEncoder.lower(TextEncoder().encodeWtext(
                        letters).decode('utf-8')).encode('utf-8'))
                filtered = filterString(letters, letters[0])
                if filtered == letters:
                    notify.info('word "%s" uses only one letter' %
                                TextEncoder().encodeWtext(word))
                    return OTPLocalizer.NCGeneric

        for word in wordList(name):
            problem = perWord(word)
            if problem:
                return problem

    def checkDashes(name):
        def validDash(index, name=name):
            # Uses only its own ``index`` argument; it no longer reads the
            # enclosing loop variable.
            # if the dash is at the beginning or the end, fail
            if (index == 0) or (index == len(name) - 1):
                return 0
            # dash must be surrounded by letters on both sides
            if not (name[index - 1].isalpha()):
                return 0
            if not (name[index + 1].isalpha()):
                return 0
            return 1

        i = name.find('-')
        while i >= 0:
            if not validDash(i):
                notify.info('name makes invalid use of dashes')
                return OTPLocalizer.NCDashUsage
            i = name.find('-', i + 1)
        return None

    def checkCommas(name):
        def validComma(index, name=name):
            # Uses only its own ``index`` argument; it no longer reads the
            # enclosing loop variable.
            # if the comma is at the beginning or the end, fail
            if (index == 0) or (index == len(name) - 1):
                return OTPLocalizer.NCCommaEdge
            # comma must follow a word and be followed by a space
            if (name[index - 1].isspace()):
                return OTPLocalizer.NCCommaAfterWord
            if not (name[index + 1].isspace()):
                return OTPLocalizer.NCCommaUsage
            return None

        i = name.find(',')
        while i >= 0:
            problem = validComma(i)
            if problem:
                notify.info('name makes invalid use of commas')
                return problem
            i = name.find(',', i + 1)
        return None

    def checkPeriods(name):
        """ periods are allowed at the end of words, or in two-letter
        words, like 'J.T.' """
        words = wordList(name)
        for word in words:
            # strip off any trailing comma
            if word.endswith(','):
                word = word[:-1]

            numPeriods = word.count('.')
            if not numPeriods:
                continue

            # word must end in '.'
            if word[-1] != '.':
                notify.info('word "%s" does not end in a period' %
                            TextEncoder().encodeWtext(word))
                return OTPLocalizer.NCPeriodUsage

            # max periods is 2
            if numPeriods > 2:
                notify.info('word "%s" has too many periods' %
                            TextEncoder().encodeWtext(word))
                return OTPLocalizer.NCPeriodUsage

            if numPeriods == 2:
                # 2nd and 4th characters should be periods. The length guard
                # keeps short words such as 'a..' from raising IndexError on
                # word[3]; they simply fail the pattern.
                if len(word) < 4 or not ((word[1] == '.') and
                                         (word[3] == '.')):
                    notify.info('word "%s" does not fit the J.T. pattern' %
                                TextEncoder().encodeWtext(word))
                    return OTPLocalizer.NCPeriodUsage

        return None

    def checkApostrophes(name):
        words = wordList(name)
        for word in words:
            numApos = word.count("'")
            if numApos > 2:
                notify.info('word "%s" has too many apostrophes.' %
                            TextEncoder().encodeWtext(word))
                return OTPLocalizer.NCApostrophes
        numApos = name.count("'")
        if numApos > 3:
            notify.info('name has too many apostrophes.')
            return OTPLocalizer.NCApostrophes

    def tooManyWords(name):
        if len(wordList(name)) > 4:
            notify.info('name has too many words')
            return OTPLocalizer.NCTooManyWords

    def allCaps(name):
        # e.g. MICKEY MOUSE
        letters = justLetters(name)
        # J.T. -> OK
        if len(letters) > 2:
            upperLetters = TextEncoder().decodeText(
                TextEncoder.upper(TextEncoder().encodeWtext(letters).decode(
                    'utf-8')).encode('utf-8'))
            # Some unicode characters can't be capitalized. If any letter is
            # still not upper-case after upper(), the name is not all-caps;
            # excessive capitalization will be caught by mixedCase().
            # Every letter is inspected (the previous code only ever looked
            # at index 0).
            if not all(letter.isupper() for letter in upperLetters):
                return None
            if upperLetters == letters:
                notify.info('name is all caps')
                return OTPLocalizer.NCAllCaps

    def mixedCase(name):
        # e.g. MiCkeY MoUsE
        words = wordList(name)
        for word in words:
            if len(word) > 2:
                # allow McQuack
                capitals = justUpper(word)
                if len(capitals) > 2:
                    notify.info('name has mixed case')
                    return OTPLocalizer.NCMixedCase

    # NOTE(review): checkJapanese is defined here but is not part of the
    # default ``checks`` list below.
    def checkJapanese(name):
        # Only the ASCII space, hiragana, katakana and half-width katakana
        # are accepted; other ASCII and kanji (CJK) characters are rejected.
        # Reference: http://unicode.org/charts/
        asciiSpace = {0x20}
        # '0'..'9' only. 0x3A-0x3F is punctuation (':' through '?') and must
        # be reported as a bad character rather than as a digit.
        asciiDigits = set(range(0x30, 0x3A))
        hiragana = set(range(0x3041, 0x30A0))
        katakana = set(range(0x30A1, 0x3100))
        halfwidthKatakana = set(range(0xFF65, 0xFFA0))
        halfwidthCharacter = asciiSpace | halfwidthKatakana
        allowedUtf8 = asciiSpace | hiragana | katakana | halfwidthKatakana
        te = TextEncoder()
        dc = 0.0  # display cells occupied so far

        # Return None if name is OK, error string if name is not OK
        for code in map(ord, te.decodeText(name)):
            if code not in allowedUtf8:
                if code in asciiDigits:
                    notify.info('name contains not allowed ascii digits')
                    return OTPLocalizer.NCNoDigits
                notify.info('name contains not allowed utf8 char: 0x%04x' %
                            code)
                return OTPLocalizer.NCBadCharacter % te.encodeWtext(
                    chr(code))

            # A full-width character fills one display cell; two half-width
            # characters share a single cell.
            if code in halfwidthCharacter:
                dc += 0.5
            else:
                dc += 1

        # The name must occupy at least two and at most eight display cells.
        if (dc < 2):
            notify.info('name is too short: %0.1f' % dc)
            return OTPLocalizer.NCTooShort
        elif (dc > 8):
            notify.info(
                'name has been occupied more than eight display cells: %0.1f' %
                dc)
            return OTPLocalizer.NCGeneric

    def repeatedChars(name):
        count = 1
        lastChar = None
        for char in name:
            if char == lastChar:
                # character is repeating
                count += 1
            else:
                count = 1

            lastChar = char

            if count > 2:
                notify.info('character %s is repeated too many times' %
                            TextEncoder().encodeWtext(char))
                return OTPLocalizer.NCRepeatedChar % TextEncoder().encodeWtext(
                    char)

    checks = [
        printableChars,
        badCharacters,
        fontHasCharacters,
        longEnough,
        emptyName,
        hasLetters,
        hasVowels,
        monoLetter,
        checkDashes,
        checkCommas,
        checkPeriods,
        checkApostrophes,
        tooManyWords,
        allCaps,
        mixedCase,
        repeatedChars,
    ]
    # Extra caller-supplied checks run after the built-in ones.
    if otherCheckFuncs:
        checks.extend(otherCheckFuncs)

    # checks that should also be run on the reversed name string
    symmetricChecks = []

    # make sure we are working with a wide-character version of the string
    name = TextEncoder().decodeText(name.encode('utf-8'))
    notify.info('checking name "%s"...' % TextEncoder().encodeWtext(name))

    # run through all the checks, stopping at the first problem
    for check in checks:
        problem = check(name[:])
        if (not problem) and (check in symmetricChecks):
            # Check it backwards. Strings have no reverse() method, so build
            # the reversed copy with a slice.
            problem = check(name[::-1])

        if problem:
            return problem

    return None

コード例 #12

0

ファイルを表示


# Self-test scaffolding for checkName().
# prevent log spam during testing: remember the current notify severity and
# raise it to NSError while the assertions below run.
# NOTE(review): the saved ``severity`` is presumably restored further down the
# file, outside this excerpt - confirm.
severity = notify.getSeverity()
notify.setSeverity(NSError)

# ``if 0`` disables the whole block: none of these assertions execute.
if 0:
    # these tests can be removed or replaced for international versions of the codebase

    # long enough
    assert checkName('J')
    assert not checkName('Jo')
    # empty name (including names made only of whitespace characters)
    assert checkName('')
    assert checkName('\t')
    assert checkName(TextEncoder().encodeWtext('\xa0'))
    assert checkName(TextEncoder().encodeWtext('\u1680'))
    assert checkName(TextEncoder().encodeWtext('\u2001'))
    # printable chars: ASCII control characters must be rejected
    for i in range(32):
        assert checkName(chr(i))
    assert checkName(chr(0x7f))
    # bad characters
    # NOTE(review): '\]' in the literal below is not a recognised escape
    # sequence; newer Python versions warn about it at compile time.
    for c in '!"#$%&()*+/:;<=>?@[\]^_`{|}~':
        assert checkName('Bob' + c)
    # has letters
    assert checkName(',...,')
    #   katakana = range(0x30A1, 0x30FB)
    #assert not checkName(TextEncoder().encodeWtext(u'\u30a1\u30a2'))
    # has vowels
    assert checkName('Qwrt')