Esempio n. 1
0
def pinyin_indexes(content):
    """Build index keys from the pinyin that ``find_pinyin`` extracts.

    The extracted text is split on ``,`` and ``;``; every resulting segment
    contributes two keys, each terminated by a dot:

    * the pinyin pieces exactly as sliced out of the segment, space-joined;
    * the same pieces rewritten through
      ``color.lowercase_string_by_removing_pinyin_tones`` with the number
      from ``color.determine_tone`` appended.

    Returns a set of keys, or an empty tuple when nothing usable is found.
    """
    found = find_pinyin(content)
    # presumably a unicode string (the original carried a commented-out
    # assertion to that effect) -- TODO confirm

    if not found or found == "_":
        return ()

    keys = set()

    # several pronunciations may be listed, separated by "," or ";"
    for variant in re.split(r",|;", found):

        # slice every detected pinyin range out of this variant
        syllables = [
            rng._slice(variant)
            for rng in color.ranges_of_pinyin_in_string(variant)
        ]

        # NOTE(review): one variant without pinyin aborts the whole call and
        # drops the keys gathered so far -- confirm this is intended.
        if not syllables:
            return ()

        # key 1: the pieces as found, separated by single spaces
        keys.add("%s." % color.utf(" ".join(syllables)))

        # key 2: each piece in toneless lowercase form plus its tone number
        numbered = []
        for syllable in syllables:
            base = color.lowercase_string_by_removing_pinyin_tones(syllable)
            tone = color.determine_tone(syllable)
            numbered.append("%s%d" % (base, tone))
        keys.add("%s." % color.utf(" ".join(numbered)))
    return keys
Esempio n. 2
0
def pinyin_indexes(content):
    """Return the index keys derived from the pinyin inside *content*.

    ``find_pinyin(content)`` supplies the raw text, which is split on commas
    and semicolons.  For each part two dot-terminated keys are stored: the
    sliced pinyin pieces joined by spaces, and the same pieces converted to
    their tone-stripped lowercase form followed by a tone number.

    An empty tuple is returned when no pinyin is available (or it is the
    placeholder ``'_'``), and also as soon as one part yields no pinyin
    ranges; otherwise the result is a set of keys.
    """
    def _with_tone_number(piece):
        # tone-stripped lowercase text immediately followed by the tone digit
        return '%s%d' % (
            color.lowercase_string_by_removing_pinyin_tones(piece),
            color.determine_tone(piece))

    raw = find_pinyin(content)
    if not raw:
        return ()
    if raw == '_':
        return ()

    result = set()

    # each comma/semicolon separated part is handled on its own
    for reading in re.split(r',|;', raw):
        spans = color.ranges_of_pinyin_in_string(reading)
        pieces = [span._slice(reading) for span in spans]

        # NOTE(review): bailing out here throws away keys already collected
        # from earlier parts -- confirm that is the desired behaviour.
        if not pieces:
            return ()

        # pieces exactly as they appear in the text
        result.add('%s.' % color.utf(' '.join(pieces)))

        # pieces with a tone number instead of a diacritic
        result.add('%s.' % color.utf(' '.join(
            [_with_tone_number(piece) for piece in pieces])))
    return result
Esempio n. 3
0
def pinyin_indexes(content):
    """Collect dot-terminated pinyin index keys for *content*.

    The text returned by ``find_pinyin`` is split on ``,`` / ``;``.  Every
    chunk adds two keys to the result set: its pinyin pieces joined by a
    space, and those pieces rewritten as tone-stripped lowercase text with
    the tone number appended.

    Returns ``()`` if there is no pinyin, if it equals ``'_'``, or if any
    chunk contains no pinyin ranges; otherwise returns the set of keys.
    """
    source = find_pinyin(content)

    if not (source and source != '_'):
        return ()

    entries = set()

    # a field may list several pronunciations
    for chunk in re.split(r',|;', source):

        # pull each pinyin range out of the chunk
        words = []
        for span in color.ranges_of_pinyin_in_string(chunk):
            words.append(span._slice(chunk))

        # NOTE(review): returning here discards entries gathered from
        # previous chunks -- confirm this is intentional.
        if not words:
            return ()

        # diacritic form, space separated
        plain = u' '.join(words)
        entries.add('%s.' % color.utf(plain))

        # tone-number form, one converted item per sub word
        numbered = u' '.join(
            '%s%d' % (color.lowercase_string_by_removing_pinyin_tones(word),
                      color.determine_tone(word))
            for word in words)
        entries.add('%s.' % color.utf(numbered))
    return entries
Esempio n. 4
0
 def testFristTone(self):
     # determine_tone must report tone 1 for each of these samples
     for sample in ('fāng', 'yī'):
         self.assertEqual(1, colorize_pinyin.determine_tone(sample))
Esempio n. 5
0
 def testMixedPinyin(self):
     # a string carrying two different tone marks is expected to yield 3
     detected = colorize_pinyin.determine_tone('bǎiwén')
     self.assertEqual(3, detected)
Esempio n. 6
0
 def testNonPinyin(self):
     # Cyrillic input is expected to be reported as tone 0
     detected = colorize_pinyin.determine_tone('бурда')
     self.assertEqual(0, detected)
Esempio n. 7
0
 def testZeroTone(self):
     # samples without any tone mark must be reported as tone 0
     for sample in ('de', 'ning'):
         self.assertEqual(0, colorize_pinyin.determine_tone(sample))
Esempio n. 8
0
 def testFourthTone(self):
     # determine_tone must report tone 4 for each of these samples
     for sample in ('àn', 'dìnggòu'):
         self.assertEqual(4, colorize_pinyin.determine_tone(sample))
Esempio n. 9
0
 def testThirdTone(self):
     # determine_tone must report tone 3 for each of these samples
     for sample in ('fǎn', 'lǚ'):
         self.assertEqual(3, colorize_pinyin.determine_tone(sample))
Esempio n. 10
0
 def testSecondTone(self):
     # determine_tone must report tone 2 for each of these samples
     for sample in ('gán', 'xún'):
         self.assertEqual(2, colorize_pinyin.determine_tone(sample))
Esempio n. 11
0
 def testFristTone(self):
     # both inputs carry a macron and are expected to give tone 1
     expected = 1
     tone_of_fang = colorize_pinyin.determine_tone('fāng')
     self.assertEqual(expected, tone_of_fang)
     tone_of_yi = colorize_pinyin.determine_tone('yī')
     self.assertEqual(expected, tone_of_yi)
Esempio n. 12
0
 def testSecondTone(self):
     # both inputs carry an acute accent and are expected to give tone 2
     expected = 2
     tone_of_gan = colorize_pinyin.determine_tone('gán')
     self.assertEqual(expected, tone_of_gan)
     tone_of_xun = colorize_pinyin.determine_tone('xún')
     self.assertEqual(expected, tone_of_xun)
Esempio n. 13
0
 def testMixedPinyin(self):
     # input mixes two tone marks; the expected answer is 3
     mixed = 'bǎiwén'
     self.assertEqual(3, colorize_pinyin.determine_tone(mixed))
Esempio n. 14
0
 def testNonPinyin(self):
     # text that is not pinyin (Cyrillic here) is expected to give 0
     not_pinyin = 'бурда'
     self.assertEqual(0, colorize_pinyin.determine_tone(not_pinyin))
Esempio n. 15
0
 def testZeroTone(self):
     # unmarked inputs are expected to give tone 0
     expected = 0
     tone_of_de = colorize_pinyin.determine_tone('de')
     self.assertEqual(expected, tone_of_de)
     tone_of_ning = colorize_pinyin.determine_tone('ning')
     self.assertEqual(expected, tone_of_ning)
Esempio n. 16
0
 def testFourthTone(self):
     # inputs marked with grave accents are expected to give tone 4
     expected = 4
     tone_of_an = colorize_pinyin.determine_tone('àn')
     self.assertEqual(expected, tone_of_an)
     tone_of_dinggou = colorize_pinyin.determine_tone('dìnggòu')
     self.assertEqual(expected, tone_of_dinggou)
Esempio n. 17
0
 def testThirdTone(self):
     # inputs marked with a caron are expected to give tone 3
     expected = 3
     tone_of_fan = colorize_pinyin.determine_tone('fǎn')
     self.assertEqual(expected, tone_of_fan)
     tone_of_lv = colorize_pinyin.determine_tone('lǚ')
     self.assertEqual(expected, tone_of_lv)