Exemple #1
0
def pinyin_indexes(content):
	"""Build the lookup keys derived from the pinyin found in ``content``.

	The text returned by ``find_pinyin(content)`` is split on ``,`` and ``;``
	into alternative pronunciations. Every pronunciation contributes two keys,
	each terminated by a ``.``:

	* its pinyin syllables joined by single spaces, and
	* the same syllables with tones stripped, lowercased and suffixed with
	  the tone number reported by ``color.determine_tone``.

	Returns an empty tuple when no pinyin is found, when it equals ``"_"``,
	or when any pronunciation yields no pinyin ranges; otherwise a set of keys.
	"""
	raw = find_pinyin(content)
	if not raw or raw == "_":
		return ()

	keys = set()

	for reading in re.split(r",|;", raw):
		# Cut every detected pinyin range out of this pronunciation.
		syllables = []
		for rng in color.ranges_of_pinyin_in_string(reading):
			syllables.append(rng._slice(reading))

		if not syllables:
			# NOTE(review): this abandons keys already collected from earlier
			# pronunciations -- confirm that is intended rather than `continue`.
			return ()

		# Key 1: syllables as written (with diacritics).
		keys.add("%s." % color.utf(" ".join(syllables)))

		# Key 2: syllables with tone marks replaced by trailing tone numbers.
		numbered = []
		for syllable in syllables:
			toneless = color.lowercase_string_by_removing_pinyin_tones(syllable)
			numbered.append("%s%d" % (toneless, color.determine_tone(syllable)))
		keys.add("%s." % color.utf(" ".join(numbered)))

	return keys
Exemple #2
0
def pinyin_indexes(content):
    """Return the pinyin search keys for ``content``.

    ``find_pinyin(content)`` supplies the raw pinyin text; ``,`` and ``;``
    separate alternative pronunciations. For each pronunciation two
    ``.``-terminated keys are produced: the space-joined syllables as found,
    and the space-joined syllables rewritten as ``<toneless lowercase><tone>``.

    An empty tuple is returned if there is no pinyin, if it is ``'_'``, or if
    any single pronunciation contains no pinyin ranges. Otherwise the result
    is a set of keys.
    """
    source = find_pinyin(content)

    if not source or source == '_':
        return ()

    def with_tone_number(syllable):
        # Strip the tone mark first, then look the tone number up.
        plain = color.lowercase_string_by_removing_pinyin_tones(syllable)
        return '%s%d' % (plain, color.determine_tone(syllable))

    result = set()

    for variant in re.split(r',|;', source):
        spans = color.ranges_of_pinyin_in_string(variant)
        parts = [span._slice(variant) for span in spans]

        if not parts:
            # NOTE(review): returning here drops keys gathered from previous
            # variants -- verify this all-or-nothing behaviour is wanted.
            return ()

        # Diacritic form, whitespace separated.
        diacritic_key = '%s.' % color.utf(' '.join(parts))
        result.add(diacritic_key)

        # Tone-number form, whitespace separated.
        numeric_key = '%s.' % color.utf(
            ' '.join([with_tone_number(part) for part in parts]))
        result.add(numeric_key)

    return result
Exemple #3
0
def pinyin_indexes(content):
    """Compute index keys from the pinyin contained in ``content``.

    The pinyin text comes from ``find_pinyin(content)`` and may list several
    pronunciations separated by ``,`` or ``;``. Each pronunciation adds two
    keys ending in ``.`` to the result: one made of its syllables joined by
    spaces, and one where each syllable is replaced by its toneless lowercase
    form followed by its tone number.

    Returns ``()`` when there is no pinyin, when it is ``'_'``, or when a
    pronunciation has no recognisable pinyin; otherwise returns a set.
    """
    text = find_pinyin(content)
    if not text or text == '_':
        return ()

    collected = set()
    for pronunciation in re.split(r',|;', text):
        # Extract each pinyin range from the current pronunciation.
        words = []
        for found in color.ranges_of_pinyin_in_string(pronunciation):
            words.append(found._slice(pronunciation))

        if not words:
            # NOTE(review): earlier pronunciations' keys are discarded by this
            # return -- confirm it should not simply skip this pronunciation.
            return ()

        # Syllables with their diacritics intact.
        collected.add('%s.' % color.utf(u' '.join(words)))

        # Syllables with the diacritics turned into tone numbers.
        toned = [
            '%s%d' % (color.lowercase_string_by_removing_pinyin_tones(word),
                      color.determine_tone(word))
            for word in words
        ]
        collected.add('%s.' % color.utf(u' '.join(toned)))
    return collected
Exemple #4
0
 def testFristTone(self):
     # Syllables carrying a macron are expected to be reported as tone 1.
     for syllable in ('fāng', 'yī'):
         self.assertEqual(1, colorize_pinyin.determine_tone(syllable))
Exemple #5
0
 def testMixedPinyin(self):
     # Input with two different tone marks; the expected answer is 3
     # (apparently the first mark decides -- confirm against determine_tone).
     tone = colorize_pinyin.determine_tone('bǎiwén')
     self.assertEqual(3, tone)
Exemple #6
0
 def testNonPinyin(self):
     # Cyrillic text is not pinyin, so tone 0 is expected.
     tone = colorize_pinyin.determine_tone('бурда')
     self.assertEqual(0, tone)
Exemple #7
0
 def testZeroTone(self):
     # Syllables without any tone mark are expected to be reported as tone 0.
     for syllable in ('de', 'ning'):
         self.assertEqual(0, colorize_pinyin.determine_tone(syllable))
Exemple #8
0
 def testFourthTone(self):
     # Syllables carrying a grave accent are expected to be reported as tone 4.
     for syllable in ('àn', 'dìnggòu'):
         self.assertEqual(4, colorize_pinyin.determine_tone(syllable))
Exemple #9
0
 def testThirdTone(self):
     # Syllables carrying a caron are expected to be reported as tone 3,
     # including on the letter ü.
     for syllable in ('fǎn', 'lǚ'):
         self.assertEqual(3, colorize_pinyin.determine_tone(syllable))
Exemple #10
0
 def testSecondTone(self):
     # Syllables carrying an acute accent are expected to be reported as tone 2.
     for syllable in ('gán', 'xún'):
         self.assertEqual(2, colorize_pinyin.determine_tone(syllable))
Exemple #11
0
 def testFristTone(self):
     # First tone: both macron-marked samples must yield 1.
     expected = 1
     first = colorize_pinyin.determine_tone('fāng')
     self.assertEqual(expected, first)
     second = colorize_pinyin.determine_tone('yī')
     self.assertEqual(expected, second)
Exemple #12
0
 def testSecondTone(self):
     # Second tone: both acute-marked samples must yield 2.
     expected = 2
     first = colorize_pinyin.determine_tone('gán')
     self.assertEqual(expected, first)
     second = colorize_pinyin.determine_tone('xún')
     self.assertEqual(expected, second)
Exemple #13
0
 def testMixedPinyin(self):
     # A word mixing two tone marks is expected to be reported as tone 3.
     sample = 'bǎiwén'
     self.assertEqual(3, colorize_pinyin.determine_tone(sample))
Exemple #14
0
 def testNonPinyin(self):
     # Non-pinyin (Cyrillic) input is expected to be reported as tone 0.
     sample = 'бурда'
     self.assertEqual(0, colorize_pinyin.determine_tone(sample))
Exemple #15
0
 def testZeroTone(self):
     # Neutral tone: unmarked samples must yield 0.
     expected = 0
     first = colorize_pinyin.determine_tone('de')
     self.assertEqual(expected, first)
     second = colorize_pinyin.determine_tone('ning')
     self.assertEqual(expected, second)
Exemple #16
0
 def testFourthTone(self):
     # Fourth tone: both grave-marked samples must yield 4.
     expected = 4
     first = colorize_pinyin.determine_tone('àn')
     self.assertEqual(expected, first)
     second = colorize_pinyin.determine_tone('dìnggòu')
     self.assertEqual(expected, second)
Exemple #17
0
 def testThirdTone(self):
     # Third tone: both caron-marked samples (one on ü) must yield 3.
     expected = 3
     first = colorize_pinyin.determine_tone('fǎn')
     self.assertEqual(expected, first)
     second = colorize_pinyin.determine_tone('lǚ')
     self.assertEqual(expected, second)