コード例 #1
0
ファイル: kanRen.py プロジェクト: fenildf/anki_addons-1
def getStrokeOrd(fin, kl):
    """
    Append one stroke-order line per character of ``kl`` to ``fin``.

    Tries to be aware of the glyph locale during lookup: a character found
    in ``cedict.simplified`` is looked up with locale 'C', one found in
    ``cedict.traditional`` with locale 'T', and anything else with
    locale 'J'.

    :param fin: list-like object; one formatted line is appended to it per
        character.
    :param kl: iterable of characters to look up.
    :return: the same ``fin`` object that was passed in.

    Any exception raised by ``getStrokeOrder`` propagates to the caller.
    """
    from cjklib.characterlookup import CharacterLookup

    # One CharacterLookup per locale, created on first use and then reused,
    # instead of constructing a fresh lookup for every single character.
    lookups = {}

    def lookupFor(locale):
        if locale not in lookups:
            lookups[locale] = CharacterLookup(locale)
        return lookups[locale]

    for i in kl:
        if i in cedict.simplified:
            locale = 'C'
        elif i in cedict.traditional:
            locale = 'T'
        else:
            locale = 'J'
        j = lookupFor(locale).getStrokeOrder(i)
        fin.append(u'• ' + u' '.join(j))
    return fin
コード例 #2
0
ファイル: kanRen.py プロジェクト: fenildf/anki_addons-1
def auxSOrd(i):
    """
    Try to get the stroke decomposition of ``i``
    if subcomponent decomposition fails.

    The lookup locale is chosen from the character itself: 'C' when it is
    in ``cedict.simplified``, 'T' when it is in ``cedict.traditional``,
    otherwise 'J'.

    :param i: the character to look up.
    :return: the strokes joined by single spaces, or ``u'[x]'`` when the
        stroke-order lookup raises.
    """
    from cjklib.characterlookup import CharacterLookup
    if i in cedict.simplified:
        locale = 'C'
    elif i in cedict.traditional:
        locale = 'T'
    else:
        locale = 'J'
    cjk = CharacterLookup(locale)
    try:
        j = cjk.getStrokeOrder(i)
    except Exception:
        # Best-effort lookup: any lookup error yields the placeholder, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return u'[x]'
    return u' '.join(j)
コード例 #3
0
def mandarinToPinyin(mandarinChar):
    """
    Return the first Pinyin reading that cjklib reports for ``mandarinChar``.

    Readings are requested from a locale 'C' lookup with
    ``toneMarkType='none'``. When more than one reading comes back, a
    notice listing them is printed and only the first one is returned.

    :param mandarinChar: the character to convert.
    :return: the first element of the list of readings.
    """
    lookup = CharacterLookup('C')
    readings = lookup.getReadingForCharacter(
        mandarinChar, 'Pinyin', toneMarkType='none')
    readingCount = len(readings)
    if readingCount > 1:
        print("converted syllable {} has {} parts".format(
            readings, readingCount))
    # take only first variant of pinyin interpretations
    return readings[0]
コード例 #4
0
def tokenize(input, output):
    """
    Copy ``input`` to ``output``, keeping only characters with a Pinyin reading.

    Every line of ``input`` is decoded as UTF-8; each character for which
    the locale 'T' lookup returns a non-empty Pinyin reading is kept, all
    others are dropped. Each filtered line is written UTF-8 encoded and
    terminated by a newline.

    :param input: path of the file to read.
    :param output: path of the file to write (opened with mode 'w').

    If ``input`` cannot be read, a message is printed and the process exits
    through ``sys.exit()``.
    """
    try:
        # The context manager closes the handle even if reading fails.
        with open(input, 'r') as source:
            text = source.readlines()
    except IOError:
        print("IOError: could not open %s" % (input,))
        sys.exit()

    cjk = CharacterLookup('T')

    # The output handle is closed even when a lookup or write raises.
    with open(output, 'w') as out:
        for line in text:
            line = line.decode('utf-8')
            kept = [char for char in line
                    if cjk.getReadingForCharacter(char, 'Pinyin')]
            out.write((u''.join(kept) + u'\n').encode('utf-8'))
コード例 #5
0
def to_pinyin(filename):
    """
    Print each character of the sample text prefixed by its Pinyin reading.

    ``filename`` is opened and read first; if that fails, a message is
    printed and the process exits through ``sys.exit()``. For every
    character with a Pinyin reading (locale 'T' lookup), the list of all
    readings passed through ``unidecode`` is printed, and the first reading
    plus the character plus a space is added to the output line, which is
    printed once the line is complete.

    :param filename: path of the file to open.
    """
    try:
        # Read inside a context manager so the handle is always closed.
        with open(filename, 'r') as source:
            source.readlines()
    except IOError:
        print("IOError: could not open %s" % (filename,))
        sys.exit()

    cjk = CharacterLookup('T')

    # NOTE(review): the file content is discarded and this hard-coded sample
    # is processed instead -- looks like a debugging leftover; confirm before
    # switching to the lines read from ``filename``.
    lines = [u'我喜歡他']

    for line in lines:
        parts = []
        for char in line:
            pinyin = cjk.getReadingForCharacter(char, 'Pinyin')
            if pinyin:
                print([unidecode(x) for x in pinyin])
                simplified = unidecode(pinyin[0])
                parts.append(simplified + char + " ")
        print("".join(parts))
コード例 #6
0
import requests
import codecs

import gevent
from gevent import monkey

# Apply gevent's monkey patching before the remaining imports below run.
monkey.patch_all()
from bs4 import BeautifulSoup

# Chinese character pinyin recognition
from pypinyin import pinyin, lazy_pinyin, Style

# Stroke count recognition
from cjklib.characterlookup import CharacterLookup

# Module-level lookup created once with the 'C' locale argument.
cjk = CharacterLookup('C')

# Chinese character radical recognition
from lib.component import *

# NOTE(review): `sys` is not imported in the visible lines; presumably it is
# supplied by the wildcard import above -- confirm.
# reload(sys) followed by sys.setdefaultencoding() is a Python 2-only idiom.
reload(sys)
sys.setdefaultencoding("utf-8")

# Proxy configuration
proxies = {}


class BabyName():
    def __init__(self,
                 config={},
                 name_dict={},
コード例 #7
0
 def _characterLookup(cls):
     if not hasattr(cls, '_cjk'):
         cls._cjk = CharacterLookup('T', 'Unicode')
     return cls._cjk
コード例 #8
0
 def getCharacters(self):
     """
     Return the characters of the domain named by ``self.title``.

     A locale 'T' lookup is created with ``self.title`` as its second
     argument, and the items of its domain character iterator are joined
     with single spaces.
     """
     domainChars = CharacterLookup('T', self.title).getDomainCharacterIterator()
     return ' '.join(domainChars)
コード例 #9
0
 def __init__(self):
     """
     Set up the lookup, its domain character iterator and empty state.

     ``curChar`` starts as ``None`` and ``glyphQueue`` as an empty list.
     """
     lookup = CharacterLookup('T', 'Unicode')
     self._cjk = lookup
     self.characterIterator = lookup.getDomainCharacterIterator()
     self.glyphQueue = list()
     self.curChar = None
コード例 #10
0
ファイル: check-strokes.py プロジェクト: yueqianzhang/cjklib
    def __init__(self, options, args):
        """
        Store the locale and character domain from ``options`` and build
        the matching CharacterLookup.

        :param options: object providing ``locale`` and ``characterDomain``.
        :param args: accepted but not used here.
        """
        locale = options.locale
        domain = options.characterDomain

        self._locale = locale
        self._characterDomain = domain
        self._cjk = CharacterLookup(locale, domain)
コード例 #11
0
 def __init__(self, variant='T'):
     """
     Remember ``variant`` and create the character lookup.

     :param variant: stored as ``self.variant``; defaults to 'T'.
     """
     self.variant = variant
     # NOTE(review): the lookup is always created with 'T', regardless of
     # ``variant`` -- confirm this is intended.
     self.characterLookup = CharacterLookup('T')
コード例 #12
0
ファイル: checkcjkradicals.py プロジェクト: ninchanese/cjklib
def main():
    """
    Cross-check a radical table read from stdin against cjklib's data.

    Every input line that is neither a ``#`` comment nor whitespace-only is
    expected to have the form::

        <index>[']; <radical code point>; <equivalent code point>

    where the index has 1-3 digits, the optional apostrophe marks a variant
    entry, and both code points are 4-5 upper-case hexadecimal digits.

    For each entry the radical form and its equivalent character are
    compared with what the 'T' and 'C' locale lookups return. Afterwards
    the radical indices 1..214 are scanned for forms that were not covered
    by the input. Discrepancies are printed as they are found and a summary
    line with the counters is printed at the end.

    NOTE(review): ``line.decode`` implies byte strings (Python 2) while
    ``chr`` on code points above 255 and ``print(...)`` are Python 3 forms --
    confirm the intended interpreter before changing string handling.
    """
    # Lookups for the 'T' and 'C' locales; the 'C' one is used below to
    # derive the simplified forms.
    cjk = CharacterLookup('T')
    cjkSimplified = CharacterLookup('C')

    fileEntryCount = 0
    databaseMissingEntryCount = 0
    noEntryCount = 0
    wrongEquivalentCount = 0
    # Radical indices seen in the input, kept apart for plain entries and
    # for entries marked as variants with an apostrophe.
    seenRadicalFormIndices = set()
    seenRadicalVariantIndices = set()
    for line in sys.stdin:
        # default_encoding is a name defined outside this function.
        line = line.decode(default_encoding)

        # Skip comment lines and whitespace-only lines.
        if re.match(r'\s*#', line) or re.match(r'\s+$', line):
            continue
        else:
            # Counted before parsing, so unparsable lines are included too.
            fileEntryCount = fileEntryCount + 1

            matchObj = re.match(r"(\d{1,3})('?);\s+([1234567890ABCDEF]{4,5});" \
                + r"\s+([1234567890ABCDEF]{4,5})\s*$", line)
            if matchObj:
                index, variant, radicalCP, equivalentCP = matchObj.groups()
                radicalIdx = int(index)
                # Turn the hexadecimal code points into characters.
                radicalForm = chr(int(radicalCP, 16))
                equivalentForm = chr(int(equivalentCP, 16))

                if variant:
                    seenRadicalVariantIndices.add(radicalIdx)
                else:
                    seenRadicalFormIndices.add(radicalIdx)
                # check radicalForm
                if not variant:
                    targetForms = set([cjk.getKangxiRadicalForm(radicalIdx)])
                else:
                    targetForms = set()
                    # add simplified form, if different
                    simplifiedForm = cjkSimplified.getKangxiRadicalForm(
                        radicalIdx)
                    if simplifiedForm != cjk.getKangxiRadicalForm(radicalIdx):
                        targetForms.add(simplifiedForm)
                    # add simplified variant
                    targetForms.update(
                        set(cjkSimplified.getKangxiRadicalVariantForms(
                            radicalIdx)) \
                        - set(cjk.getKangxiRadicalVariantForms(radicalIdx)))

                if radicalForm not in targetForms:
                    # cjklib is missing something
                    print(("No entry for radical form '%s' with index %d%s"
                        % (radicalForm, radicalIdx, variant))\
                        .encode(default_encoding))
                    databaseMissingEntryCount += 1
                if targetForms - set([radicalForm]):
                    # CJKRadicals.txt is missing something
                    for form in targetForms - set([radicalForm]):
                        print(("Database entry '%s' with radical index %d%s" \
                            % (form, radicalIdx, variant) \
                            + " not included in table")\
                            .encode(default_encoding))
                    # NOTE(review): incremented once per input line here,
                    # whereas the loops after the input is consumed count
                    # once per missing form -- confirm this is intended.
                    noEntryCount += 1

                # check equivalentForm
                libraryEquivalentForm \
                    = cjk.getRadicalFormEquivalentCharacter(radicalForm)
                if libraryEquivalentForm != equivalentForm:
                    print(("Equivalent radical form '%s' with index %d%s"
                        % (libraryEquivalentForm, radicalIdx, variant) \
                        + " not backed by table: '%s'" % equivalentForm)\
                        .encode(default_encoding))
                    wrongEquivalentCount += 1

            else:
                # The line did not match the expected entry format.
                print(("error reading line: '" + line + "'")\
                    .encode(default_encoding))


    # Radical indices 1..214 that never appeared as a plain (non-variant)
    # entry in the input.
    for radicalIdx in set(range(1, 215)) - seenRadicalFormIndices:
        print(("No table entry for radical index %d" % radicalIdx)\
            .encode(default_encoding))
        noEntryCount += 1

    # Radical indices that never appeared as a variant entry: report every
    # simplified form the lookups know for them.
    for radicalIdx in set(range(1, 215)) - seenRadicalVariantIndices:
        simplifiedForms = set()
        # add simplified form, if different
        simplifiedForm = cjkSimplified.getKangxiRadicalForm(
            radicalIdx)
        if simplifiedForm != cjk.getKangxiRadicalForm(radicalIdx):
            simplifiedForms.add(simplifiedForm)
        # add simplified variant
        simplifiedForms.update(
            set(cjkSimplified.getKangxiRadicalVariantForms(
                radicalIdx)) \
            - set(cjk.getKangxiRadicalVariantForms(radicalIdx)))
        for form in simplifiedForms:
            # NOTE(review): the message ends with a stray apostrophe.
            print(("No table entry for simplified radical %s with index %d'"
                % (form, radicalIdx)).encode(default_encoding))
            noEntryCount += 1

    # Variant forms returned by the 'T' lookup but not by the 'C' lookup.
    # NOTE(review): these are reported for every index without consulting
    # the input that was read -- confirm this is intended.
    for radicalIdx in range(1, 215):
        otherVariants = set(cjk.getKangxiRadicalVariantForms(radicalIdx)) \
            - set(cjkSimplified.getKangxiRadicalVariantForms(radicalIdx))
        for form in otherVariants:
            # NOTE(review): the message ends with a stray apostrophe.
            print(("No table entry for variant %s with index %d'"
                % (form, radicalIdx)).encode(default_encoding))
            noEntryCount += 1

    # Final summary of all counters.
    print("Total %d entries" % fileEntryCount \
        + ", %d missing from cjklib" % databaseMissingEntryCount \
        + ", %d mismatches in equivalent forms" % wrongEquivalentCount \
        + ", not found in source list: %d" % noEntryCount)