def getChars(freqFile, startNo, endNo):
    """Load a slice of a tab-separated frequency list and normalise readings.

    The file is read completely, rows ``startNo:endNo`` are kept, and for
    each kept row columns 1, 4 and 5 are extracted.  The second extracted
    value (column 4) is treated as a '/'-separated list of Pinyin readings
    with tone numbers; every reading is passed through
    ``ReadingFactory.convert`` (Pinyin -> Pinyin, source tone marks given as
    numbers, missing tone treated as fifth tone) and the results are joined,
    each followed by a single space.

    :param freqFile: path of the UTF-8 encoded, tab-separated file
    :param startNo: slice start applied to the list of rows
    :param endNo: slice end applied to the list of rows
    :return: list of ``[column 1, converted readings, column 5]`` lists
    """
    # The reader is consumed inside the ``with`` block so the file handle is
    # closed deterministically instead of being leaked.
    with codecs.open(freqFile, 'rb', "utf-8") as freqHandle:
        reader = unicode_csv_reader(freqHandle, dialect='excel-tab')
        # Materialise first so that list slicing semantics (including
        # negative indices) stay exactly as before.
        frequencyList = list(reader)[startNo:endNo]

    # One factory is enough for all rows; creating it per row was wasted work.
    pinyin = ReadingFactory()

    chars = []
    for row in frequencyList:
        templist = [row[i] for i in (1, 4, 5)]
        # Every converted reading keeps its trailing space, as before.
        templist[1] = "".join(
            pinyin.convert(reading,
                           'Pinyin',
                           'Pinyin',
                           sourceOptions={
                               'toneMarkType': 'numbers',
                               'missingToneMark': 'fifth'
                           }) + " "
            for reading in templist[1].split('/'))
        chars.append(templist)

    return chars
Ejemplo n.º 2
0
def getReadingOperator(readingName, readingOptions=None):
    """Return the module-wide reading operator, creating it on first use.

    The operator is stored in the module global ``_readingOperator``.  It is
    only built while that global is falsy; once it is set, the cached
    instance is returned and ``readingName`` / ``readingOptions`` of later
    calls are ignored.

    :param readingName: reading name handed to
        ``ReadingFactory.createReadingOperator``
    :param readingOptions: optional dict of keyword options for the operator;
        ``None`` (the default) means no options
    :return: the cached reading operator
    """
    global _readingOperator
    if not _readingOperator:
        # ``None`` replaces the former mutable ``{}`` default argument.
        options = {} if readingOptions is None else readingOptions
        readingFactory = ReadingFactory()
        _readingOperator = readingFactory.createReadingOperator(readingName,
            **options)
    return _readingOperator
Ejemplo n.º 3
0
    def __init__(self, fromReading, toReading, variant=None, **options):
        """Set up the transliterator id and the matching reading converter.

        The id has the form ``<fromReading>-<toReading>`` and gets a
        ``/<variant>`` suffix when a truthy ``variant`` is given.  Remaining
        keyword ``options`` are forwarded to
        ``ReadingFactory.createReadingConverter``.
        """
        transliteratorId = '%s-%s' % (fromReading, toReading)
        if variant:
            transliteratorId += '/' + variant
        self.id = transliteratorId

        icu.Transliterator.__init__(self, self.id)

        factory = ReadingFactory()
        self._conv = factory.createReadingConverter(fromReading, toReading,
                                                    **options)
Ejemplo n.º 4
0
    def setUp(self):
        """Prepare the reading factory and, if available, ICU transliterators.

        When PyICU can be imported, ``self.toNumeric`` is the
        "Latin-NumericPinyin" forward transliterator and ``self.fromNumeric``
        its inverse.  Without PyICU neither attribute is set.
        """
        NeedsDatabaseTest.setUp(self)
        self.f = ReadingFactory(dbConnectInst=self.db)

        try:
            import PyICU

            forward = PyICU.UTransDirection.UTRANS_FORWARD
            numericPinyin = PyICU.Transliterator.createInstance(
                "Latin-NumericPinyin", forward)
            self.toNumeric = numericPinyin
            self.fromNumeric = numericPinyin.createInverse()
        except ImportError:
            # PyICU is optional here; the attributes are simply left unset.
            pass
Ejemplo n.º 5
0
    def setUp(self):
        """Resolve the converter class under test and create the factory.

        ``self.readingConverterClass`` becomes the first class returned by
        ``getReadingConverterClasses()`` whose ``CONVERSION_DIRECTIONS``
        contains ``CONVERSION_DIRECTION``, or ``None`` if there is none.
        """
        NeedsDatabaseTest.setUp(self)
        direction = self.CONVERSION_DIRECTION
        self.fromReading, self.toReading = direction

        candidates = self.getReadingConverterClasses().values()
        # First match wins; ``None`` stands in for "no converter found".
        self.readingConverterClass = next(
            (clss for clss in candidates
             if direction in clss.CONVERSION_DIRECTIONS),
            None)

        self.f = ReadingFactory(dbConnectInst=self.db)
Ejemplo n.º 6
0
def _decomposeAndRemovePinyinTones(string, type='diacritics'):
	"""Split a Pinyin string into toneless, lower-case reading entities.

	``None`` is passed through unchanged.  Non-unicode input is decoded as
	UTF-8.  The string is decomposed with cjklib, every part is converted to
	Pinyin without tone marks (``type`` names the tone mark type of the
	input), lower-cased and has ``ü`` replaced by ``v``.  Parts that end up
	as a single space or an apostrophe are dropped.
	"""
	if string is None:
		return None
	if not isinstance(string, unicode):
		string = unicode(string, 'utf-8')
	from cjklib.reading import ReadingFactory
	factory = ReadingFactory()
	entities = factory.decompose(string, 'Pinyin')
	# Convert every entity first, then filter, mirroring the two-pass original.
	toneless = []
	for entity in entities:
		converted = factory.convert(entity, 'Pinyin', 'Pinyin',
			sourceOptions={'toneMarkType': type},
			targetOptions={'toneMarkType': 'none'})
		toneless.append(converted.lower().replace(u'ü', u'v'))
	separators = (' ', "'")
	return [part for part in toneless if part not in separators]
Ejemplo n.º 7
0
    def testEveryConverterHasConsistencyTest(self):
        """
        Check if every conversion direction of every reading converter has a
        consistency test case.
        """
        testClasses = self.getReadingConverterConsistencyTestClasses()
        testedDirections = [clss.CONVERSION_DIRECTION for clss in testClasses]
        self.f = ReadingFactory(dbConnectInst=self.db)

        for clss in self.f.getReadingConverterClasses():
            for direction in clss.CONVERSION_DIRECTIONS:
                # ``direction`` is a (from, to) pair and fills both
                # placeholders.  The leading space in the second literal was
                # missing, which glued the reading name to "has".
                self.assertTrue(direction in testedDirections,
                    "Conversion from %s to %s" % direction
                    + " has no ReadingOperatorConsistencyTest")
Ejemplo n.º 8
0
    def setDictionaryInstance(self, dictInstance):
        """Bind the dictionary and prepare the reading conversion for it.

        After delegating to the parent class, the bound dictionary must offer
        ``READING`` and ``READING_OPTIONS``; these become the source reading
        and its options.  A reading factory is created on the dictionary's
        database connection.

        :raise ValueError: if the dictionary lacks either attribute, or if
            the conversion from the dictionary's reading to ``toReading``
            (falling back to the source reading) is not supported
        """
        super(ReadingConversion, self).setDictionaryInstance(dictInstance)

        boundDict = self._dictInstance
        if not (hasattr(boundDict, 'READING')
                and hasattr(boundDict, 'READING_OPTIONS')):
            raise ValueError('Incompatible dictionary')

        self.fromReading = boundDict.READING
        self.sourceOptions = boundDict.READING_OPTIONS
        self._readingFactory = ReadingFactory(dbConnectInst=boundDict.db)

        # Without an explicit target the conversion stays within the reading.
        targetReading = self.toReading or self.fromReading
        supported = self._readingFactory.isReadingConversionSupported(
            self.fromReading, targetReading)
        if not supported:
            raise ValueError("Conversion from '%s' to '%s' not supported" %
                             (self.fromReading, targetReading))
Ejemplo n.º 9
0
    def open(self, dbname):
        """Open the database.

        Stores ``dbname``, uses it as the dictionary name unless
        ``_dictionaryName`` was already set, and loads the dictionary via
        ``getDictionary``.  If the dictionary declares a reading whose
        operator class offers ``guessReadingDialect``, that class is kept in
        ``self._opClass``.

        :param dbname: name of the database / dictionary to open
        :return: ``True`` on success, ``False`` if ``getDictionary`` raises
            ``ValueError``
        """
        # Indentation is spaces only: the former tab/space mix is rejected
        # by Python 3 with a TabError.
        self.dbname = dbname
        if not hasattr(self, '_dictionaryName'):
            self._dictionaryName = dbname
        try:
            self._dictInst = getDictionary(self._dictionaryName,
                entryFactory=entry.UnifiedHeadword())
        except ValueError as e:
            if debug:
                print(e, file=sys.stderr)
            return False

        if self._dictInst.READING:
            f = ReadingFactory()
            opClass = f.getReadingOperatorClass(self._dictInst.READING)
            if hasattr(opClass, 'guessReadingDialect'):
                self._opClass = opClass

        return True
Ejemplo n.º 10
0
 def fix_pinyin(self, pinyin):
     """Return ``pinyin`` rewritten with tone numbers, fifth tone as 0.

     The text is converted Pinyin -> Pinyin by cjklib with numeric tone
     marks, then every '5' is replaced by '0'.  If cjklib cannot be
     imported, ``pinyin`` is returned unchanged.

     On the first call the "chinese" sub-folder of the Anki add-on folder is
     appended to ``sys.path`` (when running inside Anki) so that a cjklib
     copy located there can be imported.
     """
     # Hacks. It is overkill to ship cjklib with this add-on. But
     # to get the tone numbers as numbers, we should use it. My
     # hope (guess) is that the typical user that will want Chinese
     # pronunciations will also have TTEMPÉ's (version of mine)
     # chinese-support-plugin installed. So try to use that and
     # don't complain if it doesn't work.
     if not self.have_tried_cjklib_hack:
         try:
             # If this works, the whole shebang is run as an Anki2
             # add-on. If not, we will still look for a system-wide
             # cjklib, but obviously not for another add-on.
             from aqt.utils import isWin
         except Exception:
             # Best effort only; narrowed from a bare ``except`` so that
             # KeyboardInterrupt/SystemExit are no longer swallowed.
             pass
         else:
             from aqt import mw
             addon_dir = mw.pm.addonFolder()
             if isWin:
                 # The isWin bit is copied from TTEMPÉ's code.
                 addon_dir = addon_dir.encode(sys.getfilesystemencoding())
             sys.path.append(os.path.join(addon_dir, "chinese"))
         # Must be the same attribute the guard above reads.  The code used
         # to set ``have_tried_cjk_hack``, so the hack re-ran and re-appended
         # to sys.path on every call.
         self.have_tried_cjklib_hack = True
     if not self.reading_factory:
         try:
             from cjklib.reading import ReadingFactory
         except ImportError:
             return pinyin
         else:
             self.reading_factory = ReadingFactory()
     return self.reading_factory.convert(pinyin,
                                         'Pinyin',
                                         'Pinyin',
                                         targetOptions={
                                             'toneMarkType': 'numbers'
                                         }).replace('5', '0')
Ejemplo n.º 11
0
from cjklib.reading import ReadingFactory

# Module-wide factory; the conversion lambdas in DConv below call f.convert().
f = ReadingFactory()


# NOTE(review): this list literal is not bound to any name, so evaluating it
# has no effect at runtime.  Presumably it is kept as a reference of reading
# names (the commented-out ones being excluded) -- confirm or remove.
[
    'GR', 'Pinyin', 'WadeGiles', 'MandarinBraille', 'MandarinIPA',
    'ShanghaineseIPA',
    #'Hangul',
    #'Kana', 'Hiragana', 'Katakana',
    'CantoneseYale', 'CantoneseIPA', 'Jyutping'
]


DConv = {
    # Mandarin conversions
    ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|x-Pinyin'): lambda s: f.convert(s, 'GR', 'Pinyin'),
    ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|Wade-Giles'): lambda s: f.convert(s, 'GR', 'WadeGiles'),
    ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|Braille'): lambda s: f.convert(s, 'GR', 'MandarinBraille'),
    ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|Alternative IPA'): lambda s: f.convert(s, 'GR', 'MandarinIPA'),


    ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|x-Pinyin'): lambda s: f.convert(s, 'Pinyin', 'Pinyin', sourceOptions={
        'toneMarkType': 'numbers'
    }),
    ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|Gwoyeu Romatzyh'): lambda s: f.convert(s, 'Pinyin', 'GR', sourceOptions={
        'toneMarkType': 'numbers'
    }),
    ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|Wade-Giles'): lambda s: f.convert(s, 'Pinyin', 'WadeGiles', sourceOptions={
        'toneMarkType': 'numbers'
    }),
Ejemplo n.º 12
0
def runTests(tests, databases, registerUnicode, iteration=10):
    """Time the dictionary lookup methods for each requested test database.

    For every test number in ``tests`` a SQLite connection to
    ``databases[no]`` is opened (with ``cjklib`` attached and
    ``registerUnicode[no]`` as the ``registerUnicode`` setting).  Every
    available dictionary whose table exists in the main schema is then timed
    with ``timeit`` for the methods ``getFor``, ``getForHeadword``,
    ``getForReading`` and ``getForTranslation`` over ``SEARCH_REQUESTS``.

    :param tests: iterable of test numbers; each indexes ``databases`` and
        ``registerUnicode``
    :param databases: mapping/sequence from test number to SQLite file path
    :param registerUnicode: mapping/sequence from test number to the
        ``registerUnicode`` connection setting
    :param iteration: number of repetitions handed to ``Timer.timeit``
    :return: ``{test number: {dictionary name: {method name: seconds}}}``
    :raise ValueError: if no dictionary is found for a database
    """
    # Local import: ``types.ModuleType`` replaces ``imp.new_module``; the
    # ``imp`` module was removed in Python 3.12.
    import types

    f = ReadingFactory()

    timing = {}
    for no in tests:
        print("Running test %d (reading from %s)..." % (no, databases[no]))

        connection = {
            'sqlalchemy.url': 'sqlite:///%s' % databases[no],
            'attach': ['cjklib'],
            'registerUnicode': registerUnicode[no]
        }
        db = dbconnector.getDBConnector(connection)
        availableDicts = [dictClass.DICTIONARY_TABLE for dictClass
                          in dictionary.BaseDictionary\
                             .getAvailableDictionaries(db)]
        # Only dictionaries whose table is actually present can be timed.
        dictionaries = list(
            set(availableDicts)
            & set(db.engine.table_names(schema=db._mainSchema)))
        if not dictionaries:
            raise ValueError("No dictionaries found")

        print("Found dictionaries '%s'" % "', '".join(dictionaries))

        runTime = {}
        for dictName in dictionaries:
            dictClass = dictionary.BaseDictionary.getDictionaryClass(dictName)
            dictInstance = dictClass(dbConnectInst=db)

            # Falsy READING short-circuits; hasattr() below then fails and
            # the requests are issued without dialect options.
            opClass = (dictClass.READING
                       and f.getReadingOperatorClass(dictClass.READING))
            if hasattr(opClass, 'guessReadingDialect'):
                requestList = []
                for request in SEARCH_REQUESTS:
                    options = opClass.guessReadingDialect(request)
                    requestList.append((request, options))
            else:
                requestList = [(request, {}) for request in SEARCH_REQUESTS]

            # timeit executes statement strings in its own namespace, so the
            # objects under test are published through a synthetic module
            # that the setup statement can import.
            mod = types.ModuleType('timeit_runmod')
            mod.runRequest = runRequest
            mod.dictInstance = dictInstance
            mod.requestList = requestList

            sys.modules['timeit_runmod'] = mod

            methodTime = {}
            for method in ('getFor', 'getForHeadword', 'getForReading',
                           'getForTranslation'):
                t = Timer(
                    """timeit_runmod.runRequest(
                                timeit_runmod.dictInstance,
                                timeit_runmod.requestList,
                                method='%s')
                          """ % method, "import timeit_runmod")
                methodTime[method] = t.timeit(iteration)
            runTime[dictName] = methodTime

        timing[no] = runTime

    return timing
Ejemplo n.º 13
0
 def setUp(self):
     """Run the base set-up, then create a factory on the test database."""
     CharacterLookupTest.setUp(self)
     connection = self.db
     self.f = ReadingFactory(dbConnectInst=connection)
Ejemplo n.º 14
0
def main():
    language, output_encoding = locale.getdefaultlocale()

    if len(sys.argv) == 2:
        modus = sys.argv[1]
        if modus not in modi:
            print "invalid modus, choose one out of: " + ", ".join(modi.keys())
            sys.exit(1)
    else:
        print "give a modus, choose one out of: " + ", ".join(modi.keys())
        sys.exit(1)

    fromReading, toReading, entryFunc, readingOpt = modi[modus]

    initialRules = INITIAL_RULES[(fromReading, toReading)]
    finialRules = FINAL_RULES[(fromReading, toReading)]
    extraSyllables = EXTRA_SYLLABLES[(fromReading, toReading)]

    # entry set
    global entrySet
    entrySet = set()
    # build table and use scheme with almost perfect grouping according to
    #   pronunciation, then use headers to get the initial's and final's
    #   pronunciation.
    op = ReadingFactory().createReadingOperator(fromReading, **readingOpt)

    # get splitted syllables, finals in first row, initials in first column
    for syllable in op.getReadingEntities():
        initial, final = op.getOnsetRhyme(syllable)
        # only apply rules if syllable isn't given an extra mapping in
        #   EXTRA_SYLLABLES
        if not syllable in extraSyllables:
            # check if we have rules
            if initialRules[initial] != None and finialRules[final] != None:
                # check for ambiguous mappings
                if type(initialRules[initial]) == type({}):
                    initialFeatures = initialRules[initial].keys()
                else:
                    initialFeatures = [None]
                if type(finialRules[final]) == type({}):
                    finalFeatures = finialRules[final].keys()
                else:
                    finalFeatures = [None]

                # go through all mappings
                for initialFeature in initialFeatures:
                    for finalFeature in finalFeatures:
                        if initialFeature:
                            targetInitial \
                                = initialRules[initial][initialFeature]
                        else:
                            targetInitial = initialRules[initial]

                        if finalFeature:
                            targetFinal = finialRules[final][finalFeature]
                        else:
                            targetFinal = finialRules[final]

                        entry = entryFunc(syllable, targetInitial, targetFinal,
                            initialFeature, finalFeature)
                        if entry != None:
                            entrySet.add(entry)
            else:
                print >> sys.stderr, ("missing rule(s) for syllable '" \
                    + syllable + "' with initial/final '" + initial + "'/'" \
                    + final + "'").encode(output_encoding)

    # print extra syllables
    for syllable in extraSyllables:
        if extraSyllables[syllable]:
            initialRule, finalRule = extraSyllables[syllable]
            # check for ambiguous mappings
            if type(initialRule) == type({}):
                initialFeatures = initialRule.keys()
            else:
                initialFeatures = [None]
            if type(finalRule) == type({}):
                finalFeatures = finalRule.keys()
            else:
                finalFeatures = [None]

            # go through all mappings
            for initialFeature in initialFeatures:
                for finalFeature in finalFeatures:
                    if initialFeature:
                        targetInitial = initialRule[initialFeature]
                    else:
                        targetInitial = initialRule

                    if finalFeature:
                        targetFinal = finalRule[finalFeature]
                    else:
                        targetFinal = finalRule

                    entry = entryFunc(syllable, targetInitial, targetFinal,
                        initialFeature, finalFeature)
                    if entry != None:
                        entrySet.add(entry)

    notIncludedSyllables = [syllable for syllable in extraSyllables \
        if not extraSyllables[syllable]]
    if notIncludedSyllables:
        print >> sys.stderr, ("Syllables not included in table: '" \
            + "', '".join(sorted(notIncludedSyllables)) + "'")\
            .encode(output_encoding)

    entryList = list(entrySet)
    entryList.sort()
    print "\n".join(entryList).encode(output_encoding)
Ejemplo n.º 15
0
    def __init__(self, *args, **kwargs):
        """Build the settings window: file chooser, card count, direction.

        All positional and keyword arguments are forwarded to
        ``Tk.Tk.__init__``.  The widgets are laid out with ``grid``; the
        order of the statements below determines the layout, in particular
        for the final OK button, which is gridded without an explicit row.
        """
        Tk.Tk.__init__(self, *args, **kwargs)
        self.title("Ntrain")
        # place window in the center
        self.eval('tk::PlaceWindow %s center' %
                  self.winfo_pathname(self.winfo_id()))
        # NOTE: this reconfigures the named font "TkDefaultFont" itself
        # (size 30), not a copy of it.
        self._default_font = tkFont.nametofont("TkDefaultFont")
        self._default_font.configure(size=30)
        # define default dataset: 'chinese100.xlsx' next to this source file
        self._defaultfile = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'chinese100.xlsx')

        # show the default file's base name in an entry field
        basename = os.path.basename(self._defaultfile)
        self._filename_value = Tk.StringVar()
        self._sett_fn_label = Tk.Entry(textvariable=self._filename_value,
                                       font=self._default_font,
                                       width=12)
        self._filename_value.set(basename)
        self._sett_fn_label.grid(row=1, column=0, sticky=Tk.W)

        # button to browse for datafile
        self.browse = Tk.Button(self, text="Browse", command=self._get_file)
        self.browse.grid(row=1, column=1, sticky=Tk.W)

        # Reset button; invokes self._reset_list
        self._reset_button = Tk.Button(text="Reset", command=self._reset_list)
        self._reset_button.grid(row=1, column=2)

        # label
        self._sett_label = Tk.Label(text="Number of Cards:")
        self._sett_label.grid(row=2, column=0, sticky=Tk.E)

        # entry field for number of cards, pre-filled with "30" and focused
        entryText = Tk.StringVar()
        self._sett_entry = Tk.Entry(textvariable=entryText,
                                    font=self._default_font,
                                    width=3)
        entryText.set("30")
        self._sett_entry.grid(row=2, column=1, sticky=Tk.W)
        self._sett_entry.focus_set()

        # direction option: value 1 = "Ch to E" (default), value 2 = "E to Ch"
        self._radio_val = Tk.IntVar()
        self._radio1 = Tk.Radiobutton(text="Ch to E",
                                      variable=self._radio_val,
                                      value=1)
        self._radio1.grid(row=4, column=0)
        self._radio2 = Tk.Radiobutton(text="E to Ch",
                                      variable=self._radio_val,
                                      value=2)
        self._radio2.grid(row=4, column=1)
        self._radio_val.set(1)

        # OK button to start game
        self._sett_button = Tk.Button(text="OK", command=self._start_game)
        self._sett_button.grid(columnspan=3)

        # Bind return key to start game
        # NOTE(review): the same callback is used as a button command (called
        # without arguments) and as an event binding (called with an event);
        # confirm self._start_game accepts both call forms.
        self.bind('<Return>', self._start_game)

        self._p = Pinyin()
        self._f = ReadingFactory()