def getChars(freqFile, startNo, endNo):
    """Read a slice of a tab-separated frequency list and convert its readings.

    Rows ``startNo:endNo`` of ``freqFile`` are loaded. From each row the
    columns 1, 4 and 5 are kept. The value taken from column 4 is split on
    '/' and every part is passed through ``ReadingFactory.convert`` (Pinyin
    to Pinyin, numeric tone marks on the source side); the results are
    concatenated, each followed by a single space.

    :param freqFile: path of the UTF-8 encoded, tab-separated frequency file
    :param startNo: slice start index into the list of rows
    :param endNo: slice end index (exclusive) into the list of rows
    :return: list of three-element lists
        ``[column 1, converted readings, column 5]``
    """
    # Close the file deterministically instead of leaking the handle.
    with codecs.open(freqFile, 'rb', "utf-8") as freqHandle:
        reader = unicode_csv_reader(freqHandle, dialect='excel-tab')
        # The whole list is read first so that negative or None slice
        # bounds keep working exactly like before.
        frequencyList = list(reader)[startNo:endNo]

    # One factory serves every row; creating it per row is wasted work.
    pinyin = ReadingFactory()

    chars = []
    for row in frequencyList:
        templist = [row[i] for i in (1, 4, 5)]
        # Each converted reading keeps its trailing space (also the last).
        templist[1] = "".join(
            pinyin.convert(reading,
                           'Pinyin',
                           'Pinyin',
                           sourceOptions={
                               'toneMarkType': 'numbers',
                               'missingToneMark': 'fifth'
                           }) + " "
            for reading in templist[1].split('/'))
        chars.append(templist)

    return chars
Ejemplo n.º 2
0
class PinyinICUTest(NeedsDatabaseTest, unittest.TestCase):
    """Test Pinyin tonemark conversion on ICU transformation rule."""
    CONVERSION_DIRECTION = ('Pinyin', 'Pinyin')

    def setUp(self):
        """
        Create the reading factory and, when PyICU can be imported, the ICU
        transliterators for both conversion directions.
        """
        NeedsDatabaseTest.setUp(self)
        self.f = ReadingFactory(dbConnectInst=self.db)

        try:
            import PyICU

            self.toNumeric = PyICU.Transliterator.createInstance(
                "Latin-NumericPinyin", PyICU.UTransDirection.UTRANS_FORWARD)
            self.fromNumeric = self.toNumeric.createInverse()
        except ImportError:
            # PyICU is optional: without it the attributes stay unset and
            # testToneMarkPlacement() returns without checking anything.
            pass

    def testToneMarkPlacement(self):
        """Test Pinyin tonemark conversion on ICU transformation rule."""
        if not hasattr(self, 'toNumeric'):
            return

        # Diacritics -> numbers: compare our conversion with ICU's result.
        for readingEntity in self.f.getReadingEntities('Pinyin'):
            # These entities are excluded from the comparison.
            if readingEntity in (u'hn\u0304g', u'h\u0144g', u'h\u0148g',
                u'h\u01f9g', u'n\u0304g', u'\u0144g', u'\u0148g',
                u'\u01f9g'):
                continue
            targetEntity = self.f.convert(readingEntity, 'Pinyin', 'Pinyin',
                targetOptions={'toneMarkType': 'numbers',
                    'missingToneMark': 'fifth'})
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(targetEntity,
                self.toNumeric.transliterate(readingEntity))

        # Numbers -> diacritics: compare with the inverse ICU transliterator.
        for readingEntity in self.f.getReadingEntities('Pinyin',
            toneMarkType='numbers', missingToneMark='fifth'):
            # These entities are excluded from the comparison.
            if readingEntity in ('hng1', 'hng2', 'hng3', 'hng4', 'ng1', 'ng2',
                'ng3', 'ng4', u'ê1', u'ê2', u'ê3', u'ê4'):
                continue
            targetEntity = self.f.convert(readingEntity, 'Pinyin', 'Pinyin',
                sourceOptions={'toneMarkType': 'numbers',
                    'missingToneMark': 'fifth'})
            self.assertEqual(targetEntity,
                self.fromNumeric.transliterate(readingEntity))
Ejemplo n.º 3
0
class ReadingConversion(Base):
    """Formatting strategy that converts reading strings to a target reading."""

    def __init__(self, toReading=None, targetOptions=None):
        """
        Sets up the conversion strategy.

        :type toReading: str
        :param toReading: reading to convert to; when omitted, the reading of
            the dictionary is used.
        :type targetOptions: dict
        :param targetOptions: conversion options for the target reading
        """
        Base.__init__(self)
        self.toReading = toReading
        # A falsy value (None or an empty dict) is replaced by a fresh dict.
        self.targetOptions = targetOptions if targetOptions else {}

    def setDictionaryInstance(self, dictInstance):
        """
        Binds the strategy to a dictionary and checks that the conversion
        is possible.

        :param dictInstance: dictionary instance; must provide ``READING``,
            ``READING_OPTIONS`` and ``db``
        :raise ValueError: if the dictionary lacks the reading attributes or
            the conversion to the target reading is not supported
        """
        super(ReadingConversion, self).setDictionaryInstance(dictInstance)

        dictionary = self._dictInstance
        if not (hasattr(dictionary, 'READING')
                and hasattr(dictionary, 'READING_OPTIONS')):
            raise ValueError('Incompatible dictionary')

        self.fromReading = dictionary.READING
        self.sourceOptions = dictionary.READING_OPTIONS
        self._readingFactory = ReadingFactory(dbConnectInst=dictionary.db)

        # Without an explicit target the source reading is used.
        target = self.toReading or self.fromReading
        supported = self._readingFactory.isReadingConversionSupported(
            self.fromReading, target)
        if not supported:
            raise ValueError("Conversion from '%s' to '%s' not supported" %
                             (self.fromReading, target))

    def format(self, string):
        """
        Converts the given reading string to the target reading.

        :param string: reading string in the dictionary's reading
        :return: converted string, or ``None`` if decomposition, composition
            or conversion fails
        """
        if self.toReading:
            target = self.toReading
        else:
            target = self.fromReading

        try:
            converted = self._readingFactory.convert(
                string, self.fromReading, target,
                sourceOptions=self.sourceOptions,
                targetOptions=self.targetOptions)
        except (exception.DecompositionError, exception.CompositionError,
                exception.ConversionError):
            return None
        return converted
Ejemplo n.º 4
0
def _decomposeAndRemovePinyinTones(string, type='diacritics'):
    """Split a Pinyin string into its entities and strip the tone marks.

    The string is decomposed with ``ReadingFactory.decompose``; every entity
    is converted to tone-less Pinyin, lower-cased, and has ``ü`` replaced by
    ``v``. Entities that end up as a single space or an apostrophe are
    dropped.

    :param string: Pinyin text; byte strings are decoded as UTF-8
    :param type: value used as the source ``toneMarkType`` option
    :return: list of converted entities, or ``None`` if ``string`` is ``None``
    """
    if string is None:
        return None
    if not isinstance(string, unicode):
        string = unicode(string, 'utf-8')

    from cjklib.reading import ReadingFactory
    factory = ReadingFactory()

    # Convert everything first, then filter, so a conversion error on any
    # entity surfaces regardless of what would be filtered out.
    converted = []
    for entity in factory.decompose(string, 'Pinyin'):
        toneless = factory.convert(
            entity, 'Pinyin', 'Pinyin',
            sourceOptions={'toneMarkType': type},
            targetOptions={'toneMarkType': 'none'})
        converted.append(toneless.lower().replace(u'ü', u'v'))

    result = []
    for entity in converted:
        if entity != ' ' and entity != "'":
            result.append(entity)
    return result
Ejemplo n.º 5
0
class ReadingConversion(Base):
    """Strategy converting an entry's reading string into a target reading."""

    def __init__(self, toReading=None, targetOptions=None):
        """
        Creates the conversion strategy.

        :type toReading: str
        :param toReading: name of the target reading; if left out, the
            dictionary's own reading is taken.
        :type targetOptions: dict
        :param targetOptions: options applied to the target reading
        """
        Base.__init__(self)
        self.toReading = toReading
        # Falsy input (None, empty dict) results in a new empty dict.
        self.targetOptions = targetOptions or {}

    def setDictionaryInstance(self, dictInstance):
        """
        Attaches the dictionary and validates the requested conversion.

        :param dictInstance: dictionary instance offering ``READING``,
            ``READING_OPTIONS`` and ``db``
        :raise ValueError: if one of the reading attributes is missing or
            the conversion is not supported
        """
        super(ReadingConversion, self).setDictionaryInstance(dictInstance)

        for attribute in ('READING', 'READING_OPTIONS'):
            if not hasattr(self._dictInstance, attribute):
                raise ValueError('Incompatible dictionary')

        self.fromReading = self._dictInstance.READING
        self.sourceOptions = self._dictInstance.READING_OPTIONS

        factory = ReadingFactory(dbConnectInst=self._dictInstance.db)
        self._readingFactory = factory

        targetReading = self.toReading or self.fromReading
        if factory.isReadingConversionSupported(self.fromReading,
                                                targetReading):
            return
        raise ValueError("Conversion from '%s' to '%s' not supported"
                         % (self.fromReading, targetReading))

    def format(self, string):
        """
        Converts ``string`` to the target reading.

        :param string: reading string in the dictionary's reading
        :return: the converted string; if decomposition, composition or
            conversion fails, the input string is returned unchanged
        """
        targetReading = self.toReading or self.fromReading
        conversionArgs = (string, self.fromReading, targetReading)
        try:
            return self._readingFactory.convert(
                *conversionArgs,
                sourceOptions=self.sourceOptions,
                targetOptions=self.targetOptions)
        except (exception.DecompositionError, exception.CompositionError,
                exception.ConversionError):
            # Deliberate fallback: hand back the unconverted input.
            return string
Ejemplo n.º 6
0
class ReadingTransliterator(icu.Transliterator):
    """ICU transliterator that delegates to a ReadingFactory converter."""

    def __init__(self, fromReading, toReading, variant=None, **options):
        """
        Build the transliterator id and the underlying reading converter.

        The id has the form ``<fromReading>-<toReading>``, followed by
        ``/<variant>`` when a variant is given. ``options`` are passed on to
        ``createReadingConverter``.
        """
        transliteratorId = '%s-%s' % (fromReading, toReading)
        if variant:
            transliteratorId = transliteratorId + '/' + variant
        self.id = transliteratorId

        icu.Transliterator.__init__(self, self.id)

        factory = ReadingFactory()
        self._conv = factory.createReadingConverter(fromReading, toReading,
                                                    **options)

    def handleTransliterate(self, text, position, complete):
        """
        Replace ``text[position.start:position.limit]`` with its conversion
        and move the position markers past the replaced span.
        """
        begin, end = position.start, position.limit
        source = str(text[begin:end])

        target = self._conv.convert(source)
        text[begin:end] = target

        # The replacement may differ in length; shift both limits by the
        # number of characters the text shrank.
        shrinkage = len(source) - len(target)
        position.limit -= shrinkage
        position.contextLimit -= shrinkage

        position.start = position.limit

    @staticmethod
    def register(fromReading,
                 toReading,
                 variant=None,
                 registerInverse=False,
                 **options):
        """
        Register a transliterator instance with ICU.

        With ``registerInverse`` a second instance for the opposite
        direction is registered, with ``sourceOptions`` and
        ``targetOptions`` swapped.

        :return: id of the forward transliterator
        """
        forward = ReadingTransliterator(fromReading, toReading,
                                        variant=variant, **options)
        icu.Transliterator.registerInstance(forward)

        if not registerInverse:
            return forward.id

        swapped = dict(options,
                       targetOptions=options.get('sourceOptions', {}),
                       sourceOptions=options.get('targetOptions', {}))
        backward = ReadingTransliterator(toReading, fromReading,
                                         variant=variant, **swapped)
        icu.Transliterator.registerInstance(backward)

        return forward.id
def getChars(freqFile, startNo, endNo):
    """Load rows ``startNo:endNo`` of a frequency file and convert readings.

    The file is read as UTF-8 with the 'excel-tab' CSV dialect. For every
    selected row the columns 1, 4 and 5 are kept; the column-4 value is
    split on '/' and each part is run through ``ReadingFactory.convert``
    (Pinyin to Pinyin, source tone marks given as numbers). The converted
    parts are joined, each one followed by a space.

    :param freqFile: path of the frequency file
    :param startNo: slice start index into the list of rows
    :param endNo: slice end index (exclusive) into the list of rows
    :return: list of ``[column 1, converted readings, column 5]`` lists
    """
    # ``with`` guarantees the file is closed; it was left open before.
    with codecs.open(freqFile, 'rb', "utf-8") as source:
        reader = unicode_csv_reader(source, dialect='excel-tab')
        # Read everything, then slice, so any slice bounds behave as before.
        frequencyList = [x for x in reader][startNo:endNo]

    # Hoisted out of the loop: the factory does not depend on the row.
    pinyin = ReadingFactory()

    chars = []
    for row in frequencyList:
        templist = [row[i] for i in (1, 4, 5)]
        converted = []
        for reading in templist[1].split('/'):
            converted.append(pinyin.convert(
                reading, 'Pinyin', 'Pinyin',
                sourceOptions={'toneMarkType': 'numbers',
                               'missingToneMark': 'fifth'}) + " ")
        # Joining once avoids repeated string concatenation; the trailing
        # space after the last reading is kept.
        templist[1] = "".join(converted)

        chars.append(templist)

    return chars
Ejemplo n.º 8
0
class ReadingTransliterator(icu.Transliterator):
    """Transliterator for ICU whose conversion is done by a reading converter
    obtained from ReadingFactory."""

    def __init__(self, fromReading, toReading, variant=None, **options):
        """
        Compose the id ``<fromReading>-<toReading>[/<variant>]``, initialise
        the ICU base class with it and create the reading converter, which
        receives ``options``.
        """
        idParts = ['%s-%s' % (fromReading, toReading)]
        if variant:
            idParts.append('/' + variant)
        self.id = ''.join(idParts)

        icu.Transliterator.__init__(self, self.id)

        self._conv = ReadingFactory().createReadingConverter(
            fromReading, toReading, **options)

    def handleTransliterate(self, text, position, complete):
        """
        Convert the span between ``position.start`` and ``position.limit``
        in place and advance ``position`` behind the converted text.
        """
        span = slice(position.start, position.limit)
        before = unicode(text[span])

        after = self._conv.convert(before)
        text[span] = after

        # Adjust the limits by the change in length of the replaced span.
        delta = len(before) - len(after)
        position.limit -= delta
        position.contextLimit -= delta

        position.start = position.limit

    @staticmethod
    def register(fromReading, toReading, variant=None, registerInverse=False,
        **options):
        """
        Create a transliterator and register it with ICU.

        If ``registerInverse`` is set, a transliterator for the reverse
        direction is registered as well; its ``sourceOptions`` and
        ``targetOptions`` are the forward ones exchanged.

        :return: id of the forward transliterator
        """
        trans = ReadingTransliterator(
            fromReading, toReading, variant=variant, **options)
        icu.Transliterator.registerInstance(trans)

        if registerInverse:
            sourceOptions = options.get('sourceOptions', {})
            targetOptions = options.get('targetOptions', {})

            inverseOptions = options.copy()
            inverseOptions.update(targetOptions=sourceOptions,
                                  sourceOptions=targetOptions)

            icu.Transliterator.registerInstance(
                ReadingTransliterator(toReading, fromReading,
                                      variant=variant, **inverseOptions))

        return trans.id
Ejemplo n.º 9
0
class ChineseLessonsComMandarinPronunciation(GlobbingPronunciationBuilder):
    """
    Builds an index on pronunciation files for Mandarin provided by
    chinese-lessons.com.
    """
    PROVIDES = "Pronunciation_Pinyin"
    DEPENDS = ['PinyinSyllables']

    # Runtime directory name; its spelling is kept exactly as it is.
    BASE_DIRECTORY_NAME = "chineselessionscom_cmn"

    def __init__(self, **options):
        """Initialise the builder and create a ReadingFactory for file names."""
        super(ChineseLessonsComMandarinPronunciation, self).__init__(**options)

        self.readingFactory = ReadingFactory()

    def getReadingFromFileName(self, fileName):
        """
        Derive the reading from a file name.

        The name without its extension is converted from Pinyin with
        numeric tone marks.

        :param fileName: name of the pronunciation file
        :return: converted reading, or ``None`` if the conversion raises
            ``UnsupportedError`` or ``ConversionError``
        """
        stem = os.path.splitext(fileName)[0]
        try:
            reading = self.readingFactory.convert(
                stem, 'Pinyin', 'Pinyin',
                sourceOptions={'toneMarkType': 'numbers'})
        except (exception.UnsupportedError, exception.ConversionError):
            return None
        return reading
Ejemplo n.º 10
0
    def handle_noargs(self, **options):
        """
        Rebuild the Redis dictionary index from the dictionary file.

        All existing ``ZH:*`` and ``PY:*`` keys are deleted first. Then every
        non-comment line of ``settings.DICT_FILE_LOCATION`` is parsed and
        written to Redis under a Chinese-word key, a Pinyin key and, for
        ``CL:`` segments, measure-word keys. A running item count is printed
        after every stored line and a summary at the end.
        """
        # Example line:
        # 一事無成 一事无成 [yi1 shi4 wu2 cheng2] /to have achieved nothing/to be a total failure/to get nowhere/

        # EMPTY ALL ZH + PY KEYS
        self._del_keys('ZH:*')
        self._del_keys('PY:*')

        # One factory is enough for the whole run; it does not depend on
        # the line being processed.
        reading_factory = ReadingFactory()

        # NOW LETS START
        item_count = 0
        # ``with`` closes the file even if a line fails to parse.
        with open(settings.DICT_FILE_LOCATION) as dict_file:
            for line in dict_file:
                # Skip comments, and blank lines that would otherwise fail
                # on the "[" lookup below.
                if line.startswith("#") or not line.strip():
                    continue

                # OPEN REDIS CONNECTION NOW
                r_server = _get_redis()

                # GATHER ALL THE MAIN VARIABLES
                new = line.split()
                numbered_pinyin = line[(line.index('[') + 1):line.index(']')]
                tonal_pinyin = reading_factory.convert(
                    numbered_pinyin, 'Pinyin', 'Pinyin',
                    sourceOptions={'toneMarkType': 'numbers', 'yVowel': 'v',
                                   'missingToneMark': 'fifth'})
                meanings = line[(line.index('/') + 1):line.rindex('/')]
                characters = new[1]

                # REMOVE ALL THE UGLY CHARACTERS
                characters = characters.replace(',', '')

                # GET AND CLEAN THE MEASURE WORD
                mws = None
                if "CL:" in meanings:
                    # Build a filtered list instead of popping from the list
                    # being iterated: popping skipped the following segment.
                    kept_meanings = []
                    for val in meanings.split('/'):
                        if "CL:" not in val:
                            kept_meanings.append(val)
                            continue

                        # Collect across all CL segments rather than
                        # restarting the list for each one.
                        if mws is None:
                            mws = []
                        for x in val.replace('CL:', '').split(','):
                            # Drop the bracketed reading; a missing "[" no
                            # longer raises ValueError.
                            x = x.split('[', 1)[0]
                            if '|' in x:
                                x = x[(x.index('|') + 1):]

                            # ADD THE MEASURE WORDS ENTRY
                            # ---------------------------
                            mws_key = settings.MEASURE_WORD_KEY % x
                            if r_server.exists(mws_key):
                                values = json.loads(_search_redis(mws_key))
                                values['chars'].append(characters)
                            else:
                                values = {'chars': [characters]}
                            r_server.set(mws_key, json.dumps(values))

                            mws.append(x)
                    meanings = "/".join(kept_meanings)

                # NOTE(review): dividing the length by 3 presumably assumes
                # a UTF-8 byte string with three bytes per character --
                # confirm. ``//`` keeps the integer division explicit.
                char_key = settings.CHINESE_WORD_KEY % (
                    (len(characters) // 3), characters)

                # CREATE THE PRONUNCIATION/MEANING PAIR
                pair = {
                    'pinyin': tonal_pinyin,
                    'pinyin_numbered': _normalize_pinyin(numbered_pinyin),
                    'meaning': meanings,
                    'measure_words': mws,
                }

                # ADD THE PINYIN ENTRY
                # --------------------
                py_key = settings.PINYIN_WORD_KEY % _pinyin_to_ascii(
                    numbered_pinyin)
                if r_server.exists(py_key):
                    values = json.loads(_search_redis(py_key))
                    if smart_unicode(characters) not in values:
                        values.append(characters)
                else:
                    values = [characters]

                r_server.set(py_key, json.dumps(values))

                # ADD THE CHINESE CHARACTER ENTRY
                # -------------------------------
                if r_server.exists(char_key):
                    values = json.loads(_search_redis(char_key))
                    values['meanings'].append(pair)
                else:
                    values = {
                        'chars': characters,
                        'meanings': [pair],
                    }

                r_server.set(char_key, json.dumps(values))

                item_count += 1
                print(item_count)

        print("%s Chinese items added" % item_count)
Ejemplo n.º 11
0
class LeoDownloader(AudioDownloader):
    """Download audio from LEO"""
    def __init__(self):
        """Set up the URL template, language tables and icon cache."""
        AudioDownloader.__init__(self)
        self.file_extension = u'.mp3'
        self.url = 'http://www.leo.org/dict/audio_{language}/{word}.mp3'
        # And, yes, they use ch for Chinese.
        # (I'm not sure if they really have anything for ru or it.)
        self.language_dict = {
            'de': 'de',
            'en': 'en',
            'es': 'es',
            'fr': 'fr',
            'it': 'it',
            'ru': 'ru',
            'zh': 'ch'
        }
        # It kind of looks like they have Swiss pronunciations, but they
        # don't.
        self.chinese_code = 'ch'
        # We should keep a number of site icons handy, with the right
        # flag for the request.
        self.site_icon_dict = {}
        self.site_file_name_encoding = 'ISO-8859-1'
        self.icon_url_dict = {
            'de': 'http://dict.leo.org/favicon.ico',
            'en': 'http://dict.leo.org/favicon.ico',
            'es': 'http://dict.leo.org/favicon_es.ico',
            'fr': 'http://dict.leo.org/favicon_fr.ico',
            'it': 'http://dict.leo.org/favicon_it.ico',
            'ru': 'http://dict.leo.org/favicon_ru.ico',
            # When we use this dict, we have already munged the 'zh' to 'ch'
            'ch': 'http://dict.leo.org/favicon_ch.ico'
        }
        # As the name implies, a hack. Try to use the cjklib TTEMPÉ
        # brings along. A system-wide installed one should work as
        # well.
        self.have_tried_cjklib_hack = False
        self.reading_factory = None

    def download_files(self, word, base, ruby, split):
        """
        Download a word from LEO

        We try to get pronunciations for the text for German, English,
        Spanish, French, Italian and Russian, and from the ruby for
        Chinese. There may not be any pronunciations available for
        Italian or Russian.

        The downloaded file is recorded in ``self.downloads_list``. For
        Chinese nothing is downloaded unless ``split`` is true. A
        ``KeyError`` is raised for a language missing from
        ``self.language_dict``.
        """
        self.downloads_list = []
        # Fix the language. EAFP.
        self.language = self.language_dict[self.language[:2].lower()]
        # set_names also checks the language.
        self.set_names(word, base, ruby)
        if self.chinese_code == self.language and not split:
            return
        # Only get the icon when we have a word
        # self.maybe_get_icon()
        self.get_flag_icon()
        # EAFP. self.query_url may return None...
        word_url = self.query_url(word, ruby)
        # ... then the get_data will blow up
        word_data = self.get_data_from_url(word_url)
        word_file_path, word_file_name = self.get_file_name()
        with open(word_file_path, 'wb') as word_file:
            word_file.write(word_data)
        # We have a file, but not much to say about it.
        self.downloads_list.append(
            (word_file_path, word_file_name, dict(Source='Leo')))

    def query_url(self, word, ruby):
        """Build query URL"""
        if self.chinese_code == self.language:
            # For Chinese the (tone-numbered) ruby is looked up, not the word.
            word = self.fix_pinyin(ruby)
        return self.url.format(language=self.language,
                               word=urllib.quote(
                                   word.encode(self.site_file_name_encoding)))

    def fix_pinyin(self, pinyin):
        """
        Convert pinyin to tone numbers, with the digit 5 replaced by 0.

        Returns ``pinyin`` unchanged when cjklib cannot be imported.
        """
        # Hacks. It is overkill to ship cjklib with this add-on. But
        # to get the tone numbers as numbers, we should use it. My
        # hope (guess) is that the typical user that will want Chinese
        # pronunciations will also have TTEMPÉ's (version of mine)
        # chinese-support-plugin installed. So try to use that and
        # don't complain if it doesn't work.
        if not self.have_tried_cjklib_hack:
            try:
                # If this works, the whole shebang is run as an Anki2
                # add-on. If not, we will still look for a system-wide
                # cjklib, but obviously not for another add-on.
                from aqt.utils import isWin
            except Exception:
                # Best effort: any failure to load aqt just means we are
                # not running inside Anki.
                pass
            else:
                from aqt import mw
                addon_dir = mw.pm.addonFolder()
                if isWin:
                    # The isWin bit is copied from TTEMPÉ's code.
                    addon_dir = addon_dir.encode(sys.getfilesystemencoding())
                sys.path.append(os.path.join(addon_dir, "chinese"))
            # Set the flag that is actually tested above. The old code set
            # a differently named attribute, so the hack ran (and appended
            # to sys.path) on every call.
            self.have_tried_cjklib_hack = True
        if not self.reading_factory:
            try:
                from cjklib.reading import ReadingFactory
            except ImportError:
                return pinyin
            else:
                self.reading_factory = ReadingFactory()
        return self.reading_factory.convert(pinyin,
                                            'Pinyin',
                                            'Pinyin',
                                            targetOptions={
                                                'toneMarkType': 'numbers'
                                            }).replace('5', '0')

    def get_flag_icon(self):
        """
        Set self.site_icon to the right icon.

        We should use different icons, depending on the request
        language.  We store these icons in self.site_icon_dict and
        download an icon only when we don't have it yet. Nothing is done
        when PyQt is not available.
        """
        if not with_pyqt:
            return
        try:
            # If this works we already have it.
            self.site_icon = self.site_icon_dict[self.language]
        except KeyError:
            # We have to get it ourself. (We know it's just 16x16, so
            # no resize. And we know the address).
            self.site_icon_dict[self.language] = \
                QImage.fromData(self.get_data_from_url(
                    self.icon_url_dict[self.language]))
            self.site_icon = self.site_icon_dict[self.language]

    def set_names(self, text, base, ruby):
        """
        Set the display text and file base name variables.

        Raises ``ValueError`` when there is nothing to download: no ruby
        for Chinese, no text for the other languages.
        """
        if self.language == self.chinese_code:
            if not ruby:
                raise ValueError('Nothing to download')
            self.base_name = u"{0}_{1}".format(base, ruby)
            self.display_text = u"{1} ({0})".format(base, ruby)
        else:
            if not text:
                raise ValueError('Nothing to download')
            self.base_name = text
            self.display_text = text
Ejemplo n.º 12
0
# Module-level ReadingFactory; the conversion lambdas in DConv call
# f.convert() on it.
f = ReadingFactory()


# NOTE(review): this list literal is a bare expression statement. It is not
# bound to any name, so it has no effect at runtime; presumably it is kept
# as a reminder of reading names. The commented-out entries are not part
# of the list.
[
    'GR', 'Pinyin', 'WadeGiles', 'MandarinBraille', 'MandarinIPA',
    'ShanghaineseIPA',
    #'Hangul',
    #'Kana', 'Hiragana', 'Katakana',
    'CantoneseYale', 'CantoneseIPA', 'Jyutping'
]


DConv = {
    # Mandarin conversions
    ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|x-Pinyin'): lambda s: f.convert(s, 'GR', 'Pinyin'),
    ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|Wade-Giles'): lambda s: f.convert(s, 'GR', 'WadeGiles'),
    ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|Braille'): lambda s: f.convert(s, 'GR', 'MandarinBraille'),
    ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|Alternative IPA'): lambda s: f.convert(s, 'GR', 'MandarinIPA'),


    ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|x-Pinyin'): lambda s: f.convert(s, 'Pinyin', 'Pinyin', sourceOptions={
        'toneMarkType': 'numbers'
    }),
    ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|Gwoyeu Romatzyh'): lambda s: f.convert(s, 'Pinyin', 'GR', sourceOptions={
        'toneMarkType': 'numbers'
    }),
    ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|Wade-Giles'): lambda s: f.convert(s, 'Pinyin', 'WadeGiles', sourceOptions={
        'toneMarkType': 'numbers'
    }),
    ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|Braille'): lambda s: f.convert(s, 'Pinyin', 'MandarinBraille', sourceOptions={
Ejemplo n.º 13
0
Radical 9	9	4EBA	man	rén
Radical 30	30	53E3	mouth	kǒu	
Radical 61	61	5FC3	heart	xīn	
Radical 3	3	4E36	dot	zhù	
Radical 4	4	4E3F	slash	piě
Radical 5	5	4E59	second, fishing hook	yǐ	
Radical 6	6	4E85	hook	jué	
Radical 7	7	4E8C	two	èr	
Radical 8	8	4EA0	lid, head	tóu
Radical 10	10	513F	legs	ér	
Radical 11	11	5165	enter	rù	
Radical 12	12	516B	eight	bā
Radical 140	140	8278	grass	cǎo
Radical 24	24	5341	ten	shí
Radical 13	13	5182	wide	jiōng
Radical 14	14	5196	cover	mī
Radical 15	15	51AB	ice	bīng
"""

from cjklib.reading import ReadingFactory
# Factory used to turn the Pinyin column into numbered-tone Pinyin.
f = ReadingFactory()

# Emit one CSV-like line per non-blank entry: index, numbered Pinyin, meaning.
for line in entries.split('\n'):
    if line.strip():
        # Five tab-separated fields are expected once surrounding tabs are
        # stripped; the first and third are not used.
        fields = line.strip('\t').split('\t')
        _, radicalIdx, _, meaning, pinyin = fields
        pinyinNumbers = f.convert(
            pinyin, 'Pinyin', 'Pinyin',
            targetOptions={'toneMarkType': 'numbers'})
        values = {
            'idx': int(radicalIdx),
            'pinyin': pinyinNumbers,
            'meaning': meaning,
        }
        print('%(idx)d,"%(pinyin)s","%(meaning)s"' % values)
Ejemplo n.º 14
0
class NTrain(Tk.Tk):
    def __init__(self, *args, **kwargs):
        """Create the main window and build the settings screen.

        The settings screen offers the data file name, a Browse and a
        Reset button, the number of cards, the quiz direction and an OK
        button. ``args`` and ``kwargs`` are passed on to ``Tk.Tk``.
        """
        Tk.Tk.__init__(self, *args, **kwargs)
        self.title("Ntrain")
        # place window in the center
        self.eval('tk::PlaceWindow %s center' %
                  self.winfo_pathname(self.winfo_id()))
        # enlarge the default font to size 30
        self._default_font = tkFont.nametofont("TkDefaultFont")
        self._default_font.configure(size=30)
        # define default dataset: chinese100.xlsx next to this source file
        self._defaultfile = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'chinese100.xlsx')

        # load default filename into the (editable) entry field
        basename = os.path.basename(self._defaultfile)
        self._filename_value = Tk.StringVar()
        self._sett_fn_label = Tk.Entry(textvariable=self._filename_value,
                                       font=self._default_font,
                                       width=12)
        self._filename_value.set(basename)
        self._sett_fn_label.grid(row=1, column=0, sticky=Tk.W)

        # button to browse for datafile
        self.browse = Tk.Button(self, text="Browse", command=self._get_file)
        self.browse.grid(row=1, column=1, sticky=Tk.W)

        # Reset button (invokes self._reset_list)
        self._reset_button = Tk.Button(text="Reset", command=self._reset_list)
        self._reset_button.grid(row=1, column=2)

        # label for the number-of-cards entry
        self._sett_label = Tk.Label(text="Number of Cards:")
        self._sett_label.grid(row=2, column=0, sticky=Tk.E)

        # entry field for number of cards, preset to 30
        entryText = Tk.StringVar()
        self._sett_entry = Tk.Entry(textvariable=entryText,
                                    font=self._default_font,
                                    width=3)
        entryText.set("30")
        self._sett_entry.grid(row=2, column=1, sticky=Tk.W)
        self._sett_entry.focus_set()

        # reverse option: 1 = "Ch to E" (default), 2 = "E to Ch"
        self._radio_val = Tk.IntVar()
        self._radio1 = Tk.Radiobutton(text="Ch to E",
                                      variable=self._radio_val,
                                      value=1)
        self._radio1.grid(row=4, column=0)
        self._radio2 = Tk.Radiobutton(text="E to Ch",
                                      variable=self._radio_val,
                                      value=2)
        self._radio2.grid(row=4, column=1)
        self._radio_val.set(1)

        # OK button to start game; no row is given to grid() here
        self._sett_button = Tk.Button(text="OK", command=self._start_game)
        self._sett_button.grid(columnspan=3)

        # Bind return key to start game
        self.bind('<Return>', self._start_game)

        # helper objects: a Pinyin instance and a ReadingFactory
        self._p = Pinyin()
        self._f = ReadingFactory()

    def _get_file(self):
        """Let the user pick a data file and show its name in the entry field.

        Only the base name of the chosen path is stored. If the dialog
        returns nothing, the current value is left untouched.
        """
        # open dialogue to choose datafile
        my_file = askopenfilename()
        # An empty result means the dialog was cancelled; keep the previous
        # file name instead of blanking the field.
        if not my_file:
            return
        # update entry to show filename in gui
        self._filename_value.set(os.path.basename(my_file))

    def _start_game(self, *args):
        """Read the settings, load the data file and start the quiz.

        The settings widgets are destroyed, the spreadsheet named in the
        file entry (looked up next to this source file) is loaded, the
        cards to ask are chosen and the first question is shown. If the
        file cannot be loaded, a message box is shown, ``self._restart()``
        is called and the method returns.

        ``args`` takes the event object when invoked through the
        ``<Return>`` binding; it is not used.
        """
        # get filename
        self._datafile = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            self._filename_value.get())

        # get number of cards
        self._n_cards = int(self._sett_entry.get())
        # remove previous gui components
        for widget in (self._sett_fn_label, self.browse, self._reset_button,
                       self._sett_label, self._sett_entry,
                       self._sett_button, self._radio1, self._radio2):
            widget.destroy()

        self._save_reminder = 0
        # load in data file
        try:
            self._vocTot = pd.read_excel(self._datafile)
        except Exception:
            tkMessageBox.showinfo("Error", "File not found!", icon='warning')
            self._restart()
            # Nothing was loaded, so there is nothing to play with. The old
            # code fell through and used self._vocTot anyway.
            return

        # get indices of all cards with Learned == 0
        filled_idx = self._vocTot[self._vocTot['Learned'] == 0].index.tolist()
        # NOTE(review): the first matching index is dropped; the reason is
        # not visible here. Raises IndexError when no card matches.
        del filled_idx[0]
        # shuffle indices
        self._renew_index(filled_idx)
        # setup new gui
        self._setup_game_gui()
        # start with first question
        self._show_next_question()

    def _setup_game_gui(self):
        """Create the widgets of the quiz screen and reset per-game state."""
        # per-question label lists: Chinese symbols and question text
        self.C_labels = []
        self.Q_labels = []

        # label that shows the correct solution
        self._sol_label_value = Tk.StringVar()
        self._sol_label = Tk.Label(textvariable=self._sol_label_value)
        self._sol_label.grid(row=3, column=2)

        # answer entry; <Return> is rebound to check the answer
        self._entry_value = Tk.StringVar()
        self._entry = Tk.Entry(textvariable=self._entry_value,
                               font=self._default_font)
        self._entry.grid(row=4, column=2)
        self._entry.focus_set()
        self.bind('<Return>', self._check_answer)

        # Check / Save / Next / New buttons, stacked in column 1, rows 1-4
        button_specs = (
            ('_check_button', "Check", self._check_answer),
            ('_save_button', "Save", self._save),
            ('_next_button', "Next", self._show_next_question),
            ('_new_button', "New", self._restart),
        )
        for row, (attribute, caption, callback) in enumerate(button_specs, 1):
            button = Tk.Button(text=caption, command=callback)
            setattr(self, attribute, button)
            button.grid(row=row, column=1, sticky=Tk.W)

        # translate field with its "E-C" button in row 5
        self._tr_value = Tk.StringVar()
        self._tr = Tk.Entry(textvariable=self._tr_value,
                            font=self._default_font)
        self._tr.grid(row=5, column=2)

        self._tr_button = Tk.Button(text="E-C", command=self._translate)
        self._tr_button.grid(row=5, column=1, sticky=Tk.W)

        # no wrong cards yet, and the current index starts at 0
        self._wrong_indices = []
        self._no = 0

    def _renew_index(self, indices):
        """Shuffle ``indices`` in place and keep the first ``self._n_cards``.

        The selection is stored in ``self._indices``.
        """
        # TODO: catch too many cards chosen as input
        shuffle(indices)
        # keep only as many cards as were requested
        card_limit = self._n_cards
        self._indices = indices[:card_limit]

    def _show_next_question(self):
        """Show the next card, or start a new round when the deck is empty.

        Pops the next index from ``self._indices`` into ``self._no``, clears
        the entry field, destroys the labels of the previous card and builds
        one label per Chinese character, coloured by the tone digit found in
        its pinyin (1 red, 2 green, 3 blue, 4 purple, otherwise grey).

        With radio value 1 the pinyin is displayed and the English entry
        becomes the expected answer; with radio value 2 the English text is
        displayed and the pinyin of the whole word becomes the expected
        answer.
        """
        try:
            # get the next index in the list
            self._no = self._indices.pop(0)
        except IndexError:
            # start new round, when no card in list left; only the empty
            # deck is handled here so that unrelated errors stay visible
            self._new_round()
            return

        # empty entry field
        self._entry_value.set("")
        # empty Q and C labels
        for label in self.C_labels:
            label.destroy()
        for label in self.Q_labels:
            label.destroy()
        self.C_labels = []
        self.Q_labels = []

        mode = self._radio_val.get()
        # checked in this order, first matching digit wins
        tone_colours = (("1", 'red'), ("2", 'green'),
                        ("3", 'blue'), ("4", 'purple'))

        # loop over Chinese characters; the first character sits in column 2
        for column, char in enumerate(self._vocTot.C[self._no], 2):
            my_pinyin = self._p.get_pinyin(char, ' ')
            char_label = Tk.Label(text=char)
            char_label.grid(row=2, column=column)
            to_tone = to_tone_number(my_pinyin)
            colour = 'grey'
            for digit, name in tone_colours:
                if digit in to_tone:
                    colour = name
                    break
            char_label.config(fg=colour)
            self.C_labels.append(char_label)
            if mode == 1:
                pinyin_label = Tk.Label(text=my_pinyin)
                pinyin_label.grid(row=1, column=column)
                self.Q_labels.append(pinyin_label)

        n_chars = len(self.C_labels)

        if mode == 1:
            self._curr_ans = self._vocTot.E[self._no].encode('utf-8')
        elif mode == 2:
            try:
                my_english = self._vocTot.E_long[self._no].encode('utf-8')
            except Exception:
                # no usable long form: fall back to the short English entry
                my_english = self._vocTot.E[self._no].encode('utf-8')
            question = Tk.Label(text=my_english)
            question.grid(row=1, column=2, columnspan=n_chars)
            self.Q_labels.append(question)
            self._curr_ans = self._p.get_pinyin(self._vocTot.C[self._no],
                                                ' ')

        # stretch the entry field across all character columns
        self._entry.grid(row=4, column=2, columnspan=n_chars)

        # set real_correct to default value of yes
        self._real_correct = 1

    def _check_answer(self, *args):
        """Compare the typed answer with the expected one and react to it.

        The input is stripped and lower-cased; when it contains digits it is
        converted from numbered pinyin to tone-marked pinyin first.  A correct
        answer advances to the next card (and marks the card as learned when
        it was right on the first attempt); a wrong answer shows the solution,
        clears the entry field and remembers the card for the next round.

        ``*args`` absorbs the event object passed by the ``<Return>`` binding.
        """
        # derive input
        answer = self._entry_value.get().strip().lower()

        # convert numbers, if provided, to pinyin tone marks
        if any(char.isdigit() for char in answer):
            answer = self._f.convert(answer,
                                     'Pinyin',
                                     'Pinyin',
                                     sourceOptions={
                                         'toneMarkType': 'numbers'
                                     }).encode('utf-8')

        # derive expected answer
        mode = self._radio_val.get()
        if mode == 1:
            # ask for English word
            answer_to_check = self._curr_ans.encode('utf-8').lower()
        elif mode == 2:
            # ask for Chinese word
            # NOTE(review): the former "with tone marks" / "without tone
            # marks" branches computed exactly the same value, so they are
            # merged; a tone-less comparison was presumably intended for
            # input without digits -- confirm.
            answer_to_check = self._p.get_pinyin(
                self._vocTot.C[self._no], ' ').encode('utf-8').lower()

        # check if answer is correct
        if answer == answer_to_check:
            # mark as learned only if the card was correct on first attempt
            if self._real_correct:
                self._vocTot.Learned[self._no] = 1
                self._save_reminder = 1

            self._sol_label_value.set("")
            # if correct, go on to next card
            self._show_next_question()
        else:
            # if wrong:
            self._real_correct = 0
            # store index in list of wrong cards, once per card, so repeated
            # wrong attempts do not duplicate it in the next round
            if self._no not in self._wrong_indices:
                self._wrong_indices.append(self._no)
            # display correct answer
            self._sol_label_value.set(self._curr_ans)
            self._sol_label.grid(row=3,
                                 column=2,
                                 columnspan=len(self.C_labels))
            # clear entry field
            self._entry_value.set("")

    def _new_round(self):
        """Replay the cards answered wrongly, or end the session.

        When no wrong cards are recorded the session is finished through
        ``self._exit()``.  Otherwise the wrong cards become the new deck, the
        wrong-card list is reset and the first question is shown.
        """
        if not self._wrong_indices:
            # nothing left to repeat: finish the session
            self._exit()
            return

        self._sol_label_value.set("New round!")
        # the wrong cards form the deck of the new round
        self._renew_index(self._wrong_indices)
        # start collecting wrong cards afresh
        self._wrong_indices = []
        self._show_next_question()

    def _reset_list(self):
        """Set the ``Learned`` column of the selected workbook to 0.

        The workbook named in the filename field is looked up next to this
        script, remembered in ``self._datafile``, read, and written back to
        the same path as sheet ``Sheet1`` without the index column.
        """
        script_dir = os.path.dirname(os.path.abspath(__file__))
        self._datafile = os.path.join(script_dir, self._filename_value.get())

        cards = pd.read_excel(self._datafile)
        # forget all learning progress
        cards['Learned'] = 0

        workbook = ExcelWriter(self._datafile)
        cards.to_excel(workbook, 'Sheet1', index=False)
        workbook.save()

    def _exit(self):
        """Save the progress and lock the quiz controls at the end of a session.

        When saving fails, a hint is shown and the controls stay enabled so
        the user can try again.  Otherwise the entry field is cleared,
        "Done!" is displayed, the Check/Save/Next buttons are disabled and
        the ``<Return>`` binding is removed.
        """
        try:
            self._save()
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not swallowed
            self._sol_label_value.set("Didn't work?!")
            return
        self._entry_value.set("")
        self._sol_label_value.set("Done!")
        self._check_button['state'] = 'disabled'
        self._save_button['state'] = 'disabled'
        self.unbind('<Return>')
        self._next_button['state'] = 'disabled'

    def _save(self, *args):
        """Write the vocabulary table to ``self._datafile`` and confirm it.

        The table goes to sheet ``Sheet1`` without the index column.  After
        the write, "Saved!" is shown across the character columns and the
        save reminder is cleared.  ``*args`` is accepted but not used.
        """
        workbook = ExcelWriter(self._datafile)
        self._vocTot.to_excel(workbook, 'Sheet1', index=False)
        workbook.save()

        self._sol_label_value.set("Saved!")
        span = len(self.C_labels)
        self._sol_label.grid(row=3, column=2, columnspan=span)
        # nothing is pending any more
        self._save_reminder = 0

    def _restart(self):
        """Restart the program, offering to save unsaved progress first.

        When the save reminder is set the user is asked whether to save; if
        they agree and saving fails, a hint is shown and the restart is
        cancelled.  Otherwise the current process is replaced by a fresh
        interpreter running the same command line.
        """
        if self._save_reminder:
            result = tkMessageBox.askquestion("Warning",
                                              "Save before exiting?")
            if result == 'yes':
                try:
                    self._save()
                except Exception:
                    # Exception rather than a bare except, so that
                    # KeyboardInterrupt and SystemExit are not swallowed
                    self._sol_label_value.set("Didn't work?!")
                    return
        python = sys.executable
        # replaces this process; nothing after this line runs
        os.execl(python, python, *sys.argv)

    def _translate(self):
        """Open Google Translate in the web browser for the translate field.

        A ``unicode`` value is sent as Chinese -> English, anything else as
        English -> Chinese.  The text is percent-encoded (as UTF-8 for
        unicode input) so that spaces, non-ASCII characters and URL
        metacharacters survive in the address.
        """
        import urllib  # function-scope import: only needed for quoting here

        to_translate = self._tr_value.get()
        if isinstance(to_translate, unicode):
            # presumably the field yields unicode only for non-ASCII (Chinese)
            # input -- TODO confirm
            # NOTE(review): this direction was marked "doesn't work"; passing
            # the raw unicode text in the URL is the likely cause -- verify.
            direction = 'zh-CN/en/'
            raw_text = to_translate.encode('utf-8')
        else:
            direction = 'en/zh-CN/'
            raw_text = to_translate
        url = ('https://translate.google.com/#' + direction +
               urllib.quote(raw_text))
        webbrowser.open(url)
Ejemplo n.º 15
0
    def handle_noargs(self, **options):
        """Rebuild the ``EN:*`` English lookup keys in Redis.

        Every existing ``EN:*`` key is deleted first.  Each non-comment line
        of ``settings.DICT_FILE_LOCATION`` is then split into its meanings.
        A meaning is reduced to a short index term (bracketed text removed,
        leading "to " dropped for short infinitives, "..." removed) and, when
        the term has at most three space-separated parts, stored under
        ``EN:<parts>W:<term>`` as JSON holding the original meaning and the
        list of matching characters.  Progress is printed along the way.
        """
        # EXAMPLE: 一中一台 [yi1 Zhong1 yi1 Tai2] /first meaning/second meaning/

        def drop_group(text, opener, closer):
            # Remove the text enclosed by the first opener/closer pair
            # (every occurrence of that text) and then all opener/closer
            # characters; the text is returned unchanged when either
            # delimiter is missing.
            try:
                inner = text[text.index(opener) + 1:text.index(closer)]
            except ValueError:
                return text
            return (text.replace(inner, '')
                    .replace(opener, '').replace(closer, ''))

        # the context manager closes the file even when a line fails to parse
        with open(settings.DICT_FILE_LOCATION) as dict_file:
            r_server = _get_redis()

            # EMPTY ALL EN KEYS FROM THE DATABASE
            item_count = 0
            for stale_key in r_server.keys('EN:*'):
                r_server.delete(stale_key)
                item_count += 1
            print("Deleted %s items" % item_count)

            # one factory for the whole run instead of one per line
            factory = ReadingFactory()

            # NOW LETS START
            item_count = 0
            for line in dict_file:
                if line.startswith("#"):
                    continue

                # GATHER ALL THE MAIN VARIABLES
                fields = line.split()
                characters = fields[1]
                numbered_pinyin = line[line.index('[') + 1:line.index(']')]
                # NOTE(review): tonal_pinyin is not stored anywhere; the
                # conversion is kept so that lines which failed here before
                # still fail the same way.
                tonal_pinyin = factory.convert(
                    numbered_pinyin, 'Pinyin', 'Pinyin',
                    sourceOptions={'toneMarkType': 'numbers', 'yVowel': 'v',
                                   'missingToneMark': 'fifth'})
                meanings = line[line.index('/') + 1:line.rindex('/')]

                # CREATE AN INDEX: strip as much noise as possible from each
                # definition to get close to a single word we can index on.
                for x in meanings.split('/'):
                    # REMOVE ANYTHING BETWEEN BRACKETS, THEN SQUARE BRACKETS
                    ns = drop_group(x, '(', ')')
                    ns = drop_group(ns, '[', ']')

                    # IGNORE THE MEANING IF IT CONTAINS AN EXCLUDED PHRASE
                    if any(phrase in ns for phrase in EXCLUSIONS):
                        continue

                    # IF THE MEANING IS NOW EMPTY, IGNORE IT
                    ns = ns.strip()
                    if ns == '':
                        continue

                    # DEAL WITH INFINITIVE VERBS LIKE "TO DO" WITH 2 WORDS
                    if len(ns.split(' ')) <= 3 and ns.startswith('to '):
                        ns = ns.split(' ', 1)[1]

                    # REMOVE ITEMS LIKE "SEE XYZ"
                    if (ns.split(' ')[0] == 'see'
                            and ns[-1] not in string.ascii_letters):
                        continue

                    # THERE'S ALSO SOME ANNOYING "..." MARKS TOO
                    ns = ns.replace('...', '')

                    # FOR NOW, JUST ADD ITEMS WITH UP TO 3 PARTS
                    part_count = len(ns.split(' '))
                    if part_count > 3:
                        continue

                    key = "EN:%sW:%s" % (part_count, ns.lower())
                    print(key)
                    if r_server.exists(key):
                        # known term: add these characters to its entry
                        values = json.loads(_search_redis(key))
                        values['characters'].append(characters)
                    else:
                        values = {
                            'english': x,
                            'characters': [characters],
                        }
                    r_server.set(key, json.dumps(values))

                    item_count += 1
                    print(item_count)

        print("%s English dictionary items added" % item_count)
Ejemplo n.º 16
0
class LeoDownloader(AudioDownloader):
    """Download audio from LEO"""
    def __init__(self):
        """Set up the URL template, language codes and icon addresses."""
        AudioDownloader.__init__(self)
        self.file_extension = u'.mp3'
        self.url = 'http://www.leo.org/dict/audio_{language}/{word}.mp3'
        # Maps the request language to the code used in the URL. And, yes,
        # they use ch for Chinese. (I'm not sure if they really have
        # anything for ru or it.)
        self.language_dict = dict(
            de='de', en='en', es='es', fr='fr', it='it', ru='ru', zh='ch')
        # It kind of looks like they have Swiss pronunciations, but they
        # don't.
        self.chinese_code = 'ch'
        # Cache of site icons, so the right flag for the request is at hand.
        self.site_icon_dict = {}
        self.site_file_name_encoding = 'ISO-8859-1'
        # When we use this dict, we have already munged the 'zh' to 'ch'.
        self.icon_url_dict = dict(
            de='http://dict.leo.org/favicon.ico',
            en='http://dict.leo.org/favicon.ico',
            es='http://dict.leo.org/favicon_es.ico',
            fr='http://dict.leo.org/favicon_fr.ico',
            it='http://dict.leo.org/favicon_it.ico',
            ru='http://dict.leo.org/favicon_ru.ico',
            ch='http://dict.leo.org/favicon_ch.ico')
        # As the name implies, a hack. Try to use the cjklib TTEMPÉ brings
        # along. A system-wide installed one should work as well.
        self.have_tried_cjklib_hack = False
        self.reading_factory = None

    def download_files(self, word, base, ruby, split):
        """
        Download a word from LEO

        We try to get pronunciations for the text for German, English,
        Spanish, French, Italian and Russian, and from the ruby for
        Chinese. There may not be any pronunciations available for
        Italian or Russian.

        The downloaded data is written to the path given by
        ``get_file_name()`` and recorded in ``self.downloads_list``.
        ``split`` is accepted but not used here.
        """
        self.downloads_list = []
        # Fix the language. EAFP: a language missing from the table raises
        # KeyError right here.
        requested = self.language[:2].lower()
        self.language = self.language_dict[requested]
        # set_names also checks the language.
        self.set_names(word, base, ruby)
        # Only get the icon when we have a word
        self.get_flag_icon()
        # EAFP. self.query_url may return None, in which case fetching the
        # data will blow up.
        audio_data = self.get_data_from_url(self.query_url(word, ruby))
        target_path, target_name = self.get_file_name()
        with open(target_path, 'wb') as audio_file:
            audio_file.write(audio_data)
        # We have a file, but not much to say about it.
        entry = (target_path, target_name, {'Source': 'Leo'})
        self.downloads_list.append(entry)

    def query_url(self, word, ruby):
        """Build query URL

        For Chinese the word is replaced by the converted ruby; the result
        is encoded with the site's file name encoding and percent-quoted
        before it goes into the URL template.
        """
        if self.language == self.chinese_code:
            word = self.fix_pinyin(ruby)
        encoded_word = word.encode(self.site_file_name_encoding)
        quoted_word = urllib.quote(encoded_word)
        return self.url.format(language=self.language, word=quoted_word)

    def fix_pinyin(self, pinyin):
        """Return ``pinyin`` with tone numbers, using 0 instead of 5.

        The conversion needs cjklib.  When cjklib cannot be imported the
        input is returned unchanged.
        """
        # Hacks. It is overkill to ship cjklib with this add-on. But
        # to get the tone numbers as numbers, we should use it. My
        # hope (guess) is that the typical user that will want Chinese
        # pronunciations will also have TTEMPÉ's (version of mine)
        # chinese-support-plugin installed. So try to use that and
        # don't complain if it doesn't work.
        if not self.have_tried_cjklib_hack:
            try:
                # If this works, the whole shebang is run as an Anki2
                # add-on. If not, we will still look for a system-wide
                # cjklib, but obviously not for another add-on.
                from aqt.utils import isWin
            except Exception:
                # Best effort: not running inside Anki (or aqt is unusable).
                pass
            else:
                from aqt import mw
                addon_dir = mw.pm.addonFolder()
                if isWin:
                    # The isWin bit is copied from TTEMPÉ's code.
                    addon_dir = addon_dir.encode(sys.getfilesystemencoding())
                sys.path.append(os.path.join(addon_dir, "chinese"))
            # Must be the attribute tested above, otherwise the path hack
            # runs (and sys.path grows) on every call.
            self.have_tried_cjklib_hack = True
        if not self.reading_factory:
            try:
                from cjklib.reading import ReadingFactory
            except ImportError:
                return pinyin
            else:
                self.reading_factory = ReadingFactory()
        numbered = self.reading_factory.convert(
            pinyin, 'Pinyin', 'Pinyin',
            targetOptions={'toneMarkType': 'numbers'})
        # tone number 5 is written as 0 in the result
        return numbered.replace('5', '0')

    def get_flag_icon(self):
        """
        Set self.site_icon to the icon for the current language.

        Each request language has its own icon. Icons are cached in
        self.site_icon_dict; a missing one is downloaded from the address
        in self.icon_url_dict. Nothing happens without PyQt.
        """
        if not with_pyqt:
            return
        code = self.language
        icons = self.site_icon_dict
        if code not in icons:
            # We have to get it ourselves. (We know it's just 16x16, so
            # no resize. And we know the address).
            icon_data = self.get_data_from_url(self.icon_url_dict[code])
            icons[code] = QImage.fromData(icon_data)
        self.site_icon = icons[code]

    def set_names(self, text, base, ruby):
        """
        Set the display text and file base name variables.

        For Chinese both are built from base and ruby; for every other
        language the text itself is used. Raises ValueError when the
        relevant input (ruby for Chinese, text otherwise) is empty.
        """
        if self.language != self.chinese_code:
            if not text:
                raise ValueError('Nothing to download')
            self.base_name = text
            self.display_text = text
            return

        if not ruby:
            raise ValueError('Nothing to download')
        self.base_name = u"{0}_{1}".format(base, ruby)
        self.display_text = u"{1} ({0})".format(base, ruby)