Esempio n. 1
0
 def convertToKana(self):
     """Run the lookup field's text through ``romkan`` and refresh results.

     An empty field clears the results table. Otherwise the text is
     converted unless it ends in ``n`` or ``ny``; text ending in ``nn`` is
     first passed through ``normalize_double_n``. Results are refreshed
     only when the field ends up as a single script run of Hiragana.
     """
     typed = self.lookup.text()
     inputLen = len(typed)

     if inputLen == 0:
         # Nothing typed: wipe the results table and stop.
         self.lookupResults.clearContents()
         self.lookupResults.setRowCount(0)
         return

     # Kanji input is detected here but not handled yet.
     if scripts.script_type(typed) == scripts.Script.Kanji:
         pass    #TODO: ...

     # A trailing 'n' or 'ny' is left unconverted for now.
     lastChar = typed[inputLen - 1]
     lastTwo = typed[inputLen - 2:]
     if lastChar != u'n' and lastTwo != u'ny':
         self.lookup.setText(romkan(typed))

     # Re-read the field, since it may just have been replaced; the slice
     # still uses the length measured before any conversion.
     current = self.lookup.text()
     if current[inputLen - 2:] == u'nn':
         self.lookup.setText(romkan(normalize_double_n(current)))

     shown = self.lookup.text()
     if len(scripts.script_boundaries(shown)) == 1:
         if scripts.script_type(shown) == scripts.Script.Hiragana:
             self.updateLookupResults(shown)
Esempio n. 2
0
    def parseWordToKanji(self):
        """Build an HTML summary of every distinct kanji in the selected word.

        The word is read from ``self.itemsMenu.wordInfo``. For each kanji
        seen for the first time, its kun readings, on readings and glosses
        are taken from ``self.kdict`` and appended as markup. A kanji whose
        lookup or formatting fails is listed on a line of its own.

        Returns:
            The accumulated markup with trailing ``<br/>`` separators removed.
        """
        script = scripts.script_boundaries(self.itemsMenu.wordInfo.text())
        components = u''
        kanjiList = []

        for cluster in script:
            if scripts.script_type(cluster) == scripts.Script.Kanji:
                for kanji in cluster:
                    if kanji in kanjiList:
                        continue    # each kanji is described only once
                    kanjiList.append(kanji)
                    try:
                        lookup = self.kdict[kanji]
                        kun = lookup.kun_readings
                        on = lookup.on_readings
                        gloss = lookup.gloss

                        components += '<b>(' + kanji + ')</b>\t'

                        if len(kun) > 0:
                            components += '<b>kun:</b>' + ', '.join(kun) + '\t'
                        if len(on) > 0:
                            components += '<b>on:</b>' + ', '.join(on) + '<br/>'
                        if len(gloss) > 0:
                            components += "<font style='font-family: Calibri; font-size: 11pt'>" + ", ".join(gloss) + "</font><br/>"

                    except Exception:
                        # Best effort: still show the kanji itself. Catching
                        # Exception (not everything) lets KeyboardInterrupt
                        # and SystemExit propagate.
                        components += kanji + '<br/>'

        # Remove only whole trailing '<br/>' separators. str.rstrip('<br/>')
        # strips any of the characters '<', 'b', 'r', '/', '>' and would
        # also eat the '>' of a closing '</font>' tag.
        lineBreak = '<br/>'
        while components.endswith(lineBreak):
            components = components[:-len(lineBreak)]
        return components
Esempio n. 3
0
def _jpn(token):
    """Convert jpn token to phonemes.

    Loads the tab-separated key/value tables under ``lib/data/phon`` into a
    lookup of key -> list of values, replaces each kanji of ``token`` that
    is present in kanjidic with its first on reading, and hands the
    resulting string to ``_maxphon`` together with the lookup.

    Returns:
        The result of ``_maxphon``, or ``"?"`` when that result is empty.
    """
    import sys

    from cjktools import scripts
    from cjktools.resources import kanjidic

    lkp = {}
    for fn in ["lib/data/phon/ja-Hira", "lib/data/phon/ja-Kata"]:
        # The context manager closes the handle even if parsing fails.
        with open(fn) as table_file:
            for line in table_file:
                line = line.strip()
                if line == "":
                    continue
                kv = line.split("\t")
                if len(kv) != 2:
                    # Report malformed rows and keep going.
                    print("!", kv, file=sys.stderr)
                    continue
                k = kv[0].strip()
                v = kv[1].strip()
                lkp.setdefault(k, []).append(v)

    kjd = kanjidic.Kanjidic(kanjidic_files=["lib/data/dict/ja"])
    op = ""
    segs = scripts.script_boundaries(token)
    for seg in segs:
        tipus = scripts.script_types(seg)
        # NOTE(review): 3 is presumably the value of the Kanji script type;
        # confirm against cjktools before replacing the literal.
        if 3 in tipus:
            for ch in seg:
                # Kanji missing from kanjidic, or without on readings,
                # contribute nothing to the output.
                if ch in kjd:
                    if len(kjd[ch].on_readings) > 0:
                        op += kjd[ch].on_readings[0]
        else:
            op += seg

    res = _maxphon(lkp, op)
    if res == "":
        return "?"
    return res
Esempio n. 4
0
    def process_response(self, request, response):
        """Wrap script runs matching ``self.japanese_scripts`` in ja spans.

        Only responses with status 200 and a ``text/html`` content type are
        touched; anything else, or HTML whose content has none of the
        scripts in ``self.japanese_scripts``, is returned unchanged.
        """
        if (response.status_code != 200
                or not response.get('Content-Type', '').startswith('text/html')):
            return response

        content = response.content.decode('utf8')
        present = scripts.script_types(content).intersection(
            self.japanese_scripts)
        if not present:
            return response

        # Rebuild the body run by run, tagging only the matching runs.
        parts = [
            '<span lang="ja" xml:lang="ja">%s</span>' % part
            if scripts.script_type(part) in self.japanese_scripts
            else part
            for part in scripts.script_boundaries(content)
        ]
        response.content = u''.join(parts).encode('utf8')

        return response
Esempio n. 5
0
def expand_long_vowels(kana_string):
    """
    Expands whatever long vowels are possible to expand.

    Each Hiragana or Katakana segment is processed in Hiragana and then
    converted back to its original script; segments of any other script
    are copied through unchanged. A match of ``_long_finder`` that has no
    preceding character within its segment cannot be expanded and is left
    as it is.

        >>> a = expand_long_vowels(u'すー')
        >>> b = u'すう'
        >>> a == b
        True
    """
    script_converters = {
        scripts.Script.Hiragana: lambda x: x,
        scripts.Script.Katakana: scripts.to_katakana
    }

    table = kana_table.KanaTable.get_cached()

    out_parts = []
    for segment in scripts.script_boundaries(kana_string):
        if not segment:
            continue

        char_type = scripts.script_type(segment)
        if char_type not in script_converters:
            out_parts.append(segment)
            continue

        reverse_operation = script_converters[char_type]
        segment = scripts.to_hiragana(segment)

        for m in _long_finder.finditer(segment):
            i = m.start()
            if i == 0:
                # No preceding kana: segment[i - 1] would wrap around to
                # the last character and pick an unrelated vowel.
                continue
            vowel = table.to_vowel_line(segment[i - 1])
            segment = segment[:i] + vowel + segment[i + 1:]

        out_parts.append(reverse_operation(segment))

    return ''.join(out_parts)
Esempio n. 6
0
def expand_long_vowels(kana_string):
    """
    Expands whatever long vowels are possible to expand.

    Each Hiragana or Katakana segment is processed in Hiragana and then
    converted back to its original script; segments of any other script
    are copied through unchanged. A match of ``_long_finder`` that has no
    preceding character within its segment cannot be expanded and is left
    as it is.

        >>> a = expand_long_vowels(u'すー')
        >>> b = u'すう'
        >>> a == b
        True
    """
    script_converters = {scripts.Script.Hiragana: lambda x: x,
                         scripts.Script.Katakana: scripts.to_katakana}

    table = kana_table.KanaTable.get_cached()

    out_parts = []
    for segment in scripts.script_boundaries(kana_string):
        if not segment:
            continue

        char_type = scripts.script_type(segment)
        if char_type not in script_converters:
            out_parts.append(segment)
            continue

        reverse_operation = script_converters[char_type]
        segment = scripts.to_hiragana(segment)

        for m in _long_finder.finditer(segment):
            i = m.start()
            if i == 0:
                # No preceding kana: segment[i-1] would wrap around to the
                # last character and pick an unrelated vowel.
                continue
            vowel = table.to_vowel_line(segment[i-1])
            segment = segment[:i] + vowel + segment[i+1:]

        out_parts.append(reverse_operation(segment))

    return ''.join(out_parts)
Esempio n. 7
0
def sift_nonj_characters(data, plain):
    """Return ``data`` with every Ascii script run found in ``plain`` removed.

    ``plain`` is split at script boundaries; each run classified as Ascii
    is deleted from ``data`` wherever it occurs.
    """
    ascii_runs = (
        run for run in scripts.script_boundaries(plain)
        if scripts.script_type(run) is scripts.Script.Ascii
    )
    for run in ascii_runs:
        data = data.replace(run, '')
    return data
Esempio n. 8
0
    def eventFilter(self, object, event):
        """Handle hover and mouse-click events for a watched word label.

        * Hover leave: restore the label colour, hide all info panels and
          reposition the small info panel.
        * Hover enter: highlight the label and show its reading and the
          ``rdk`` components of each of its kanji.
        * Middle click: show, per kanji, the other kanji of its group.
        * Right click: show readings and glosses from ``kjd`` per kanji.
        * Left click: show stroke animations (where a GIF is listed in the
          manifest) and up to a handful of translations.
        * Any other button press while ``allInfo`` is visible: switch back
          to the small info panel.

        Args:
            object: the watched widget; its ``parent()`` holds the panels
                and dictionaries used here. The name shadows the builtin
                but is kept so the signature stays unchanged.
            event: the event being filtered.

        Returns:
            Always False.
        """

        if event.type() == QEvent.HoverLeave:
            object.setStyleSheet("QLabel { color: rgb(0, 0, 0); }")

            object.parent().info.hide()
            object.parent().allInfo.hide()
            object.parent().kanjiInfo.hide()
            object.parent().kanjiGroups.hide()

            desktop = QApplication.desktop().screenGeometry()
            object.parent().info.setGeometry(QRect(desktop.width() - H_INDENT - I_WIDTH - I_INDENT, desktop.height() - V_INDENT, I_WIDTH, I_HEIGHT))

        if event.type() == QEvent.HoverEnter:
            object.setStyleSheet("QLabel { color: rgb(0, 5, 255); }")

            object.parent().info.item.setText(object.text())

            # Show the reading only when it differs from the word itself.
            reading = object.parent().srs.getWordPronunciationFromExample(object.text())
            if reading != object.text():
                object.parent().info.reading.setText(reading)
            else:
                object.parent().info.reading.setText(u'')

            # parsing word
            script = scripts.script_boundaries(object.text())
            components = []

            for cluster in script:
                if scripts.script_type(cluster) == scripts.Script.Kanji:
                    for kanji in cluster:
                        # One line of components per kanji.
                        components.extend(list(object.parent().rdk[kanji]))
                        components.append('\n')

            # setting radicals
            if len(components) > 0:
                components.pop()    # remove last '\n'
            object.parent().info.components.setText(' '.join(components))
            object.parent().info.show()

        if event.type() == QEvent.MouseButtonPress:
            # item context menu #
            if event.button() == Qt.MiddleButton:

                object.parent().info.hide()
                object.parent().allInfo.hide()
                object.parent().kanjiInfo.hide()

                script = scripts.script_boundaries(object.text())
                resulting_info = u''
                # Ordered so the groups appear in the word's kanji order.
                kanji_groups = OrderedDict()

                for cluster in script:
                    if scripts.script_type(cluster) == scripts.Script.Kanji:
                        for kanji in cluster:
                            similar = object.parent().groups.findSimilarKanji(kanji)
                            try:
                                # Drop the kanji itself from its own group.
                                position = similar.index(kanji)
                                kanji_groups[kanji] = similar[:position] + similar[position + 1:]
                            except Exception:
                                kanji_groups[kanji] = object.parent().groups.findSimilarKanji(kanji)
                                log.debug(u'Not in group: ' + kanji)

                for kanji in kanji_groups:
                    resulting_info += kanji + u' ~\t'
                    for item in kanji_groups[kanji]:
                        lookup = object.parent().kjd[item]
                        resulting_info += " " + item + " <font style='font-family: Calibri; font-size: 12pt'>(" + lookup.gloss[0] + ")</font> "
                    resulting_info += '<br/>'

                if resulting_info == u'':
                    resulting_info = u'No such groups in Kanji.Odyssey!'
                object.parent().kanjiGroups.info.setText(resulting_info)

                object.parent().kanjiGroups.show()

            # kanji info #
            if event.button() == Qt.RightButton:

                object.parent().info.hide()
                object.parent().allInfo.hide()
                object.parent().kanjiGroups.hide()

                object.parent().kanjiInfo.info.setText(u'')

                script = scripts.script_boundaries(object.text())
                resulting_info = u''

                for cluster in script:
                    if scripts.script_type(cluster) == scripts.Script.Kanji:
                        for kanji in cluster:
                            try:
                                lookup = object.parent().kjd[kanji]
                                kun = lookup.kun_readings
                                on = lookup.on_readings
                                gloss = lookup.gloss

                                resulting_info += "<font style='font-family: " + Fonts.HiragiNoMyoutyouProW3 + "; font-size: 16.5pt'>(" + kanji + ")</font>\t"

                                if len(kun) > 0:
                                    resulting_info += '<b>kun: </b>' + ', '.join(kun) + '\t'
                                if len(on) > 0:
                                    resulting_info += '<b>on:</b>' + ', '.join(on) + '<br/>'
                                if len(gloss) > 0:
                                    resulting_info += "<font style='font-family: Calibri; font-size: 12pt'>" + ", ".join(gloss) + "</font><br/>"
                            except Exception:
                                # Best effort: list the bare kanji. This used
                                # to append to 'components', which does not
                                # exist in this branch and raised NameError.
                                resulting_info += kanji + '<br/>'

                if resulting_info != '':
                    if resulting_info.count('<br/>') > 7:
                        object.parent().kanjiInfo.setStyleSheet('QLabel { font-size: 13pt }')
                    # Remove only whole trailing '<br/>' separators;
                    # str.rstrip('<br/>') strips a character set and would
                    # also eat the '>' of a closing '</font>' tag.
                    while resulting_info.endswith('<br/>'):
                        resulting_info = resulting_info[:-len('<br/>')]
                    object.parent().kanjiInfo.info.setText(resulting_info)

                else:
                    object.parent().kanjiInfo.info.setText(u'No such kanji in kanjidic!')
                object.parent().kanjiInfo.show()

            # translation and strokes info #
            if event.button() == Qt.LeftButton:

                object.parent().kanjiInfo.hide()
                object.parent().info.hide()
                object.parent().kanjiGroups.hide()

                unfillLayout(object.parent().allInfo.layout)
                object.parent().allInfo.layout.setMargin(1)

                kanjiList = []
                script = scripts.script_boundaries(object.text())

                for cluster in script:
                    if scripts.script_type(cluster) == scripts.Script.Kanji:
                        for kanji in cluster:
                            kanjiList.append(kanji)

                # i is the next free grid row, j the next free grid column.
                i = 0
                j = 0
                # kanji strokes
                if len(kanjiList) > 0:

                    # The context manager closes the manifest even if the
                    # read fails.
                    with open(PATH_TO_RES + STROKES + KANJI_MANIFEST, 'r') as infile:
                        text = infile.read()

                    for kanji in kanjiList:

                        # Stroke GIFs are named after the hex of the kanji's
                        # UTF-8 bytes; only load those listed in the manifest.
                        hexName = kanji.encode('utf-8').encode('hex')
                        if text.find(hexName) != -1:

                            gif = QLabel()
                            gif.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)
                            gif.setAlignment(Qt.AlignCenter)

                            movie = QMovie(PATH_TO_RES + STROKES + hexName + '.gif', QByteArray(), self)
                            movie.setCacheMode(QMovie.CacheAll)
                            movie.setSpeed(150)

                            gif.setMovie(movie)
                            object.parent().allInfo.layout.addWidget(gif, i, j)
                            j = j + 1
                            movie.start()

                    i = i + 1

                # words translation
                translations = QLabel(u'')
                translations.setFont(QFont('Calibri', 11))
                translations.setWordWrap(True)
                translations.setAlignment(Qt.AlignCenter)
                try:
                    search = object.parent().edict[object.parent().srs.getWordNonInflectedForm(object.text())]

                    translationText = u''

                    variants = search.senses_by_reading()[object.parent().srs.getWordPronounciation(object.parent().srs.getWordNonInflectedForm(object.text()))][:3]
                    variants = filter(lambda e: e != '(P)', variants)

                    translationText += '<b>' + object.parent().srs.getWordPronunciationFromExample(object.text()) + '</b>:\t' + ', '.join(variants)
                    translations.setText(translationText.rstrip('\n'))

                except Exception:
                    # The edict lookup failed: fall back to jmdict.
                    ### by reading
                    search = object.parent().jmdict.lookupTranslationByReadingJoin(object.parent().srs.getWordPronounciation(object.parent().srs.getWordNonInflectedForm(object.text())), object.parent().options.getLookupLang())
                    if len(search) > 0:
                        if len(search) > 5:
                            search = search[:5]
                        translations.setText('<b>' + object.parent().srs.getWordPronunciationFromExample(object.text())+ '</b>:\t' + ', '.join(search))
                    ### by kanji
                    else:
                        search = object.parent().jmdict.lookupItemByReading(object.parent().srs.getWordPronounciation(object.parent().srs.getWordNonInflectedForm(object.text())))
                        if len(search) > 0:
                            lookup = object.parent().jmdict.lookupItemTranslationJoin(search[0], object.parent().options.getLookupLang())
                            if len(lookup) > 5:
                                lookup = lookup[:5]
                            translations.setText('<b>' + object.parent().srs.getWordPronunciationFromExample(object.text())+ '</b>:\t' + ', '.join(lookup))
                    ### nothing found
                    if len(search) == 0:
                        translations.setText(u'Alas, no translation in edict or jmdict!')

                # Separate the stroke row from the translations when the
                # word contained kanji.
                if i > 0:
                    separator = QFrame()
                    separator.setFrameShape(QFrame.HLine)
                    separator.setFrameShadow(QFrame.Sunken)
                    object.parent().allInfo.layout.addWidget(separator, i, 0, 1, j)
                    i = i + 1

                object.parent().allInfo.layout.addWidget(translations, i, 0, 1, j)

                object.parent().allInfo.update()
                object.parent().allInfo.show()

            elif object.parent().allInfo.isVisible():

                object.parent().allInfo.hide()
                object.parent().info.show()

        return False
Esempio n. 9
0
 def addKanjiToStudy(self):
     """Add every kanji of the word shown in ``wordInfo`` to the database.

     The word is split at script boundaries and each character of every
     Kanji run is passed to ``self.db.addKanjiToDb``.
     """
     word = self.itemsMenu.wordInfo.text()
     kanji_runs = (
         run for run in scripts.script_boundaries(word)
         if scripts.script_type(run) == scripts.Script.Kanji
     )
     for run in kanji_runs:
         for character in run:
             self.db.addKanjiToDb(character)
Esempio n. 10
0
def sift_nonj_characters(data, plain):
    """Strip from ``data`` each Ascii script run that occurs in ``plain``.

    Runs of any other script type are ignored; the filtered ``data`` is
    returned.
    """
    for fragment in scripts.script_boundaries(plain):
        if scripts.script_type(fragment) is not scripts.Script.Ascii:
            continue
        data = data.replace(fragment, '')
    return data