def convertToKana(self):
    """Turn romaji typed into the lookup field into kana and refresh results.

    An empty field clears the results table. Otherwise the text is passed
    through ``romkan`` unless it ends in ``n`` or ``ny``; text ending in
    ``nn`` is first run through ``normalize_double_n`` and then converted.
    When the field then holds exactly one script run and that run is
    hiragana, ``updateLookupResults`` is called with it.
    """
    typedLength = len(self.lookup.text())
    if not typedLength > 0:
        # Nothing typed: empty the results table.
        self.lookupResults.clearContents()
        self.lookupResults.setRowCount(0)
        return

    if scripts.script_type(self.lookup.text()) == scripts.Script.Kanji:
        pass  # TODO: ...

    # NB: a regexp would be the tidier way to detect the dangling 'n'.
    typed = self.lookup.text()
    if not (typed[typedLength - 1] == u'n' or typed[typedLength - 2:] == u'ny'):
        # NB (carried over from the original author): this reportedly does
        # not convert na/ni/nu/ne/no; 'n' normalization is the suspect.
        self.lookup.setText(romkan(typed))

    # The field is re-read because the conversion above may have replaced
    # its text; the offset still uses the length measured on entry.
    if self.lookup.text()[typedLength - 2:] == u'nn':
        self.lookup.setText(romkan(normalize_double_n(self.lookup.text())))

    current = self.lookup.text()
    if len(scripts.script_boundaries(current)) == 1:
        if scripts.script_type(current) == scripts.Script.Hiragana:
            self.updateLookupResults(self.lookup.text())
def parseWordToKanji(self):
    """Build an HTML summary of each distinct kanji in the selected word.

    The word is read from ``self.itemsMenu.wordInfo`` and split into script
    runs; every kanji is looked up once in ``self.kdict``. A successful
    lookup contributes the kanji in bold followed by its kun readings, on
    readings and glosses (each part only when non-empty). If the lookup
    fails, the bare kanji is emitted instead.

    Returns:
        The concatenated markup with any trailing ``<br/>`` removed.
    """
    lineBreak = '<br/>'
    components = u''
    seen = set()
    for cluster in scripts.script_boundaries(self.itemsMenu.wordInfo.text()):
        if not (scripts.script_type(cluster) == scripts.Script.Kanji):
            continue
        for kanji in cluster:
            if kanji in seen:
                continue
            seen.add(kanji)
            try:
                lookup = self.kdict[kanji]
                kun = lookup.kun_readings
                on = lookup.on_readings
                gloss = lookup.gloss
                components += '<b>(' + kanji + ')</b>\t'
                if len(kun) > 0:
                    components += '<b>kun:</b>' + ', '.join(kun) + '\t'
                if len(on) > 0:
                    components += '<b>on:</b>' + ', '.join(on) + lineBreak
                if len(gloss) > 0:
                    components += ("<font style='font-family: Calibri; font-size: 11pt'>"
                                   + ", ".join(gloss) + "</font>" + lineBreak)
            except Exception:
                # Best effort: a kanji that cannot be looked up is still shown.
                components += kanji + lineBreak

    # Remove the trailing break as a suffix. str.rstrip('<br/>') strips any of
    # the characters '<', 'b', 'r', '/', '>' and therefore also ate the
    # closing '>' of a final '</font>' tag.
    while components.endswith(lineBreak):
        components = components[:-len(lineBreak)]
    return components
def _jpn(token):
    """Convert jpn token to phonemes.

    Loads the kana-to-phoneme tables from ``lib/data/phon/ja-Hira`` and
    ``lib/data/phon/ja-Kata`` (tab-separated ``key<TAB>value`` lines; a key
    may occur several times and collects every value), replaces kanji in
    ``token`` by their first on-reading from the kanjidic file
    ``lib/data/dict/ja``, and passes the result to ``_maxphon``.

    Args:
        token: the text to convert.

    Returns:
        Whatever ``_maxphon`` produced, or ``"?"`` when that is the empty
        string.
    """
    from cjktools import scripts
    from cjktools.resources import kanjidic

    lkp = {}
    for fn in ["lib/data/phon/ja-Hira", "lib/data/phon/ja-Kata"]:
        # The context manager closes each table file; the previous
        # open(fn).readlines() left the handle to the garbage collector.
        with open(fn) as table:
            for line in table:
                line = line.strip()
                if line == "":
                    continue
                kv = line.split("\t")
                if len(kv) != 2:
                    # Malformed row: report it and carry on.
                    print("!", kv, file=sys.stderr)
                    continue
                lkp.setdefault(kv[0].strip(), []).append(kv[1].strip())

    kjd = kanjidic.Kanjidic(kanjidic_files=["lib/data/dict/ja"])

    pieces = []
    for seg in scripts.script_boundaries(token):
        tipus = scripts.script_types(seg)
        # NOTE(review): 3 is presumably the value of scripts.Script.Kanji;
        # confirm against the installed cjktools version.
        if 3 in tipus:
            for ch in seg:
                # Characters missing from the dictionary, or without any
                # on-reading, contribute nothing to the output.
                if ch in kjd:
                    readings = kjd[ch].on_readings
                    if len(readings) > 0:
                        pieces.append(readings[0])
        else:
            pieces.append(seg)

    res = _maxphon(lkp, "".join(pieces))
    if res == "":
        return "?"
    return res
def process_response(self, request, response):
    """Wrap Japanese text runs of an HTML response in ``lang="ja"`` spans.

    Only non-streaming ``200`` responses whose ``Content-Type`` starts with
    ``text/html`` are considered. The body is decoded as UTF-8 and, if it
    contains any script listed in ``self.japanese_scripts``, every run in
    one of those scripts is wrapped in
    ``<span lang="ja" xml:lang="ja">...</span>`` before the body is
    re-encoded.

    Args:
        request: the current request (unused).
        response: the response produced by the view.

    Returns:
        The same response object, modified in place when it qualified.
    """
    if response.status_code != 200:
        return response
    if not response.get('Content-Type', '').startswith('text/html'):
        return response
    # Streaming responses have no ``content`` attribute; reading it would
    # raise AttributeError, so they are passed through untouched.
    if getattr(response, 'streaming', False):
        return response

    content = response.content.decode('utf8')
    if not scripts.script_types(content).intersection(self.japanese_scripts):
        return response

    # NOTE(review): the runs are taken from the raw markup, so Japanese text
    # inside tag attributes is presumably wrapped as well - confirm that the
    # served pages tolerate this.
    parts = []
    for part in scripts.script_boundaries(content):
        if scripts.script_type(part) in self.japanese_scripts:
            parts.append('<span lang="ja" xml:lang="ja">%s</span>' % part)
        else:
            parts.append(part)
    response.content = u''.join(parts).encode('utf8')

    # The body grew; a Content-Length header set earlier would now be stale.
    if response.has_header('Content-Length'):
        response['Content-Length'] = str(len(response.content))
    return response
def expand_long_vowels(kana_string):
    """
    Expands whatever long vowels are possible to expand.

    Hiragana and katakana segments have each match of ``_long_finder``
    replaced by the vowel-line kana of the character before it; segments in
    any other script are copied through unchanged.

        >>> a = expand_long_vowels(u'すー')
        >>> b = u'すう'
        >>> a == b
        True
    """
    # Maps a segment's script to the function that converts the processed
    # (hiragana) text back to that script.
    restore_script = {
        scripts.Script.Hiragana: lambda x: x,
        scripts.Script.Katakana: scripts.to_katakana,
    }
    table = kana_table.KanaTable.get_cached()

    pieces = []
    for segment in scripts.script_boundaries(kana_string):
        if not len(segment):
            continue

        char_type = scripts.script_type(segment)
        if char_type not in restore_script:
            pieces.append(segment)
            continue

        restore = restore_script[char_type]
        # Work in hiragana; the matches are located on this string while the
        # replacements below are applied one by one.
        segment = scripts.to_hiragana(segment)
        for match in _long_finder.finditer(segment):
            pos = match.start()
            # NOTE(review): a match at position 0 would read segment[-1];
            # presumably no segment starts with the long-vowel mark - confirm.
            vowel = table.to_vowel_line(segment[pos - 1])
            segment = segment[:pos] + vowel + segment[pos + 1:]

        pieces.append(restore(segment))

    return ''.join(pieces)
def expand_long_vowels(kana_string):
    """
    Expands whatever long vowels are possible to expand.

    Each hiragana or katakana segment is converted to hiragana, every match
    of ``_long_finder`` in it is replaced by the vowel-line kana of the
    preceding character, and the segment is converted back to its original
    script. Segments in other scripts are passed through as they are.

        >>> a = expand_long_vowels(u'すー')
        >>> b = u'すう'
        >>> a == b
        True
    """
    table = kana_table.KanaTable.get_cached()
    # Per-script function that undoes the to_hiragana() conversion.
    back_converters = {
        scripts.Script.Hiragana: lambda x: x,
        scripts.Script.Katakana: scripts.to_katakana,
    }

    expanded = []
    for segment in scripts.script_boundaries(kana_string):
        if not len(segment):
            continue

        segment_script = scripts.script_type(segment)
        if segment_script not in back_converters:
            expanded.append(segment)
            continue

        convert_back = back_converters[segment_script]
        segment = scripts.to_hiragana(segment)
        for found in _long_finder.finditer(segment):
            index = found.start()
            # NOTE(review): index 0 would make segment[index - 1] wrap to the
            # last character; presumably that input never occurs - confirm.
            replacement = table.to_vowel_line(segment[index - 1])
            segment = segment[:index] + replacement + segment[index + 1:]

        expanded.append(convert_back(segment))

    return ''.join(expanded)
def sift_nonj_characters(data, plain):
    """Remove from ``data`` the ASCII-script runs found in ``plain``.

    ``plain`` is split into script runs; each run whose script type is
    ``scripts.Script.Ascii`` is deleted from ``data`` with ``data.replace``.

    Returns:
        ``data`` after all replacements.
    """
    ascii_runs = (
        chunk for chunk in scripts.script_boundaries(plain)
        if scripts.script_type(chunk) is scripts.Script.Ascii
    )
    for chunk in ascii_runs:
        data = data.replace(chunk, '')
    return data
def eventFilter(self, object, event):
    """Drive the info pop-ups of the parent widget for a watched word label.

    * ``HoverLeave``: resets the label colour, hides all four pop-ups
      (``info``, ``allInfo``, ``kanjiInfo``, ``kanjiGroups``) and moves
      ``info`` back to its position relative to the screen geometry.
    * ``HoverEnter``: highlights the label and fills ``info`` with the word,
      its reading (blank when identical to the word) and the ``rdk`` entries
      of each kanji, then shows it.
    * ``MouseButtonPress``: middle button shows similar-kanji groups, right
      button shows kanjidic details, left button shows stroke animations and
      translations; for any other button a visible ``allInfo`` is hidden and
      ``info`` is shown again.

    Args:
        object: the watched widget; its ``parent()`` owns the pop-ups and
            the dictionaries used here.
        event: the event delivered to ``object``.

    Returns:
        Always ``False``.
    """
    lineBreak = '<br/>'
    eventType = event.type()

    if eventType == QEvent.HoverLeave:
        parent = object.parent()
        object.setStyleSheet("QLabel { color: rgb(0, 0, 0); }")
        parent.info.hide()
        parent.allInfo.hide()
        parent.kanjiInfo.hide()
        parent.kanjiGroups.hide()

        desktop = QApplication.desktop().screenGeometry()
        parent.info.setGeometry(QRect(desktop.width() - H_INDENT - I_WIDTH - I_INDENT,
                                      desktop.height() - V_INDENT,
                                      I_WIDTH, I_HEIGHT))

    elif eventType == QEvent.HoverEnter:
        parent = object.parent()
        word = object.text()
        object.setStyleSheet("QLabel { color: rgb(0, 5, 255); }")

        parent.info.item.setText(word)
        reading = parent.srs.getWordPronunciationFromExample(word)
        if reading != word:
            parent.info.reading.setText(reading)
        else:
            parent.info.reading.setText(u'')

        # Collect the rdk entries of every kanji, one '\n' item after each.
        components = []
        for cluster in scripts.script_boundaries(word):
            if scripts.script_type(cluster) == scripts.Script.Kanji:
                for kanji in cluster:
                    components.extend(parent.rdk[kanji])
                    components.append('\n')
        if components:
            components.pop()  # drop the last '\n'
        parent.info.components.setText(' '.join(components))
        parent.info.show()

    elif eventType == QEvent.MouseButtonPress:
        parent = object.parent()
        word = object.text()
        button = event.button()

        # Similar-kanji groups
        if button == Qt.MiddleButton:
            parent.info.hide()
            parent.allInfo.hide()
            parent.kanjiInfo.hide()

            # Ordered, so the kanji are listed in the order they occur.
            kanji_groups = OrderedDict()
            for cluster in scripts.script_boundaries(word):
                if scripts.script_type(cluster) == scripts.Script.Kanji:
                    for kanji in cluster:
                        similar = parent.groups.findSimilarKanji(kanji)
                        try:
                            # The group without the kanji itself.
                            position = similar.index(kanji)
                            kanji_groups[kanji] = similar[:position] + similar[position + 1:]
                        except Exception:
                            kanji_groups[kanji] = parent.groups.findSimilarKanji(kanji)
                            log.debug(u'Not in group: ' + kanji)

            resulting_info = u''
            for kanji, group in kanji_groups.items():
                resulting_info += kanji + u' ~\t'
                for item in group:
                    lookup = parent.kjd[item]
                    resulting_info += (" " + item
                                       + " <font style='font-family: Calibri; font-size: 12pt'>("
                                       + lookup.gloss[0] + ")</font> ")
                resulting_info += '<br/>'

            if resulting_info == u'':
                resulting_info = u'No such groups in Kanji.Odyssey!'
            parent.kanjiGroups.info.setText(resulting_info)
            parent.kanjiGroups.show()

        # Kanji info
        if button == Qt.RightButton:
            parent.info.hide()
            parent.allInfo.hide()
            parent.kanjiGroups.hide()
            parent.kanjiInfo.info.setText(u'')

            resulting_info = u''
            for cluster in scripts.script_boundaries(word):
                if scripts.script_type(cluster) == scripts.Script.Kanji:
                    for kanji in cluster:
                        try:
                            lookup = parent.kjd[kanji]
                            kun = lookup.kun_readings
                            on = lookup.on_readings
                            gloss = lookup.gloss
                            resulting_info += ("<font style='font-family: "
                                               + Fonts.HiragiNoMyoutyouProW3
                                               + "; font-size: 16.5pt'>(" + kanji + ")</font>\t")
                            if len(kun) > 0:
                                resulting_info += '<b>kun: </b>' + ', '.join(kun) + '\t'
                            if len(on) > 0:
                                resulting_info += '<b>on:</b>' + ', '.join(on) + '<br/>'
                            if len(gloss) > 0:
                                resulting_info += ("<font style='font-family: Calibri; font-size: 12pt'>"
                                                   + ", ".join(gloss) + "</font><br/>")
                        except Exception:
                            # Was "components += ...", a name that is unbound
                            # in this branch and raised UnboundLocalError
                            # from inside the handler.
                            resulting_info += kanji + '<br/>'

            if resulting_info != '':
                if resulting_info.count('<br/>') > 7:
                    parent.kanjiInfo.setStyleSheet('QLabel { font-size: 13pt }')
                # Remove the trailing break as a suffix: rstrip('<br/>')
                # strips a character set and also ate the closing '>' of a
                # final '</font>' tag.
                while resulting_info.endswith(lineBreak):
                    resulting_info = resulting_info[:-len(lineBreak)]
                parent.kanjiInfo.info.setText(resulting_info)
            else:
                parent.kanjiInfo.info.setText(u'No such kanji in kanjidic!')
            parent.kanjiInfo.show()

        # Translation and strokes info
        if button == Qt.LeftButton:
            parent.kanjiInfo.hide()
            parent.info.hide()
            parent.kanjiGroups.hide()

            unfillLayout(parent.allInfo.layout)
            parent.allInfo.layout.setMargin(1)

            kanjiList = []
            for cluster in scripts.script_boundaries(word):
                if scripts.script_type(cluster) == scripts.Script.Kanji:
                    for kanji in cluster:
                        kanjiList.append(kanji)

            # i / j: next free grid row / column in the allInfo layout.
            i = 0
            j = 0

            # Kanji strokes: one animation per kanji listed in the manifest,
            # all placed in the first row.
            if len(kanjiList) > 0:
                with open(PATH_TO_RES + STROKES + KANJI_MANIFEST, 'r') as infile:
                    text = infile.read()

                for kanji in kanjiList:
                    # Hex of the UTF-8 bytes, used as the animation's base
                    # name (the 'hex' codec is Python 2 only).
                    hexName = kanji.encode('utf-8').encode('hex')
                    if text.find(hexName) != -1:
                        gif = QLabel()
                        gif.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)
                        gif.setAlignment(Qt.AlignCenter)

                        movie = QMovie(PATH_TO_RES + STROKES + hexName + '.gif', QByteArray(), self)
                        movie.setCacheMode(QMovie.CacheAll)
                        movie.setSpeed(150)
                        gif.setMovie(movie)

                        parent.allInfo.layout.addWidget(gif, i, j)
                        j = j + 1
                        movie.start()
                i = i + 1

            # Words translation
            translations = QLabel(u'')
            translations.setFont(QFont('Calibri', 11))
            translations.setWordWrap(True)
            translations.setAlignment(Qt.AlignCenter)

            try:
                search = parent.edict[parent.srs.getWordNonInflectedForm(word)]
                reading = parent.srs.getWordPronounciation(parent.srs.getWordNonInflectedForm(word))
                variants = search.senses_by_reading()[reading][:3]
                variants = [sense for sense in variants if sense != '(P)']
                translationText = ('<b>' + parent.srs.getWordPronunciationFromExample(word)
                                   + '</b>:\t' + ', '.join(variants))
                translations.setText(translationText.rstrip('\n'))
            except Exception:
                # The edict lookup failed: fall back to jmdict.
                reading = parent.srs.getWordPronounciation(parent.srs.getWordNonInflectedForm(word))

                ### by reading
                search = parent.jmdict.lookupTranslationByReadingJoin(reading, parent.options.getLookupLang())
                if len(search) > 0:
                    translations.setText('<b>' + parent.srs.getWordPronunciationFromExample(word)
                                         + '</b>:\t' + ', '.join(search[:5]))
                ### by kanji
                else:
                    search = parent.jmdict.lookupItemByReading(reading)
                    if len(search) > 0:
                        lookup = parent.jmdict.lookupItemTranslationJoin(search[0], parent.options.getLookupLang())
                        translations.setText('<b>' + parent.srs.getWordPronunciationFromExample(word)
                                             + '</b>:\t' + ', '.join(lookup[:5]))
                ### nothing found
                if len(search) == 0:
                    translations.setText(u'Alas, no translation in edict or jmdict!')

            if i > 0:
                # A horizontal rule between the stroke row and the text.
                separator = QFrame()
                separator.setFrameShape(QFrame.HLine)
                separator.setFrameShadow(QFrame.Sunken)
                parent.allInfo.layout.addWidget(separator, i, 0, 1, j)
                i = i + 1

            parent.allInfo.layout.addWidget(translations, i, 0, 1, j)
            parent.allInfo.update()
            parent.allInfo.show()

        elif parent.allInfo.isVisible():
            parent.allInfo.hide()
            parent.info.show()

    return False
def addKanjiToStudy(self):
    """Add every kanji of the word shown in ``itemsMenu.wordInfo`` to the db.

    The word is split into script runs and each character of each kanji run
    is passed to ``self.db.addKanjiToDb``, in order of appearance; repeated
    kanji are passed again.
    """
    word = self.itemsMenu.wordInfo.text()
    for cluster in scripts.script_boundaries(word):
        if not (scripts.script_type(cluster) == scripts.Script.Kanji):
            continue
        for kanji in cluster:
            self.db.addKanjiToDb(kanji)