def maybe_potential_form(self) -> Optional[str]:
    """Guess the dictionary form this token could be a potential form of.

    Two surface shapes are considered, both only when the POS string starts
    with "v" and the surface itself has no JMdict entries:

    * a single morpheme, already in its dictionary form, whose romaji ends
      in "eru": the last two kana are replaced by the matching "-u" kana;
    * a first morpheme whose romaji ends in "e": its last kana is replaced
      by the matching "-u" kana.

    The guess is only returned if the original surface is found among the
    conjugations generated for it and the guess itself has JMdict entries.

    Returns:
        The guessed dictionary form, or None when no guess survives.
    """
    pos_tag = self.pos_str()
    surface = self.surface()
    candidate = None

    if (pos_tag[0] == "v"
            and len(self.morphemes) == 1
            and self.morphemes[0].dictionary_form() == surface
            and romkan.to_roma(surface).endswith("eru")
            and not jmdict_lookup(surface).entries):
        # Strip "eru" from the romaji of the last two kana, then add "u".
        consonant = romkan.to_roma(surface[-2:])[:-3]
        candidate = surface[:-2] + romkan.to_hiragana(consonant + "u")
    elif (pos_tag[0] == "v"
            and romkan.to_roma(self.morphemes[0].surface()).endswith("e")
            and not jmdict_lookup(surface).entries):
        # Swap the trailing "e" sound of the first morpheme for "u".
        head = self.morphemes[0].surface()
        consonant = romkan.to_roma(head[-1])[:-1]
        candidate = head[:-1] + romkan.to_hiragana(consonant + "u")

    if not candidate:
        return None

    candidate_pos: SudachiPos = parse(candidate)[0].part_of_speech()
    conjugated_forms = merge_multi_dicts([
        flip_multi_dict(forms)
        for forms in all_conjugations(candidate, candidate_pos).values()
    ])
    # The surface must be a conjugation the guessed verb can really produce.
    if surface not in conjugated_forms.keys():
        return None
    if not jmdict_lookup(candidate).entries:
        return None
    return candidate
def get_not_halfway_hr(full_hiragana, progress_roma):
    """Return a well-formed hiragana rendering while typing is in progress.

    :param full_hiragana: the hiragana of the *whole* target text
    :param progress_roma: romaji used to locate the typing position inside
        the romaji of ``full_hiragana`` (last occurrence wins)
    :return: hiragana for the text from that position on, widened backwards
        so that a kana is not cut in the middle of its romaji spelling
    """
    # Empty input gives an empty result.
    if len(progress_roma) == 0:
        return ""

    # Find how far into the full text the typist has got.
    full_roma = hira2roma(full_hiragana)
    start = full_roma.rfind(progress_roma)

    # Original intent: "about to type a vowel right after a consonant"
    # (catches ta, ha, ...).
    # NOTE(review): the original comment says "and" but the code uses "or".
    on_vowel_like = re.match("[aeiouyn]", full_roma[start])
    if not (on_vowel_like or romkan.is_consonant(full_roma[start - 1])):
        return romkan.to_hiragana(full_roma[start:])

    # Kana spelled with three romaji letters (catches tya, hyu, ...).
    if start >= 2 and romkan.is_consonant(full_roma[start - 2]):
        return romkan.to_hiragana(full_roma[start - 2:])[1:]
    return romkan.to_hiragana(full_roma[start - 1:])
def test_cli(lbound, ubound, count):
    """Run a console quiz of ``count`` generated words.

    Each round shows the word in hiragana, reads the romaji typed by the
    user, speaks the word, and reports whether the answer matched. The
    final score is printed as "score/count".
    """
    score = 0
    for _ in range(count):
        word = generateWord(lbound, ubound)
        print(romkan.to_hiragana(word))
        answer = str(raw_input("in romanji: "))
        tts(romkan.to_hiragana(word))
        if answer != word:
            print("Incorrect")
            print("The correct answer was: " + word)
        else:
            print("Correct")
            score += 1
    print(str(score) + "/" + str(count))
def search(jisho, all_mode, rounds, verbose):
    """Dictionary mode, where kanji previously read can be searched.

    Dispatches to ``show_all`` when ``all_mode`` is set and to ``game`` when
    ``rounds`` is positive. Otherwise every command-line argument is looked
    up: directly as a key of ``jisho``, or else against each entry's
    meaning, kunyomi (as hiragana) and onyomi (as katakana).
    """
    if all_mode:
        show_all(jisho)
        return
    if rounds > 0:
        game(jisho, rounds)
        return

    for term in sys.argv[1:]:
        if term in jisho:
            jisho[term]()
            continue

        hiragana = romkan.to_hiragana(term)
        katakana = romkan.to_katakana(term)
        for kanji in jisho:
            # First matching field wins: meaning, then kunyomi, then onyomi.
            if term in jisho[kanji].meaning:
                arrow, shown = "=", term
            elif hiragana in jisho[kanji].kunyomi:
                arrow, shown = "->", hiragana
            elif katakana in jisho[kanji].onyomi:
                arrow, shown = "~>", katakana
            else:
                continue

            if verbose is False:
                print(kanji, arrow, shown)
            else:
                jisho[kanji]()
def process(cls, v: str):
    """Normalise ``v`` to hiragana by going through Hepburn romaji.

    The pieces returned by ``cls.reading_form(v)`` are joined with single
    spaces, converted to Hepburn romaji and then back to hiragana.
    """
    spaced_reading = " ".join(cls.reading_form(v))
    # (disabled) v = jaconv.kata2hira(v, ignore="")
    # Half-width katakana is not picked up here, so half-width characters
    # have to be dealt with beforehand.
    hepburn = romkan.to_hepburn(spaced_reading)
    # to_hiragana only reacts to romaji, hence the detour through Hepburn.
    hiragana = romkan.to_hiragana(hepburn)
    # (disabled) v = re.sub(" +", " ", v)
    return hiragana
def map_dict_form_to_different_ending(verb, romaji_ending, *special_endings):
    '''Build a Godan verb stem plus the ending that fits its last kana.

    Args:
        verb (str): Japanese verb in kana, might contain kanji
        romaji_ending (str): target vowel sound appended to the consonant of
            the verb's last kana
        *special_endings: endings used verbatim when the last kana is the
            -u / -tsu / -su particle, in that order.

    Returns:
        str: verb stem with the matching ending attached
    '''
    last_kana = splice_verb(verb, VerbClass.GODAN, False)
    verb_stem = splice_verb(verb, VerbClass.GODAN)

    # The three irregular endings are supplied by the caller, by position.
    irregular = (U_PARTICLE, TSU_PARTICLE, SU_PARTICLE)
    for position, particle in enumerate(irregular):
        if last_kana == particle:
            return "{}{}".format(verb_stem, special_endings[position])

    # Regular case: keep the consonant of the last kana, swap the vowel.
    consonant = romkan.to_roma(last_kana)[:-1]
    new_ending_roma = "{}{}".format(consonant, romaji_ending)
    return "{}{}".format(verb_stem, romkan.to_hiragana(new_ending_roma))
async def search(client, message, smk_dict):
    """Look up the second word of the message and post paged results.

    A query whose first character is not kana is converted to hiragana
    first. Matches are grouped into pages so that the summed length of each
    match's third field stays below 200 per page; the first page is sent as
    an embed with left/right arrow reactions.

    Returns:
        ``(message id, SearchObj)`` for the sent message, or None when
        nothing matched.
    """
    search_q = message.content.split()[1]
    if not is_kana(search_q[0]):
        search_q = to_hiragana(search_q)

    started = time.time()
    matches = dict_search(search_q, smk_dict)
    dur = time.time() - started
    print(f'found {len(matches)} words in {dur} seconds')

    if len(matches) == 0:
        not_found = discord.Embed(
            description=f'No results found for **{search_q}**!',
            color=0x62f7f7)
        await message.channel.send(embed=not_found)
        return None

    # Greedy pagination on the length of each match's third field.
    pages = [[matches[0]]]
    for match in matches[1:]:
        used = sum(len(entry[2]) for entry in pages[-1])
        if used + len(match[2]) < 200:
            pages[-1].append(match)
        else:
            pages.append([match])

    current_page = 0
    embed = create_page(pages[current_page], search_q, current_page + 1,
                        current_page + 1 + len(pages))
    msg = await message.channel.send(embed=embed)
    for arrow in ('⬅', '➡'):
        await msg.add_reaction(arrow)
    return (msg.id, SearchObj(pages, search_q, msg))
def subvert(expr, t):
    """Convert romaji with macron/circumflex long vowels to kana.

    Long-vowel letters are first spelled out (o-macron or o-circumflex ->
    "ou", u-macron -> "uu", e-macron -> "ee", a-macron -> "aa"); the result
    is then converted to katakana when ``t == KATA`` and to hiragana
    otherwise.

    Args:
        expr: romaji text, possibly containing long-vowel letters.
        t: target script selector; compared against ``KATA``.

    Returns:
        The kana string produced by romkan.
    """
    import romkan

    # The flag has to be passed by keyword: the fourth positional parameter
    # of re.sub is ``count``, so a positional re.UNICODE (== 32) silently
    # limited each substitution to the first 32 occurrences.
    expr = re.sub(u'\u014D|\u00F4', 'ou', expr, flags=re.UNICODE)
    expr = re.sub(u'\u016B', 'uu', expr, flags=re.UNICODE)
    expr = re.sub(u'\u0113', 'ee', expr, flags=re.UNICODE)
    expr = re.sub(u'\u0101', 'aa', expr, flags=re.UNICODE)

    if t == KATA:
        kana = romkan.to_katakana(expr)
    else:
        kana = romkan.to_hiragana(expr)
    return kana
def clean(self):
    """Fill in whichever of hiragana/romaji is missing, then set the slug.

    Raises:
        ValidationError: if neither ``hiragana`` nor ``romaji`` is set.
    """
    # This check must come first: it used to be the last branch of the
    # if/elif chain, where ``not self.hiragana`` had already matched, so
    # the error could never be raised.
    if not self.hiragana and not self.romaji:
        raise ValidationError(
            "You have to enter either the Hiragana or Romaji of a Word")

    if not self.hiragana:
        self.hiragana = romkan.to_hiragana(self.romaji)
    elif not self.romaji:
        self.romaji = romkan.to_roma(self.hiragana).capitalize()

    self.slug = slugify(self.romaji)
def converter():
    """Refresh the hiragana and katakana entries from the romaji entry."""
    frase = ent_romaji.get()
    targets = (
        (ent_hiraga, romkan.to_hiragana),
        (ent_kataka, romkan.to_katakana),
    )
    for entry, convert in targets:
        # Replace the whole content of the entry with the converted text.
        entry.delete(0, tk.END)
        entry.insert(0, convert(frase))
def lemmatize_with_mecab(expression, kanji):
    '''Find the first word containing kanji; return (lemma, reading).

    Walks the MeCab node list for ``expression``. Feature field 10 is used
    as the lemma and field 6 as the reading; the reading is passed through
    romaji so that it comes back as hiragana.

    Raises ValueError if no node's lemma contains ``kanji``.
    '''
    node = mecab_tagger.parseToNode(expression)
    while node:
        fields = node.feature.split(',')
        lemma = fields[10]
        if kanji in lemma:
            reading = romkan.to_hiragana(romkan.to_roma(fields[6]))
            return (lemma, reading)
        node = node.next
    raise ValueError("Mecab failed: %s, %s" % (expression, kanji))
def lemmatize_with_mecab(expression, kanji):
    '''Find the first word containing kanji; return (lemma, reading).

    The lemma is taken from feature field 10 of each MeCab node and the
    reading from field 6, round-tripped through romaji to get hiragana.
    A ValueError is raised when no node matches.
    '''
    current = mecab_tagger.parseToNode(expression)
    while current:
        feature_fields = current.feature.split(',')
        if kanji not in feature_fields[10]:
            current = current.next
            continue
        reading_roma = romkan.to_roma(feature_fields[6])
        return (feature_fields[10], romkan.to_hiragana(reading_roma))
    raise ValueError("Mecab failed: %s, %s" % (expression, kanji))
def romaji_to_kana(request):
    """ Converts romaji in either katakana or hiragana. """
    # Only the first 1000 characters of the ``word`` query parameter are
    # converted; a missing parameter is treated as an empty string.
    word = request.query_params.get('word', '')[0:1000]
    payload = {
        'hiragana': romkan.to_hiragana(word),
        'katakana': romkan.to_katakana(word),
    }
    return Response(payload, status=status.HTTP_200_OK)
def submit(self):
    """Grade the typed romaji, show feedback and move to the next word.

    Updates the score, the feedback label text and colour, speaks the word,
    clears the input, draws a new word and advances the round counter. Once
    the counter reaches ``self.count`` the final tally is printed and the
    parent window is destroyed.
    """
    uromanji = self.input.get()
    if uromanji == self.word:
        self.score += 1
        prefix, colour = "Correct! [", "green"
    else:
        prefix, colour = "Incorrect! [", "red"
    self.response.set(prefix + romkan.to_hiragana(self.word) + "] " + self.word)
    self.lblres.config(background=colour)

    sensei.tts(romkan.to_hiragana(self.word))
    self.input.set("")

    # Draw the next word and show it in kana.
    self.word = sensei.generateWord(self.lbound, self.ubound)
    self.kana.set(romkan.to_hiragana(self.word))
    self.curr += 1

    progress = str(self.score) + "/" + str(self.count) + " (" + str(self.curr) + ")"
    self.round.set(progress)
    if self.curr >= self.count:
        print(progress)
        self.parent.destroy()
async def ojad_phrase(client, message):
    """Post a screenshot of the OJAD phrasing result for the message text.

    Everything after the first word of the message is joined into the
    query; a query not starting with kana is converted to hiragana. The
    query is submitted to the OJAD phrasing page in headless Chrome, the
    ``phrasing_main`` element is screenshotted, cropped and sent to the
    channel. Any failure is printed and reported with an error embed.
    """
    search_q = ''.join(message.content.split()[1:])
    if not is_kana(search_q[0]):
        search_q = to_hiragana(search_q)
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--ignore-certificate-errors')
        options.add_argument("headless")
        options.add_argument("window-size=1600x6000")
        options.binary_location = os.getenv('CHROME_BIN')
        driver = webdriver.Chrome(
            options=options, executable_path=os.getenv('CHROMEDRIVER_PATH'))
        try:
            driver.get('https://www.gavo.t.u-tokyo.ac.jp/ojad/phrasing/index')
            search_element = driver.find_element_by_id('PhrasingText')
            search_element.send_keys(search_q)
            search_element.submit()
            WebDriverWait(driver, 8).until(
                EC.visibility_of_element_located((By.ID, 'phrasing_main')))
            element = driver.find_element_by_id("phrasing_main")
            location = element.location
            size = element.size
            img_filename = 'tmp/' + str(getrandbits(32)) + '.png'
            driver.save_screenshot(img_filename)
        finally:
            # Shut the browser down even when a step above fails (for
            # example the 8 second wait timing out); previously the driver
            # was only closed on the success path and leaked otherwise.
            driver.quit()

        # Crop the full-page screenshot down to the result element; the
        # 55/50 pixel offsets trim the top and bottom of that element.
        x = location['x']
        y = location['y']
        width = location['x'] + size['width']
        height = location['y'] + size['height']
        im = Image.open(img_filename)
        im = im.crop((int(x), int(y) + 55, int(width), int(height) - 50))
        im.save(img_filename)
        print('saved image to', img_filename)
        await message.channel.send(file=discord.File(img_filename))
        os.remove(img_filename)
    except Exception as e:
        print(e)
        embed = discord.Embed(
            description=
            f'An error occured while trying to query for the OJAD phrasing for **{search_q}**!',
            color=0x62f7f7)
        await message.channel.send(embed=embed)
def parseJapanese(tokens,lines,glyph):
    """Print ``glyph`` next to each reading in ``tokens[2]``, as hiragana.

    ``tokens[2]`` is split on single spaces. Each reading is converted with
    romkan (falling back to the raw reading if conversion fails) and printed
    after the glyph, both ASCII-encoded with backslash escapes. If that
    print fails, a "BROKEN=" line with the raw reading is printed instead.
    ``lines`` is not used.
    """
    # Build the Japanese ReadingsTree<->Kanji Mappings
    vector = tokens[2]
    for reading in vector.split(" "):
        try:
            kana = romkan.to_hiragana(reading)
        except Exception:
            # Keep going with the unconverted reading; catching Exception
            # rather than everything lets Ctrl-C still interrupt the run.
            kana = reading
        try:
            print(glyph.encode("ascii","backslashreplace")+" "+kana.encode("ascii","backslashreplace"))
        except Exception:
            print(glyph.encode("ascii","backslashreplace")+" BROKEN="+reading)
def query(self, key):
    """Return a single result row whose title is ``key`` in hiragana.

    The row's action calls the ``copy`` method with the original ``key``
    (not the converted title) as its only parameter.
    """
    title = romkan.to_hiragana(key)
    copy_action = {
        "method": "copy",
        "parameters": [key],
        "dontHideAfterAction": False
    }
    return [{
        "Title": title,
        "SubTitle": "Copy to Clipboard",
        "IcoPath": "Images\\icon.png",
        "JsonRPCAction": copy_action
    }]
def fileWriter(rootPath):
    """Convert ``jap.txt`` under ``rootPath`` into romaji/hiragana files.

    ``rootPath`` is used as a plain string prefix, so it must already end
    with a path separator. For every non-blank line of ``jap.txt``:

    * parentheses are stripped and the line is written to ``result.txt``;
    * lines for which ``only_roman_chars`` is true are copied unchanged to
      ``romaji.txt`` and ``hiragana.txt``;
    * other lines are romanised with ``kanji_to_romaji`` (plus ".\\n"),
      written to ``romaji.txt``, and their hiragana conversion is written
      to ``hiragana.txt``. Processing stops after a romanised line that
      contains "str".

    All four files are opened in one ``with`` statement so they are closed
    even if a conversion raises part-way through.
    """
    # Kept exactly as written; with these literals the last four pairs find
    # nothing to replace once the first two have run.
    stripped_tokens = ('(', ')', ' (', ') ', '(', ')')

    with open(rootPath + 'romaji.txt', 'w') as text_file, \
            open(rootPath + 'result.txt', 'w') as result_file, \
            open(rootPath + 'hiragana.txt', 'w') as text_fileHira, \
            open(rootPath + 'jap.txt') as sourceFile:
        for line in sourceFile:
            if line.isspace():
                continue
            for token in stripped_tokens:
                line = line.replace(token, '')
            result_file.write(line)
            if only_roman_chars(line):
                text_file.write(line)
                text_fileHira.write(line)
                continue
            line = kanji_to_romaji(line) + '.\n'
            lineHira = romkan.to_hiragana(line)
            text_file.write(line)
            text_fileHira.write(lineHira)
            if 'str' in line:
                break
def default_to_hiragana(self, row, col):
    """Show selected cells as hiragana and store the edited cell.

    Selected items in a non-zero column are rewritten as hiragana unless
    ``self.lang_mode`` is set. The item at (row, col), if present, is stored
    in ``self.page_data`` under "row,col": as typed for column 0, converted
    to romaji for any other column. The page data is then saved under the
    current page key and handed to ``update_config``.
    """
    for cell in self.tableWidget.selectedItems() or ():
        if cell.column() and not self.lang_mode:
            cell.setText(romkan.to_hiragana(cell.text()))

    item = self.tableWidget.item(row, col)
    if item:
        key = '%s,%s' % (row, col)
        self.page_data[key] = item.text()
        if col:
            # Non-zero columns are stored as romaji.
            self.page_data[key] = romkan.to_roma(item.text())

    page_key = 'page_%s' % self.window().pageLab.text()
    self.data[page_key] = self.page_data
    self.update_config(self.data)
def replace_roman_to_kana(cls, msg: str) -> str:
    '''
    Replace romaji in the message with its kana reading.
    Example: ninja -> にんじゃ
    Note: English words are treated as romaji and converted as well.
    '''
    converted = msg
    # Each hit of cls.re_roma on the original message is replaced once,
    # left to right, by its hiragana reading.
    for roman_word in cls.re_roma.findall(msg):
        reading = romkan.to_hiragana(roman_word)
        converted = converted.replace(roman_word, reading, 1)
    return converted
def kana(self, msg, args):
    """Converts Romazi to kana"""
    # All-uppercase input becomes katakana, all-lowercase input becomes
    # hiragana; anything else (no arguments, mixed case, no letters) gets
    # the fallback reply. Several arguments are joined with single spaces.
    fallback = "Am I supposed to guess the word you want?..."

    count = len(args)
    if count < 1:
        return fallback
    word = args[0] if count == 1 else " ".join(args)
    if word is None:
        return fallback

    if word.isupper():
        return romkan.to_katakana(word)
    if word.islower():
        return romkan.to_hiragana(word)
    return fallback
async def romkan(self, ctx, *, text: commands.clean_content):
    """Convert romaji into hiragana or katakana, or vice-versa."""
    # The target script comes either from a "hg "/"kk "/"ro " prefix on the
    # text or from the name the command was invoked with.
    if text[:3] in ["hg ", "kk ", "ro "]:
        tp, text = text[:2], text[3:]
    else:
        tp = ctx.invoked_with
        if tp == "romkan":
            return await ctx.send(
                "Please either use `!hg`, `!kk` or `!ro` (for hiragana, katakana and romaji respectively), or pass the type as an argument: `!romkan hg LyricLy wa baka desu yo`"
            )

    converters = {
        "hg": romkan.to_hiragana,
        "kk": romkan.to_katakana,
        "ro": romkan.to_hepburn,
    }
    convert = converters.get(tp)
    if convert is not None:
        await ctx.send(convert(text))
def hiragana_candidates(word,num):
    """Print the ``num`` most probable spellings of ``word`` as hiragana.

    The word is decoded from UTF-8 if needed, romanised and NFKC-normalised.
    Candidates from ``prob``, ``edit1_prob`` and ``edit2_prob`` are pooled,
    sorted by their second element (highest first) and printed with their
    rank, hiragana, romaji and score. "NO RESULT" is printed when the pool
    is empty.
    """
    if not isinstance(word,unicode): #unicode check
        word = word.decode("utf8")
    romaji = unicodedata.normalize("NFKC",romkan.to_roma(word))
    print("romaji:{}".format(romaji))

    candidates = prob(romaji) + edit1_prob(romaji) + edit2_prob(romaji)
    if not candidates:
        print("NO RESULT")
        return

    ranked = sorted(candidates,key=lambda x :x[1],reverse=True)[:num]
    for rank,entry in enumerate(ranked,1):
        spelling = entry[0]
        p = entry[1]
        kana = romkan.to_hiragana(spelling).encode("utf8")
        print(" {} : {:<10}{:<20} {:<}".format(rank,kana,"("+spelling+")",p))
def to_hiragana(word: str):
    """Convert romaji to hiragana and describe how complete the result is.

    Returns a dict with:
      * ``hiragana``: the converted, lower-cased input;
      * ``partial``: the same text, except that a final "ん" is shown as
        "n" unless the input ended in "nn" or an apostrophe;
      * ``valid``: True when no a-z letter or apostrophe is left over.
    """
    lowered = word.lower()
    hiragana = romkan.to_hiragana(lowered)

    # A trailing "n" may still turn into na/ni/...; only "nn" or "'" makes
    # the final ん explicit.
    explicit_n = lowered.endswith(("nn", "'"))
    if hiragana.endswith("ん") and not explicit_n:
        partial = hiragana[:-1] + "n"
    else:
        partial = hiragana

    return {
        'hiragana': hiragana,
        'partial': partial,
        'valid': not re.search("[a-z']", hiragana),
    }
def _get_audio_entries(reading):
    """
    Try to download audio files with given reading.

    Every downloader in ``downloaders`` is switched to Japanese and asked
    to download files for the hiragana form of the reading. A downloader
    that raises is skipped.

    :param reading: the reading
    :return: a list of download entries (type name: DownloadEntry)
    """
    retrieved_entries = []
    hiragana = romkan.to_hiragana(reading)
    field_data = JapaneseFieldData("", "", hiragana)
    for dloader in downloaders:
        dloader.language = "ja"
        try:
            dloader.download_files(field_data)
        except Exception:
            # Best effort: one failing source must not block the others.
            # Catching Exception instead of everything keeps
            # KeyboardInterrupt and SystemExit working.
            continue
        retrieved_entries += dloader.downloads_list
    return retrieved_entries
def process_validated_user_input(self):
    """Handle a confirmed user entry: validate it, look it up, play it.

    The validated input is converted to hiragana. Input that still contains
    latin letters is rejected. Otherwise matching entries are looked up;
    on failure a warning is shown, HP is lost and the free joker is
    granted. Words already used are dropped, and the remaining candidates
    are ordered with ``word_to_freqrank``. With several candidates the
    player chooses one; the chosen word is rendered, the free joker is
    cleared and the word is added.
    """
    higana_input = romkan.to_hiragana(self.validated_user_input)
    # Check there is only hiragana. re.search scans the whole string;
    # re.match only looked at the first character, so input such as a kana
    # followed by a stray consonant slipped through.
    if re.search("[a-z]", higana_input):
        self.set_warning_msg("Invalid input !")
        return

    valid_entries_by_kanji_form, errors = self.lookup_word_entries(higana_input)
    if not valid_entries_by_kanji_form:
        if errors:
            # error message have a digit at the beginning, to get the most precise error
            error = sorted(errors)[0][1:]
            self.set_warning_msg(error)
        else:
            self.set_warning_msg("No match ! press Enter again to give up")
        self.lose_hp()
        self.free_joker = True
        return

    print(f"Found {len(valid_entries_by_kanji_form.keys())} valid entries for {higana_input}")
    # Iterate over a copy of the keys because entries are deleted below.
    for word in list(valid_entries_by_kanji_form.keys()):
        if word in self.words:
            print(f"Excluding word {word}: already used before")
            del valid_entries_by_kanji_form[word]
    if not valid_entries_by_kanji_form:
        self.set_warning_msg("Already used, try something else")
        return

    candidates = sorted(valid_entries_by_kanji_form.keys(), key=word_to_freqrank)
    if len(candidates) > 1:
        new_word = self.choose_word(candidates)
        if new_word is None:
            return
    else:
        new_word = candidates[0]

    # Special render to make the user wait !
    self.render_validated_word(new_word)
    pygame.display.flip()
    # Lose the free joker if any
    self.free_joker = False
    self.add_word(new_word)
    pygame.event.clear()  # FIXME: does not prevent "double taps"
def show_data(char):
    """Render the training images stored for ``char``.

    Every file in ``fs`` named "<char>.jpg" becomes a delete link wrapping
    its image, keyed by the file's md5. A placeholder heading is used when
    there are none. The page title shows ``char`` converted to hiragana.
    """
    import romkan

    snippets = []
    for stored in fs.find({'filename': '{}.jpg'.format(char)}):
        snippets.append(""" <a class="delete-img" data-target="{}" href="#"> <img src='/image/{}' width=80 /> </a> """.format(stored.md5, stored.md5))
    html = "".join(snippets)
    if html == "":
        html = "<h2>no data...</h2>"

    title = romkan.to_hiragana(char).encode('utf-8')
    body = unicode(""" <h1>Train Data for "{}"</h1>{} """.format(title, html), encoding='utf-8')
    return render_template('test.html', body=body)
async def ojad_index(client, message):
    """Post a screenshot of the OJAD word table for the message's query.

    The second word of the message is the query; a query not starting with
    kana is converted to hiragana. The OJAD search page for that word is
    opened in headless Chrome, the ``word_table`` element is screenshotted,
    cropped and sent to the channel. Any failure is reported as "no
    results".
    """
    search_q = message.content.split()[1]
    if not is_kana(search_q[0]):
        search_q = to_hiragana(search_q)
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--ignore-certificate-errors')
        options.add_argument("headless")
        options.add_argument('--lang=ja')
        options.add_argument("window-size=1600x1000")
        options.binary_location = os.getenv('CHROME_BIN')
        driver = webdriver.Chrome(
            options=options, executable_path=os.getenv('CHROMEDRIVER_PATH'))
        try:
            driver.get(
                'http://www.gavo.t.u-tokyo.ac.jp/ojad/search/index/display:print/sortprefix:accent/narabi1:kata_asc/narabi2:accent_asc/narabi3:mola_asc/yure:visible/curve:invisible/details:invisible/limit:20/word:'
                + search_q)
            element = driver.find_element_by_xpath("//table[@id='word_table']")
            location = element.location
            size = element.size
            img_filename = 'tmp/' + str(getrandbits(32)) + '.png'
            driver.save_screenshot(img_filename)
        finally:
            # Shut the browser down even when the table is not found;
            # previously a failed lookup left the Chrome process running.
            driver.quit()

        # Crop the page screenshot down to the table's bounding box.
        x = location['x']
        y = location['y']
        width = location['x'] + size['width']
        height = location['y'] + size['height']
        im = Image.open(img_filename)
        im = im.crop((int(x), int(y), int(width), int(height)))
        im.save(img_filename)
        print('saved image to', img_filename)
        await message.channel.send(file=discord.File(img_filename))
        os.remove(img_filename)
    except Exception:
        # Exception rather than a bare except, so that task cancellation is
        # not swallowed inside this coroutine.
        embed = discord.Embed(
            description=f'No results found for **{search_q}**!',
            color=0x62f7f7)
        await message.channel.send(embed=embed)
def command_ja(self, event):
    '''Usage: ~ja <k/h/r> <arg> displays katakana/hiragana/romaji for a given argument, converting between romaji and kana'''
    # The first word of the parameters selects the target script, the rest
    # is the phrase. Any failure sends a syntax hint and is re-raised.
    try:
        dest, phrase = event.params.split(' ', 1)
        dest = dest.lower()
        converters = {
            'k': romkan.to_katakana,
            'h': romkan.to_hiragana,
            'r': lambda text: romkan.to_roma(text.decode('utf-8')),
        }
        if dest not in converters:
            # NOTE(review): a bare raise with no active exception is itself
            # an error; it is used here only to reach the handler below.
            raise
        self.send_message(event.respond, converters[dest](phrase))
    except:
        self.send_message(event.respond, 'Invalid input, please check syntax.')
        raise
def hiragana_candidates(word, num):
    """Print up to ``num`` ranked hiragana candidates for ``word``.

    ``word`` is decoded from UTF-8 when it is not already unicode, turned
    into NFKC-normalised romaji, and scored by ``prob``, ``edit1_prob`` and
    ``edit2_prob``. The pooled results are listed best-first by their
    second element; "NO RESULT" is printed if there are none.
    """
    if not isinstance(word, unicode):  # unicode check
        word = word.decode("utf8")
    romaji = unicodedata.normalize("NFKC", romkan.to_roma(word))
    print("romaji:{}".format(romaji))

    pooled = prob(romaji) + edit1_prob(romaji) + edit2_prob(romaji)
    if pooled:
        best_first = sorted(pooled, key=lambda x: x[1], reverse=True)
        for position, scored in enumerate(best_first[:num]):
            candidate_roma, p = scored[0], scored[1]
            kana = romkan.to_hiragana(candidate_roma).encode("utf8")
            print(" {} : {:<10}{:<20} {:<}".format(
                position + 1, kana, "(" + candidate_roma + ")", p))
    else:
        print("NO RESULT")
def lookup(rad_dawg, dict_dawg, pattern):
    """Look up dictionary words matching a kana/radical pattern.

    Upper-case romaji runs in the pattern become katakana and lower-case
    runs become hiragana. Each token found by ``PATTERN_RE`` then yields one
    component: a bracketed token is resolved through
    ``rad_dawg.lookup_kanji``, any other token stands for itself. At most
    ``MAX_RESULTS`` results of ``dict_dawg.lookup_word`` are returned.
    """
    # sanity: refuse long patterns with many bracket groups
    if len(pattern) > 40 and pattern.count('[') > 20:
        return []

    # romaji => kana
    pattern = re.sub('[-A-Z]+', lambda m: romkan.to_katakana(m.group(0)), pattern)
    pattern = re.sub('[-a-z]+', lambda m: romkan.to_hiragana(m.group(0)), pattern)

    def component_for(token):
        if token[0] == '[' and token[-1] == ']':
            return rad_dawg.lookup_kanji(u''.join(token[1:-1]))
        return {token}

    components = [component_for(token) for token in PATTERN_RE.findall(pattern)]
    return dict_dawg.lookup_word(components)[:MAX_RESULTS]
def ask_sign(syllable):
    """Prompt with ``syllable`` and record the self-graded answer.

    The typed command is read inside a ``Timer`` for the syllable: "c"
    marks it correct, "i" incorrect, and the hiragana is printed afterwards.

    Returns True when the user wants to stop ("q", Ctrl-C or a caught
    signal); otherwise returns None.
    """
    with Timer(syllable) as t:
        try:
            command = input(syllable)
            if command == 'q':
                return True
            if command == 'c':
                t.correct()
            elif command == 'i':
                t.incorrect()
            print(romkan.to_hiragana(syllable))
            print()
        except (KeyboardInterrupt, CaughtSignal):
            return True
def ocr(filename):
    """Recognise the kana text in the image ``file/<filename>``.

    ``chars`` splits the image into lines; each line comes with several
    candidate segmentations. Every candidate is classified character by
    character with ``clf``, scored, and the best-scoring candidate per line
    is appended to the result and its character images are saved to disk.

    Returns the recognised text, one line per image line, without trailing
    whitespace.
    """
    import romkan
    fpath = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), 'file', filename)
    datalist = chars(fpath)
    ocr_str = ""
    for (line_n, line_datas) in enumerate(datalist):
        result = []
        likelihoods = []
        for datas in line_datas:
            # Each candidate is a pair: character images and per-character
            # "small kana" flags.
            datas, small = datas
            s = ""  # recognised string
            l = 1  # score of the result (= product of likelihoods ** (1 / string length))
            for (i, data) in enumerate(datas):
                testX = encode(raw=data)
                rom = label[int(clf.predict(testX)[0])]
                proba = clf.predict_proba(testX)[0]
                l *= max(proba)  # multiply in the likelihood
                # small-kana check
                if small[i] and rom in small_label:
                    rom = 'x' + rom
                s += romkan.to_hiragana(rom)
            # even out the difference in string length between candidates
            l = l**(1./len(datas))
            print("%s (%f)" % (s, l))
            result.append(s)
            likelihoods.append(l)
        # keep the candidate with the highest score
        like_i = np.argmax(likelihoods)
        ocr_str += (result[like_i] + '\n')
        # write the chosen segmentation out as images
        for img in datalist[line_n][like_i][0]:
            img.save('file/{}_{}.jpg'.format(
                line_n, datalist[line_n][like_i][0].index(img)))
    ocr_str = ocr_str.rstrip()
    print u"{}".format(ocr_str)
    return ocr_str
def render_prompt(self):
    """Draw the ">" prompt and, next to it, the input or a hint.

    The prompt sits at the bottom-left of the screen. When the user has
    typed something it is shown in hiragana in green; otherwise a grey hint
    naming the kanji to match is shown.
    """
    self.prompt = self.large_font.render('>', True, BLUE)
    self.prompt_rect = self.prompt.get_rect(bottomleft=(0, self.screen_h))
    self.screen.blit(self.prompt, self.prompt_rect)

    typed = self.user_input_value
    # Earlier variant, kept for reference: show the hint only while
    # `not self.words` (first kanji) and an empty green text otherwise.
    # First kanji ? show a message to help new players
    text, color = (
        (romkan.to_hiragana(typed), GREEN) if typed
        else (f"Type a word with {self.kanji_to_match}", GRAY)
    )

    self.user_input = self.large_font.render(text, True, color)
    self.user_input_rect = self.user_input.get_rect(topleft=self.prompt_rect.topright)
    self.screen.blit(self.user_input, self.user_input_rect)
def play(word_map):
    """Quiz the user on every entry of ``word_map`` in random order.

    ``word_map`` is shuffled in place. For each entry, item 0 is spoken and
    printed in hiragana, and the heard answer is compared with item 1.

    TODO
    * Score the result
    * Less praise
    * Don't say the words
    """
    random.shuffle(word_map)
    for word in word_map:
        prompt = word[0]
        speak(prompt)
        print(romkan.to_hiragana(prompt))

        answer = listen()
        print(answer)
        if answer == word[1]:
            speak(correct())
            continue
        speak(bubu())
        speak(f'The correct answer was {word[1]}')
def __setup_data(self):
    """Load the current page's ``config.json`` into the widgets.

    The file is looked up under the window's data path in the folder of
    the current stacked-widget page. The "topic" entry fills the line edit;
    every other entry maps "row,col" cells to text, which is placed into
    the table (as hiragana for non-zero columns). Nothing happens when the
    file is missing or empty.
    """
    window = self.window()
    page_dir = 'page_%s' % (window.stackedWidget.currentIndex() + 1)
    path = os.path.abspath(os.path.join(self.window().data_path(), page_dir, 'config.json'))
    if not os.path.exists(path):
        return

    with open(path) as data_file:
        data = json.load(data_file)
    if not data:
        return

    for key in data:
        if key == 'topic':
            self.lineEdit.setText(data[key])
            continue
        for cell, text in data[key].items():
            row, col = cell.split(',')
            self.tableWidget.setItem(int(row), int(col), QTableWidgetItem(text))
            if int(col):
                # Non-zero columns are shown as hiragana instead.
                kana_item = QTableWidgetItem(romkan.to_hiragana(text))
                self.tableWidget.setItem(int(row), int(col), kana_item)
    self.data = data
def strings(input):
    """Derive several transcriptions of ``input`` and return them joined.

    Sets the module globals ``raw``, ``kunrei``, ``hiragana``, ``katakana``,
    ``hepburn`` and ``onoma``. ``onoma`` is the Kunrei form with a fixed set
    of substitutions applied in the order listed, and a final "tto"
    rewritten as "Q".

    Returns "hiragana katakana hepburn onoma" separated by single spaces.
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    kunrei = romkan.to_kunrei(input)
    # Using "kunrei" because hiragana <> katakana conversion doesn't work
    hiragana = romkan.to_hiragana(kunrei)
    katakana = romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    changes_dict = {
        'ch': 'C', 'ty': 'T', 'sy': 'S', 'ny': 'N', 'zy': 'Z',
        'dj': 'D', 'l': 'r', 'xtu': 'Q',
        'aa': 'a-', 'ee': 'e-', 'ii': 'i-', 'oo': 'o-', 'uu': 'u-',
    }
    onoma = kunrei
    for old, new in changes_dict.items():
        onoma = onoma.replace(old, new)
    if onoma.endswith('tto'):
        onoma = onoma[:-3] + 'Q'

    return ' '.join((hiragana, katakana, hepburn, onoma))
def strings(input):
    """Compute kana, Hepburn and shorthand forms of ``input``.

    Side effect: the module globals ``raw``, ``kunrei``, ``hiragana``,
    ``katakana``, ``hepburn`` and ``onoma`` are overwritten. The shorthand
    (``onoma``) starts from the Kunrei romaji, applies the substitution
    table below in order, and turns a trailing "tto" into "Q".

    Returns the four forms as one space-separated string.
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    kunrei = romkan.to_kunrei(input)
    # Using "kunrei" because hiragana <> katakana conversion doesn't work
    hiragana = romkan.to_hiragana(kunrei)
    katakana = romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    changes_dict = {'ch': 'C', 'ty': 'T', 'sy': 'S', 'ny': 'N', 'zy': 'Z',
                    'dj': 'D', 'l': 'r', 'xtu': 'Q', 'aa': 'a-', 'ee': 'e-',
                    'ii': 'i-', 'oo': 'o-', 'uu': 'u-'}
    shorthand = kunrei
    for pattern in changes_dict:
        shorthand = shorthand.replace(pattern, changes_dict[pattern])
    onoma = shorthand[:-3] + 'Q' if shorthand.endswith('tto') else shorthand

    return "%s %s %s %s" % (hiragana, katakana, hepburn, onoma)
def game(jisho, rounds):
    """Plays the random kanji game!

    For each round a random kanji from ``jisho`` is quizzed on one randomly
    chosen aspect: its meaning, one of its kunyomi (answer converted to
    hiragana) or one of its onyomi (answer converted to katakana). The
    score and percentage are printed at the end.

    Args:
        jisho: mapping from kanji to entries exposing ``meaning``,
            ``kunyomi`` and ``onyomi``.
        rounds: number of questions to ask; with no rounds the summary
            reports 0%.
    """
    correct = 0
    kanji_pool = list(jisho.keys())
    count = rounds
    while count > 0:
        count -= 1
        kanji = random.choice(kanji_pool)
        quiz = random.randint(0, 2)
        if quiz == 0:
            answer = input("What does " + kanji + " mean? ")
            if answer in jisho[kanji].meaning:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!", kanji, "=", jisho[kanji].meaning)
        elif quiz == 1:
            answer = input("Type in one of the kunyomi of " + kanji + " : ")
            if romkan.to_hiragana(answer) in jisho[kanji].kunyomi:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!")
                print(kanji, "=", jisho[kanji].kunyomi)
        elif quiz == 2:
            answer = input("Type in one of the onyomi of " + kanji + " : ")
            if romkan.to_katakana(answer) in jisho[kanji].onyomi:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!")
                print(kanji, "=", jisho[kanji].onyomi)

    # Guard the percentage: rounds == 0 used to raise ZeroDivisionError.
    percent = 100 * correct / rounds if rounds else 0
    print(">> You got " + str(correct) + " out of " + str(rounds) + " (" +
          str(percent) + "%)!\n")
def to_hiragana(self):
    """Return the reading as hiragana, even if it's On.

    >>> k = Kanji('柔')
    >>> r = Reading(k, 'ニュウ')
    >>> r.to_hiragana()
    'にゅう'

    If it's not On, it's idempotent.

    >>> k = Kanji('最')
    >>> r = Reading(k, 'もっとも')
    >>> r.add_examples('最も')
    >>> r.reading
    'もっと.も'
    >>> r.to_hiragana()
    'もっと.も'
    """
    # Only On readings are converted; everything else is returned as is.
    if self.kind != 'On':
        return self.reading
    romaji = romkan.to_roma(self.reading)
    return romkan.to_hiragana(romaji)
def to_hiragana(self):
    """Return the reading as hiragana, even if it's On.

    >>> k = Kanji('柔')
    >>> r = Reading(k, 'ニュウ')
    >>> r.to_hiragana()
    'にゅう'

    If it's not On, it's idempotent.

    >>> k = Kanji('最')
    >>> r = Reading(k, 'もっとも')
    >>> r.add_examples('最も')
    >>> r.reading
    'もっと.も'
    >>> r.to_hiragana()
    'もっと.も'
    """
    is_on_reading = self.kind == 'On'
    # On readings go through romaji to come back as hiragana.
    return (romkan.to_hiragana(romkan.to_roma(self.reading))
            if is_on_reading else self.reading)
def init(self):
    """Draw the first word and initialise the kana and round displays."""
    self.word = sensei.generateWord(self.lbound, self.ubound)
    kana_text = romkan.to_hiragana(self.word)
    self.kana.set(kana_text)
    # Shown as "score/count (current)".
    progress = "%s/%s (%s)" % (self.score, self.count, self.curr)
    self.round.set(progress)
def generate_search_conditions(args):
    '''Expand the command-line arguments into the searches to attempt.

    args = command-line argument dict (argparse object)

    Every combination of regexp flag, field and extent is considered,
    minus the combinations skipped below. Returns the list of
    SearchConditions in the order they are generated.
    '''
    def restore_leading_hyphen(s):
        # romkan will convert ASCII hyphen-minus to CJKV long 'ー'
        # we back-convert it in start position, to preserve FTS
        # operator '-'.
        if len(s) > 1 and s[0] == 'ー':
            s = '-' + s[1:]
        return s

    if args.regexp:
        regexp_flags = (True,)
    elif tt.has_regexp_special(args.query_s):
        regexp_flags = (False, True)
    else:
        regexp_flags = (False,)

    if args.field != 'auto':
        fields = (args.field,)
    elif tt.is_kana(args.query_s):
        fields = ('kanji', 'reading')
    else:
        fields = ('kanji', 'reading', 'gloss')

    auto_extent = args.extent == 'auto'
    extents = ('whole', 'word', 'beginning', 'partial') if auto_extent else (args.extent,)

    conditions = []
    for regexp in regexp_flags:
        for field in fields:
            for extent in extents:
                if field == 'gloss' and extent == 'beginning' and auto_extent:
                    # when guessing for e.g. 'man', prefix matches such as
                    # 'manatee' are typically unwanted, so skip them
                    continue
                elif field in ('kanji', 'reading') and extent == 'word':
                    if auto_extent:
                        # useless combination generated, skip
                        continue
                    # useless combination requested, adjust
                    extent = 'whole'

                if field == 'reading' and tt.is_latin(args.query_s):
                    # 'reading' field auto-convert romaji to kana. as of this
                    # writing, JMdict has no romaji in readingfields.
                    queries = [
                        [restore_leading_hyphen(convert(s)) for s in args.query]
                        for convert in (romkan.to_hiragana, romkan.to_katakana)
                    ]
                else:
                    queries = (args.query,)
                # TODO: add wide-char

                conditions.extend(
                    SearchConditions(args, query, regexp, field, extent)
                    for query in queries)
    return conditions
def _to_hiragana(self):
    """Return ``self.word`` converted to hiragana, encoded as UTF-8."""
    hiragana = romkan.to_hiragana(self.word)
    return hiragana.encode("utf-8")
async def gimnew(arg, send):
    """Run ``arg['text']`` through the input method named by ``arg['lang']``.

    The language (default 'chs') is resolved through ``alias`` and mapped
    to an input-tool code in ``table``. The text is tokenised into
    ``(convert?, text)`` pairs: spans wrapped in '' ... '' are passed
    through unconverted, runs of separator characters are passed through,
    and the remaining runs are split on apostrophes and marked for
    conversion. For 'ja' the convertible pieces are turned into hiragana
    first; for 'zhuyinbb' they are remapped key by key. The pairs are then
    handed to ``GIMNEW(itc)`` together with ``send``.

    Raises Exception when the language is unknown.
    """
    # Canonical language name -> input-tool code.
    table = {
        # pinyin
        'pinyins': 'zh-t-i0-pinyin',
        'pinyint': 'zh-hant-t-i0-pinyin',
        # wubi
        'wubi': 'zh-t-i0-wubi-1986',
        # shuangpin
        'shuangpinabc': 'zh-t-i0-pinyin-x0-shuangpin-abc',
        'shuangpinms': 'zh-t-i0-pinyin-x0-shuangpin-ms',
        'shuangpinflypy': 'zh-t-i0-pinyin-x0-shuangpin-flypy',
        'shuangpinjiajia': 'zh-t-i0-pinyin-x0-shuangpin-jiajia',
        'shuangpinziguang': 'zh-t-i0-pinyin-x0-shuangpin-ziguang',
        'shuangpinziranma': 'zh-t-i0-pinyin-x0-shuangpin-ziranma',
        # zhuyin
        'zhuyin': 'zh-hant-t-i0-und',
        # for blackberry layout
        'zhuyinbb': 'zh-hant-t-i0-und',
        # cangjie
        'cangjie': 'zh-hant-t-i0-cangjie-1982',
        # yue
        'yue': 'yue-hant-t-i0-und',
        # ja
        'ja': 'ja-t-ja-hira-i0-und',
    }
    # Shorthand -> canonical language name (a key of ``table``).
    alias = {
        # default
        'chs': 'pinyins',
        'cht': 'pinyint',
        'pinyin': 'pinyins',
        'shuangpin': 'shuangpinflypy',
        'udpn': 'shuangpinziranma',
        # less is more
        'py': 'pinyins',
        'wb': 'wubi',
        'sp': 'shuangpinflypy',
        'zrm': 'shuangpinziranma',
        'zy': 'zhuyin',
        'cj': 'cangjie',
        # alias
        'ggtt': 'wubi',
        'vtpc': 'shuangpinabc',
        'udpnms': 'shuangpinms',
        'ulpb': 'shuangpinflypy',
        'ihpl': 'shuangpinjiajia',
        'igpy': 'shuangpinziguang',
        'udpnzrm': 'shuangpinziranma',
        '5j4up=': 'zhuyin',
        'rhnyoo$': 'zhuyinbb',
        'oiargrmbc': 'cangjie',
        'yut': 'yue',
        # alt
        'jp': 'ja',
    }

    def parse(reg, text, f, g):
        # Walk the matches of ``reg``: text between matches goes through
        # ``f``, each match goes through ``g``; both return lists that are
        # concatenated in order.
        line = []
        pos = 0
        for m in reg.finditer(text):
            #print('parse: {}'.format(repr(text[pos:m.start()])))
            line.extend(f(text[pos:m.start()]))
            line.extend(g(m.group()))
            pos = m.end()
        line.extend(f(text[pos:]))
        return line

    def replace(text, rule):
        # Apply the (from, to) rules in order. The text is split on the
        # first rule's source, the pieces are processed with the remaining
        # rules, and then joined with the first rule's target, so text
        # produced by one rule is never rewritten by a later one.
        if not rule:
            return text
        (f, t) = rule[0]
        parts = text.split(f)
        return t.join(replace(part, rule[1:]) for part in parts)

    try:
        lang = arg['lang'] or 'chs'
        lang = alias.get(lang, lang)
        itc = table[lang]
    except:
        #raise Exception("this method is not supported yet...")
        raise Exception("Do you REALLY need this input method?")

    # ``sep`` matches runs of characters that are not input keys for the
    # language; ``comment`` matches a ''...'' span that is not glued to
    # input keys on either side.
    if lang == 'zhuyin':
        sep = re.compile(r"([^a-z'0-9\-;,./=]+)")
        comment = re.compile(r"(?:(?<=[^a-z'0-9\-;,./=])|^)''(.*?)''(?:(?=[^a-z'0-9\-;,./=])|$)")
    elif lang == 'zhuyinbb':
        sep = re.compile(r"([^a-z'0$]+)")
        comment = re.compile(r"(?:(?<=[^a-z'0$])|^)''(.*?)''(?:(?=[^a-z'0$])|$)")
    elif lang == 'ja':
        sep = re.compile(r"([^a-z'\-]+)")
        comment = re.compile(r"(?:(?<=[^a-z'\-])|^)''(.*?)''(?:(?=[^a-z'\-])|$)")
    else:
        sep = re.compile(r"([^a-z']+)")
        comment = re.compile(r"(?:(?<=[^a-z'])|^)''(.*?)''(?:(?=[^a-z'])|$)")

    text = arg['text']
    # Outer pass: split off ''...'' spans (kept verbatim, quotes removed).
    # Inner pass: split the rest on separators; key runs are cut at
    # apostrophes and flagged True (convert), separators are flagged False.
    line = parse(comment, text,
        lambda t: parse(sep, t,
            #lambda x: [(True, e) for e in x.split("'")] if x != '' and x[0].islower() else [(False, x)],
            # for zhuyin
            lambda x: [(True, e) for e in x.split("'")] if x != '' else [(False, x)],
            lambda x: [(False, x)]
        ),
        lambda t: [(False, t[2:-2])]
    )

    if lang == 'ja':
        # Pre-convert romaji pieces to hiragana before the input method.
        tmp = []
        for e in line:
            if e[0]:
                tmp.append((e[0], romkan.to_hiragana(e[1])))
            else:
                tmp.append(e)
        line = tmp
    elif lang == 'zhuyinbb':
        # Remap keys typed on the blackberry layout; doubled letters are
        # listed first so they take precedence over single letters.
        tmp = []
        for e in line:
            if e[0]:
                t = [
                    ('aa', 'z'),
                    ('dd', 'f'),
                    ('ee', 'r'),
                    ('ii', 'o'),
                    ('jj', ','),
                    ('kk', '.'),
                    ('ll', '/'),
                    ('oo', 'p'),
                    ('qq', 'q'),
                    ('rr', 't'),
                    ('ss', 'x'),
                    ('uu', 'i'),
                    ('ww', 'w'),
                    ('xx', 'v'),
                    ('a', 'a'),
                    ('b', 'm'),
                    ('c', 'b'),
                    ('d', 'd'),
                    ('e', 'e'),
                    ('f', 'g'),
                    ('g', 'h'),
                    ('h', 'j'),
                    ('i', '9'),
                    ('j', 'k'),
                    ('k', 'l'),
                    ('l', ';'),
                    ('m', '7'),
                    ('n', '4'),
                    ('o', '0'),
                    ('p', '-'),
                    ('q', '1'),
                    ('r', '5'),
                    ('s', 's'),
                    ('t', 'y'),
                    ('u', '8'),
                    ('v', 'n'),
                    ('w', '2'),
                    ('x', 'c'),
                    ('y', 'u'),
                    ('z', '6'),
                    ('0', '3'),
                    ('$', '='),
                ]
                tmp.append((e[0], replace(e[1], t)))
            else:
                tmp.append(e)
        line = tmp

    print(line)
    im = GIMNEW(itc)
    await im(line, send)
async def kana(arg, send):
    """Send the hiragana conversion of ``arg['romaji']`` through ``send``."""
    hiragana = romkan.to_hiragana(arg['romaji'])
    send(hiragana)
def __convertToHiragana(self, match):
    """Return the hiragana for a regex match, leaving a lone "n" as is."""
    matched = match.group()
    # A bare "n" is kept unconverted.
    return "n" if matched == "n" else to_hiragana(matched)
def copy(self, text):
    """Put the hiragana conversion of ``text`` on the clipboard."""
    hiragana = romkan.to_hiragana(text)
    clipboard.copy(hiragana)
alt = '"TI"' elif i == 17: char = 'TSU' alt = '"TU"' elif i == 52: char = 'JI' alt = '"ZI"' elif i == 57: char = 'JI' alt = '"DI"' elif i == 58: char = 'ZU' alt = '"DU"' if alt is None: alt = 'null' fixture = '{ "model": "kana.character", "pk": %(pk)s, "fields": { "romaji": "%(romaji)s", ' \ '"hiragana": "%(hiragana)s", "katakana": "%(katakana)s", "is_plain": %(is_plain)s,' \ '"is_dakuten": %(is_dakuten)s, "is_handakuten": %(is_handakuten)s, "is_youon": %(is_youon)s,' \ '"gojuon_row": %(gojuon_row)s, "gojuon_col": %(gojuon_col)s, "alternate_romaji": %(alt)s, ' \ '"notes": null } },' args = { 'pk': i, 'romaji': char, 'hiragana': romkan.to_hiragana(char), 'katakana': romkan.to_katakana(char), 'is_plain': str(is_plain).lower(), 'is_dakuten': str(is_dakuten).lower(), 'is_handakuten': str(is_handakuten).lower(), 'is_youon': str(is_youon).lower(), 'gojuon_row': str(gojuon_row), 'gojuon_col': str(gojuon_col), 'alt': alt } print(fixture % args)
import sys
import json
import romkan

# Read a JSON list of poems from stdin and print each poem's Japanese text
# as hiragana, one line per text line, with a blank line after every poem.
poems = json.loads(sys.stdin.read())
for poem in poems:
    for line in poem[u'Japanese'][u'Text']:
        # Spaces are dropped before conversion.
        print(romkan.to_hiragana(line.replace(' ', '')))
    print('')
def to_hiragana(self):
    """Switch to hiragana mode and re-render every Japanese item.

    Sets ``self.lang_mode`` to 0 and rewrites the text of each item from
    ``get_japanese_items`` as hiragana, going through romaji.
    """
    self.lang_mode = 0
    for item in self.get_japanese_items():
        romaji = romkan.to_roma(item.text())
        item.setText(romkan.to_hiragana(romaji))
def _to_hiragana(self):
    """Return ``self.word`` converted to hiragana."""
    hiragana = romkan.to_hiragana(self.word)
    return hiragana
import re
import romkan

# Collect the distinct kana readings found in square brackets in the
# "edict2" file (EUC-JP), normalised to hiragana via romaji, and write
# them one per line to ./hira.list. The first line of the file is skipped.
entries = set()
with open("edict2", encoding="euc-jp") as edict:
    for i, entry in enumerate(edict):
        if i == 0:
            continue
        m = re.search("^[^/]*\\[([ぁ-んァ-ン]*)\\]", entry)
        if not m:
            continue
        # group(1) is the bracketed reading.
        entries.add(romkan.to_hiragana(romkan.to_roma(m.group(1))))

# ``with`` closes the output even if a write fails; the input handle above
# was previously never closed at all.
with open("./hira.list", "w") as w:
    for e in entries:
        w.write(e + "\n")
import sqlite3 import sys import romkan con = sqlite3.connect('jdict.sqlite') con.text_factory = str cur = con.cursor() if len(sys.argv) != 2: print "usage: python dict.py word" sys.exit("error: bad arguments") word = sys.argv[1] jpq = 'SELECT * FROM edict WHERE word LIKE "' + word + '"' rq = 'SELECT * FROM edict WHERE kana LIKE "' + romkan.to_hiragana(unicode(word, 'utf-8')) + '"' enq = 'SELECT * FROM edict WHERE english LIKE "%' + word + '%"' cur.execute(jpq) rows = cur.fetchall() for row in rows: print row[0] + '【' + row[1] + '】' + row[2] +'\n' cur.execute(rq) rows = cur.fetchall() for row in rows: print row[0] + '【' + row[1] + '】' + row[2] +'\n' cur.execute(enq) rows = cur.fetchall() for row in rows:
def normalizeInput(self, text):
    """Normalise user input to hiragana.

    Spaces are removed, romaji is converted to hiragana, the result is
    lower-cased and any katakana in it is converted to hiragana.
    """
    without_spaces = text.replace(" ", "")
    kana = romkan.to_hiragana(without_spaces)
    return romkan.katakana_to_hiragana(kana.lower())