def process(cls, v: str):
    """Return the hiragana form of the reading of *v*.

    The pieces returned by ``cls.reading_form(v)`` are joined with single
    spaces, romanized with ``romkan.to_hepburn`` and the resulting romaji is
    converted with ``romkan.to_hiragana``.
    """
    spaced_reading = " ".join(cls.reading_form(v))
    # Original author's note: to_hiragana only reacts to romaji input, which
    # is why the text is romanized first.
    return romkan.to_hiragana(romkan.to_hepburn(spaced_reading))
def romanize(string):
    """Romanize a short kana string (only the first branches are visible here).

    Digits are removed first.  If the remaining text does not start with a
    character in U+3040-U+30FF it is returned unchanged.  A one-character
    string is converted with ``romkan.to_hepburn`` and a bare ``'n'`` result
    is doubled to ``'nn'``.

    NOTE(review): as visible here the converted value is never returned (the
    function falls off the end and yields None) - this block looks truncated;
    confirm against the complete definition.
    NOTE(review): the ``ur''`` literal prefix is Python 2 only syntax.
    """
    # control for 2, 3
    string = re.sub('\d', '', string)
    # re.match anchors at the start, so only the leading character(s) are tested.
    if not re.match(ur'[\u3040-\u30FF]+', string):
        return string
    elif len(string)==1:
        string = romkan.to_hepburn(string)
        if string == 'n':
            string = 'nn'
def is_expected_key(self, code):
    """
    Check whether the typed key is the key that is currently expected.

    When the first kana is accepted through its Hepburn or "optimized"
    spelling, ``self.target_roma`` is rewritten so that it starts with that
    spelling instead of the Kunrei one.

    :param code: the key that was typed
    :return: True if the key is accepted
    """
    if len(self.target_roma) == 0:
        return False

    # In the middle of a kana only the exact next key is acceptable.
    if Romautil.is_halfway(self.target_kana, self.target_roma):
        return self.is_exactly_expected_key(code)

    head = self.target_kana[:1]
    kunrei = romkan.to_kunrei(head)
    hepburn = romkan.to_hepburn(head)
    optimized = Romautil.hira2roma(head)

    # Kunrei spellings starting with "x" are delegated to the exact match.
    if kunrei[0] == "x":
        return self.is_exactly_expected_key(code)

    if kunrei[0] == code:
        print("Kunrei, approve.")
        return True

    # Alternative spellings are tried in order; the first hit replaces the
    # Kunrei prefix of the pending romaji.
    alternatives = (
        ("Hepburn, approve.", hepburn),
        ("Optimized, approve.", optimized),
    )
    for message, spelling in alternatives:
        if spelling[0] == code:
            print(message)
            self.target_roma = spelling + self.target_roma[len(kunrei):]
            return True

    print("kunrei nor hepburn, deny.")
    return False
def romaji(self):
    """Return the reading transcribed with ``romkan.to_hepburn``.

    The letter case encodes ``self.kind``: ``'On'`` readings are upper-cased,
    ``'Kun'`` readings are returned exactly as romkan produced them, and any
    other kind is title-cased (exceptional readings, TODO).

    Examples (kept from the original documentation):

    >>> k = Kanji('嫌')
    >>> r1 = Reading(k, reading='ケン')
    >>> r1.romaji()
    'KEN'
    >>> r2 = Reading(k, reading=' ゲン')
    >>> r2.romaji()  # self.uncommon isn't marked in romaji
    'GEN'
    >>> r3 = Reading(k, reading='いや')
    >>> r3.romaji()
    'iya'
    """
    transcription = romkan.to_hepburn(self.reading)
    kind = self.kind
    if kind == 'On':
        return transcription.upper()
    if kind == 'Kun':
        return transcription
    return transcription.title()
def romaji(self):
    """Romanize ``self.reading`` (Hepburn) and case it according to its kind.

    * kind ``'On'``  -> upper case
    * kind ``'Kun'`` -> unchanged romkan output
    * anything else  -> title case (exceptional readings, TODO)

    Examples (kept from the original documentation):

    >>> k = Kanji('嫌')
    >>> r1 = Reading(k, reading='ケン')
    >>> r1.romaji()
    'KEN'
    >>> r2 = Reading(k, reading=' ゲン')
    >>> r2.romaji()  # self.uncommon isn't marked in romaji
    'GEN'
    >>> r3 = Reading(k, reading='いや')
    >>> r3.romaji()
    'iya'
    """
    hepburn = romkan.to_hepburn(self.reading)
    if self.kind == 'On':
        cased = hepburn.upper()
    elif self.kind == 'Kun':
        cased = hepburn
    else:
        cased = hepburn.title()
    return cased
def downloadMp3ForVocab(vocab):
    """Download the audio for *vocab* into ``collection.media/``.

    The URL comes from ``getMp3Url(vocab['character'])``; the file is named
    ``wk_<hepburn romanization of vocab['kana']>.mp3``.

    Returns the bare file name (without the directory).
    """
    source_url = getMp3Url(vocab['character'])
    file_name = "wk_{}.mp3".format(romkan.to_hepburn(vocab['kana']))
    urllib.request.urlretrieve(source_url, "collection.media/" + file_name)
    print("Downloaded " + file_name)
    return file_name
def transform_result(result):
    """Flatten one search result into a small dictionary record.

    Only the first element of ``result["japanese"]`` and the first element of
    ``result["senses"]`` are used.

    * reading: the form's ``'reading'``, else its ``'word'``, else the
      placeholder ``"No reading found"``.
    * word: the form's ``'word'``, else the reading chosen above.

    Returns a dict with the keys ``kanji``, ``hiragana``,
    ``english_definitions``, ``part_of_speech``, ``romaji`` and
    ``commonality``.

    Raises KeyError/IndexError if the expected keys or first elements are
    missing from *result*.
    """
    first_form = result["japanese"][0]

    if 'reading' in first_form:
        reading = first_form['reading']
    elif 'word' in first_form:
        reading = first_form['word']
    else:
        reading = "No reading found"

    word = first_form['word'] if 'word' in first_form else reading

    first_sense = result['senses'][0]

    return {
        "kanji": word,
        "hiragana": reading,
        "english_definitions": first_sense['english_definitions'],
        "part_of_speech": first_sense['parts_of_speech'][0],
        # The original read an undefined name `entry` here; the reading
        # selected above is the value available in this function.
        "romaji": romkan.to_hepburn(reading),
        # NOTE(review): the original left this key without a value (a syntax
        # error).  The intended source of the value is not visible in this
        # function, so the key is kept with None - fill in once confirmed.
        "commonality": None,
    }
def titleCase(str):
    """Romanize *str* and capitalize the first letter of every word.

    The text is converted with ``romkan.to_hepburn``; the doubled vowels
    ``oo``/``ou``, ``aa`` and ``uu`` are then replaced by the macron letters
    ``ō``, ``ā`` and ``ū``.  Words are separated by single ASCII spaces and
    empty words (empty input, consecutive spaces) are kept as empty strings
    instead of raising IndexError as the previous indexing did.

    The parameter keeps its original name (it shadows the builtin) so that
    keyword callers are not broken.
    """
    # NOTE(review): in the original both arguments of this replace looked like
    # the same space character (a no-op); presumably the first one was a
    # full-width space, so it is written as an explicit escape - confirm.
    romanized = romkan.to_hepburn(str.replace(u'\u3000', ' '))

    # Order matters only in that each pair is applied to the previous result.
    for doubled, long_vowel in (('oo', u'ō'), ('ou', u'ō'), ('aa', u'ā'), ('uu', u'ū')):
        romanized = romanized.replace(doubled, long_vowel)

    # Slicing (word[:1]) is safe on empty words, unlike word[0].
    return ' '.join(word[:1].upper() + word[1:] for word in romanized.split(' '))
def parse_row(row, level):
    """Build a vocabulary record from one table row.

    ``row[0]``, ``row[1]`` and ``row[2]`` supply (through their ``.text``)
    the kanji, the kana and a single English definition; *level* is stored
    as ``jlpt_level`` and used to look up ``jlpt_commonalities``.
    """
    kanji = row[0].text
    kana = row[1].text
    record = {
        "kanji": kanji,
        "kana": kana,
        "romaji": romkan.to_hepburn(kana),
        "english_definitions": [row[2].text],
        "jlpt_level": level,
    }
    record["commonality"] = jlpt_commonalities[level]
    return record
def romanizeText(mecab, text):
    """Romanize *text* token by token using the output of ``mecab.parse``.

    Every output line of the form ``surface<TAB>feature,feature,...`` yields
    one list item: the Hepburn romanization of the 8th feature when it exists
    and is not ``'*'``, otherwise the surface form itself.  Lines that do not
    consist of exactly two tab-separated parts are ignored.
    """
    romanized = []
    for line in mecab.parse(text).split('\n'):
        columns = line.split('\t')
        if len(columns) != 2:
            continue
        surface, feature_csv = columns
        features = feature_csv.split(',')
        has_reading = len(features) > 7 and features[7] != '*'
        romanized.append(romkan.to_hepburn(features[7]) if has_reading else surface)
    return romanized
async def romkan(self, ctx, *, text: commands.clean_content):
    """Convert romaji into hiragana or katakana, or vice-versa."""
    # The docstring above is left untouched on purpose: command frameworks
    # may surface it as help text.
    prefix = text[:3]
    if prefix in ("hg ", "kk ", "ro "):
        # Conversion type passed explicitly as the first word.
        tp = prefix[:2]
        text = text[3:]
    else:
        # Otherwise the name the command was invoked with selects the type.
        tp = ctx.invoked_with
        if tp == "romkan":
            return await ctx.send(
                "Please either use `!hg`, `!kk` or `!ro` (for hiragana, katakana and romaji respectively), or pass the type as an argument: `!romkan hg LyricLy wa baka desu yo`"
            )

    converters = {
        "hg": romkan.to_hiragana,
        "kk": romkan.to_katakana,
        "ro": romkan.to_hepburn,
    }
    convert = converters.get(tp)
    # Unknown types send nothing, as before.
    if convert is not None:
        await ctx.send(convert(text))
def build(self):
    """Return the romanized readings found in the XML dictionary.

    The file at ``self.DICTIONARY_LOCATION`` is parsed; for each ``entry``
    element under the root, the text of the first ``r_ele/reb`` descendant is
    converted with ``romkan.to_hepburn`` and collected.  Entries for which
    that raises AttributeError are skipped.
    """
    root = xml.etree.ElementTree.parse(self.DICTIONARY_LOCATION).getroot()
    print('\nGenerating Japanese word list.')

    romanized_readings = []
    progress = tqdm(root.findall('entry'), ncols=75, unit='entries')
    for entry in progress:
        try:
            kana = entry.find('.//r_ele/reb').text
            romanized_readings.append(romkan.to_hepburn(kana))
        except AttributeError:
            # e.g. find() returned None because the entry has no reading.
            continue
    return romanized_readings
def split_readings(string, delimeters=(u',', u"、", u';')):
    """
    Split a delimiter-separated list of kana readings and romanize each one.

    For every piece obtained by splitting *string* on any of *delimeters*:

    1. keep only the characters in the range U+3040-U+30FF,
    2. drop the piece if nothing is left,
    3. convert it with ``romkan.to_hepburn`` and wrap the result in ``str``.

    :param string: Input string containing readings separated by delimiters.
    :param delimeters: Strings that separate the individual readings.
    :return: List of romanized readings, in input order.
    """
    # Alternation of the escaped delimiters, e.g. ",|、|;".
    pattern = '|'.join(re.escape(delimiter) for delimiter in tuple(delimeters))

    readings = []
    for chunk in re.split(pattern, string):
        kana = ''.join(re.findall(u"[\u3040-\u30ff]", chunk)).strip()
        if kana:
            readings.append(str(romkan.to_hepburn(kana)))
    return readings
def strings(input):
    """Derive several transcriptions of *input* and return them space-joined.

    Side effect: the module globals ``raw``, ``kunrei``, ``hiragana``,
    ``katakana``, ``hepburn`` and ``onoma`` are (re)assigned.

    ``onoma`` is the Kunrei romaji with a fixed set of substitutions applied
    one after another, and a trailing ``tto`` replaced by ``Q``.

    Returns ``"<hiragana> <katakana> <hepburn> <onoma>"``.
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    kunrei = romkan.to_kunrei(input)
    # Both kana forms are derived from the Kunrei romaji because direct
    # hiragana <-> katakana conversion doesn't work.
    hiragana = romkan.to_hiragana(kunrei)
    katakana = romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    # Applied sequentially, in the order the original dict literal listed them.
    substitutions = (
        ('ch', 'C'), ('ty', 'T'), ('sy', 'S'), ('ny', 'N'), ('zy', 'Z'),
        ('dj', 'D'), ('l', 'r'), ('xtu', 'Q'), ('aa', 'a-'), ('ee', 'e-'),
        ('ii', 'i-'), ('oo', 'o-'), ('uu', 'u-'),
    )
    onoma = kunrei
    for old, new in substitutions:
        onoma = onoma.replace(old, new)
    if onoma.endswith('tto'):
        onoma = onoma[:-3] + 'Q'

    return ' '.join((hiragana, katakana, hepburn, onoma))
def strings(input):
    """Compute hiragana, katakana, Hepburn and a shorthand form of *input*.

    The results are also published through the module globals ``raw``,
    ``kunrei``, ``hiragana``, ``katakana``, ``hepburn`` and ``onoma``.

    Returns the four derived strings joined by single spaces, in the order
    hiragana, katakana, hepburn, onoma.
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    kunrei = romkan.to_kunrei(input)
    # Kunrei romaji is the pivot: hiragana <> katakana conversion doesn't work.
    hiragana, katakana = romkan.to_hiragana(kunrei), romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    changes_dict = {
        'ch': 'C', 'ty': 'T', 'sy': 'S', 'ny': 'N', 'zy': 'Z',
        'dj': 'D', 'l': 'r', 'xtu': 'Q',
        'aa': 'a-', 'ee': 'e-', 'ii': 'i-', 'oo': 'o-', 'uu': 'u-',
    }
    onoma = kunrei
    for pattern, replacement in changes_dict.items():
        onoma = onoma.replace(pattern, replacement)
    # A final "tto" is collapsed into the marker "Q".
    if onoma.endswith('tto'):
        onoma = onoma[:-3] + 'Q'

    return "{} {} {} {}".format(hiragana, katakana, hepburn, onoma)
#!/usr/bin/env python3
# Prints every row of the `unique_results` table together with the Hepburn
# romanization of its reading.
import contextlib
import sqlite3

import romkan

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('results_db', nargs='?', default='results.sqlite3',
                        help='Database of words to transliterate produced by extractor.py')
    args = parser.parse_args()

    # closing() releases the connection even if printing or romanizing a row
    # raises; the previous version never closed it.
    with contextlib.closing(sqlite3.connect(args.results_db)) as db:
        rows = db.execute('SELECT kanji, reading, glosses, part_of_speech FROM unique_results')
        # glosses / part_of_speech are selected but not used by this script.
        for kanji, reading, _glosses, _part_of_speech in rows:
            print(kanji, reading, romkan.to_hepburn(reading))

# NOTE(review): stray string expression kept from the original; it is not
# assigned to anything and has no runtime effect - confirm whether it belongs
# to a definition outside this view.
'ぁあぃいぅうぇえぉおかがきぎく'\
'ぐけげこごさざしじすずせぜそぞた'\
'だちぢっつづてでとどなにぬねのは'\
'ばぱひびぴふぶぷへべぺほぼぽまみ'\
'むめもゃやゅゆょよらりるれろゎわ'\
'ゐゑをんゔゕゖ゛゜ゝゞゟ'
def multiscrape(self, name, shy=False):
    """Scrape the page section titled *name* and store it in ``self.config``.

    For ``名前`` and ``ふりがな`` the first ``h3`` element is read; for any
    other *name* the ``p`` siblings following the ``h4`` whose text contains
    *name*.  The text fragments of the first matching node are trimmed,
    joined with newlines, passed through ``makeHankaku`` and then stored
    according to *name* (see the branches below).

    :param name: section title to look for (also the config key in most cases)
    :param shy: when true, do nothing if ``self.config[name]['ja']`` already
        holds a truthy value; the ``ふりがな`` handling is also disabled.
    :return: None

    Changes compared with the previous version:
    * the faculty-message branch tested the misspelled key ``学部メセージ``
      and therefore replaced the stored dict on every call;
    * membership tests use ``in`` and the text iterator is consumed with a
      ``for`` loop instead of ``has_key`` / ``.next()`` inside a bare
      ``except`` (which also hid unrelated errors).
    """
    if shy and name in self.config and self.config[name]['ja']:
        return

    if name != u'名前' and name != u'ふりがな':
        nodes = self.root.xpath("//_:h4[contains(text(), '%s')]/following-sibling::_:p" % name, namespaces=NS)
    else:
        nodes = self.root.xpath("//_:h3", namespaces=NS)
    if not nodes:
        return

    # Collect the non-empty, trimmed text fragments of the first node.
    l = []
    for val in nodes[0].itertext():
        val = re.sub(u'^[ \r\n]+', '', val)
        val = re.sub(u'[ \r\n]+$', '', val)
        if val:
            l.append(val)
    val = re.sub('^[ \n]*(.*?)[ \n]*$', '\\1', '\n'.join(l))
    val = val.strip()
    val = makeHankaku(val)

    if name == u'名前':
        lst = val.split('\n')
        if name not in self.config:
            self.config[name] = {}
        self.config[name]['ja'] = lst[0]
    elif name == u'ふりがな' and not shy:
        if u'名前' not in self.config:
            self.config[u'名前'] = {}
        lst = val.split('\n')
        if len(lst) > 1:
            # NOTE(review): the u-prefixed space literals below looked
            # identical to ASCII spaces in the original (making the replaces
            # no-ops/duplicates); presumably they were full-width spaces, so
            # they are written as explicit escapes - confirm.
            suzure = lst[1].replace(u'\u3000', '').replace(' ', '')
            self.config[u'名前']['kana'] = lst[1]
            self.config[u'名前']['en'] = titleCase(romkan.to_hepburn(lst[1].replace(u'\u3000', ' ')))
            self.config[u'並べ替え']['ja'] = romkan.to_katakana(romkan.to_kunrei(suzure))
            self.config[u'並べ替え']['en'] = romkan.to_roma(suzure)
        else:
            self.config[u'名前']['kana'] = ''
    elif name == u'教員からのメッセージ':
        # Fixed: the guard and the assignment now use the same key.
        if u'学部メッセージ' not in self.config:
            self.config[u'学部メッセージ'] = {}
        self.config[u'学部メッセージ']['ja'] = val.split('\n')
    elif name == u'役職':
        if u'役職' not in self.config:
            self.config[u'役職'] = {}
        self.config[u'役職']['ja'] = getPostJapanese(val)
        self.config[u'役職']['en'] = getPostEnglish(self.config[u'役職']['ja'])
    elif name == u'主要':
        # Multi-line values are stored as a list, single-line values as text.
        if len(val.split('\n')) > 1:
            self.config[u'主要業績']['ja'] = val.split('\n')
        else:
            self.config[u'主要業績']['ja'] = val
    else:
        if name not in self.config:
            self.config[name] = {}
        if len(val.split('\n')) > 1:
            self.config[name]['ja'] = val.split('\n')
            if name == u'専門分野' and self.config[name]['ja'][0]:
                self.config[name]['en'] = fieldsMap[self.config[name]['ja'][0]]
        else:
            self.config[name]['ja'] = val
            if name == u'専門分野' and self.config[name]['ja']:
                self.config[name]['en'] = fieldsMap[self.config[name]['ja']]
# NOTE(review): the statements down to `col.addNote(note)` read like the tail
# of a function whose header is outside this view - they use `vocab`, `col`
# and `mp3name` before anything visible here defines them.  Confirm against
# the complete file before relying on this layout.
kana = vocab['kana']
meaning = vocab['meaning']
# Fill a new note with the kana, the meaning and a sound reference, tag it
# with the vocab level and add it to the collection.
note = col.newNote()
note['Expression'] = kana
note['Meaning'] = meaning
note['Audio'] = "[sound:%s]" % mp3name
note.addTag("level%d" % vocab['level'])
col.addNote(note)

# Script part: open the collection, add a card for every vocab item that has
# none yet, and always close the collection at the end.
cwd = os.getcwd()
col = Collection("collection.anki2")
os.chdir(cwd) # Collection() changes the working directory, so restore it
try:
    vocabList = wani_reader.getVocabList()
    for vocab in vocabList:
        # Romanized kana is only used for the progress messages below.
        name = romkan.to_hepburn(vocab['kana'])
        if not hasCard(col, vocab['kana']):
            addVocabCard(col, vocab)
            print("Created card for " + name)
            # Pause for one second after each created card.
            time.sleep(1)
        else:
            print("-Skipped existing card " + name)
finally:
    col.close()
def process(cls, v: str):
    """Return *v* as katakana, converting through Hepburn romaji.

    *v* is first romanized with ``romkan.to_hepburn`` and the romaji is then
    converted with ``romkan.to_katakana``.
    """
    return romkan.to_katakana(romkan.to_hepburn(v))
output_file_path = os.path.join(dirname, "romaji_to_english.json")

# Read the entries to romanize from the input JSON file.
with open(filename) as f:
    minimal_results = json.load(f)

entries = []
output_data = {"entries": entries}
check_dict = {}  # romaji -> number of entries sharing that romanization
count = 0

for entry in minimal_results["entries"]:
    # Each entry gets its Hepburn romanization stored under 'romaji'.
    romaji = entry['romaji'] = romkan.to_hepburn(entry['reading'])
    count += 1
    check_dict[romaji] = check_dict.get(romaji, 0) + 1
    entries.append(entry)

# Show how many entries share each romanization, as text and as a histogram.
print(check_dict.values())
plt.hist(check_dict.values())
def process(cls, v: str):
    """Return the Hepburn romanization of the reading of *v*.

    The pieces returned by ``cls.reading_form(v)`` are joined with single
    spaces before being passed to ``romkan.to_hepburn``.

    Note: runs of spaces are not collapsed here.  The original kept that
    step disabled, with a remark that splitting a family name can produce
    duplicated spaces.
    """
    spaced_reading = " ".join(cls.reading_form(v))
    return romkan.to_hepburn(spaced_reading)
# Note names of one octave in ascending semitone order; the list index is
# used as the semitone offset in pitch_to_midi.
scale = ['C','C#','D','D#','E','F','F#','G','G#','A','A#','B']

def pitch_to_midi(pitch):
    """Convert a pitch name such as 'C#4' into a note number.

    The result is ``(octave + 1) * 12 + scale.index(note)`` where the octave
    is the last character of the stripped string and the note is everything
    before it.  Returns 0 when *pitch* does not start with an uppercase
    letter, an optional '#', and a digit.

    NOTE(review): the pattern check runs before ``strip()``, so leading
    whitespace yields 0 while trailing whitespace is tolerated.
    NOTE(review): the pattern accepts any uppercase letter and is not
    anchored at the end, so inputs like 'H4', 'E#4' or 'C10' pass the check
    and then make ``scale.index`` raise ValueError.
    """
    if not re.match(r'[A-Z]#?\d', pitch):
        return 0
    pitch = pitch.strip()
    return (int(pitch[-1])+1)*12 + scale.index(pitch[:-1])

# NOTE(review): the definition below ends on a dangling `elif` - it continues
# beyond what is visible here, so only the visible branches are described.
# The ``ur''`` literal prefix is Python 2 only syntax.
def romanize(string):
    # control for 2, 3
    string = re.sub('\d', '', string)
    # Text that does not start with a character in U+3040-U+30FF is returned
    # unchanged (re.match anchors at the start of the string).
    if not re.match(ur'[\u3040-\u30FF]+', string):
        return string
    elif len(string)==1:
        string = romkan.to_hepburn(string)
        # a lone 'n' result is doubled
        if string == 'n':
            string = 'nn'
    else:
        # Only the first two characters are romanized and combined.
        fst = romkan.to_hepburn(string[0])
        snd = romkan.to_hepburn(string[1])
        # tex, dex, tox, dox, and also palatals
        if fst[:-1] in ['t','d','ch','j','sh','v','ts','dz'] and fst[-1]!='u':
            string = fst[:-1]+snd[-1]
        # du ._.
        elif fst=='du':
            string = 'd'+snd[-1]
        # si/zi case
        elif fst in ['su','zu'] and snd=='xi':
            string = fst[0]+'i'
        # foreign y and w
        elif fst=='i':
            string = 'y'+snd[-1]
        elif fst=='u':
def get_yomi_str(line: str) -> str:
    """Return the romanized reading of *line* with each word capitalized.

    *line* is passed through ``tokenizer.parse`` and ``tagger.parse``; the
    stripped result is romanized with ``romkan.to_hepburn``, split on single
    spaces, and every piece is capitalized before being re-joined.
    """
    reading = tagger.parse(tokenizer.parse(line)).strip()
    words = romkan.to_hepburn(reading).split(' ')
    return ' '.join(word.capitalize() for word in words)
def execute(self, aTerm):
    """Run the raw dictionary query for *aTerm*.

    The SQL text is ``query_base`` formatted with ``self.select``; its
    parameters are ``self.order`` and the search term.  When
    ``self.convertKana`` is set, the term is first romanized (Hepburn) and
    then converted to katakana.
    """
    query = query_base.format(self.select)
    if self.convertKana:
        term = romkan.to_katakana(romkan.to_hepburn(aTerm))
    else:
        term = aTerm
    return DictionaryDisplayElement.objects.raw(query, [self.order, term])
def romanize(word):
    """Return *word* converted with ``romkan.to_hepburn``."""
    hepburn = romkan.to_hepburn(word)
    return hepburn
def _romanize_token(text, pending_sokuon, check_kana):
    """Romanize one token; return ``(romanized, pending_sokuon)``.

    *pending_sokuon* tells whether the previous token ended in a small tsu,
    in which case the first letter of this token is doubled.  When
    *check_kana* is true the raw text is also checked for a trailing small
    tsu, not only the romanized form.  If anything fails, the raw text (or
    '' when it is empty/None) is used instead.
    """
    try:
        word = romkan.to_hepburn(text)
        if word == 'ha':
            word = 'wa'
        if pending_sokuon:
            word = word[:1] + word
            pending_sokuon = False
        ends_in_sokuon = word.endswith('xtsu') or (
            check_kana and text.endswith(('っ', 'ッ')))
        if ends_in_sokuon:
            pending_sokuon = True
            word = word.replace('xtsu', '')
    except Exception:
        # Best effort, as before: fall back to the untouched text.
        word = text or ''
    return word, pending_sokuon


async def romaji(request, message, delete=None, delete2=None):
    """Post a romanized version of *request* and delete the triggering messages.

    The text is sent to the furigana web service; every returned word is
    romanized with ``romkan.to_hepburn`` (preferring its ``Furigana`` over
    its ``Surface``), a lone ``ha`` is written ``wa``, and a trailing small
    tsu doubles the first letter of the following word.

    :param request: the text to romanize
    :param message: the triggering message (used for channel and author, then deleted)
    :param delete: optional extra message to delete afterwards
    :param delete2: optional second extra message to delete afterwards

    Fixes compared with the previous version: the single-word branch compared
    the literal ``'word'`` with ``'ha'`` (always false), so ``ha`` was never
    rewritten there; bare ``except:`` clauses are narrowed to ``Exception``.
    """
    await client.send_typing(message.channel)

    try:
        urequest = kroman.parse(request).replace('-', '')
    except Exception:
        urequest = request

    url = 'http://jlp.yahooapis.jp/FuriganaService/V1/furigana?appid=' + yahooAppID + '&grade=1&sentence=' + quote_plus(urequest)
    # NOTE(review): urlopen is a blocking call inside a coroutine.
    with urlopen(url) as f:
        r = xmltodict.parse(f.read())
    words = r['ResultSet']['Result']['WordList']['Word']

    # Header: author plus at most 32 characters of the first line.
    mainMessage = '[' + message.author.display_name + ']\n**Original:** '
    if '\n' in request:
        mainMessage += request.split('\n', 1)[0][:32] + "..."
    else:
        mainMessage += request[:32]
        if len(request) > 32:
            mainMessage += '...'
    mainMessage += '\n**Romanised:** '

    if 'Furigana' in words or 'Surface' in words:
        # A single word: `words` is the word itself rather than a sequence.
        key = 'Furigana' if 'Furigana' in words else 'Surface'
        word = romkan.to_hepburn(words[key])
        if word == 'ha':
            word = 'wa'
        mainMessage += word
    else:
        pending_sokuon = False
        pieces = []
        for index, token in enumerate(words):
            # No separator right after a small tsu: the next word is glued on.
            if index > 0 and not pending_sokuon:
                pieces.append(' ')
            if 'Furigana' in token:
                word, pending_sokuon = _romanize_token(token['Furigana'], pending_sokuon, True)
            else:
                word, pending_sokuon = _romanize_token(token['Surface'], pending_sokuon, False)
            pieces.append(word)
        mainMessage += ''.join(pieces)

    await client.send_message(message.channel, mainMessage)
    await client.delete_message(message)
    if delete:
        await client.delete_message(delete)
    if delete2:
        await client.delete_message(delete2)