def prep_ph2num():
    kiritan_phone_mapping = {}
    with open(join(config["sinsy_dic"], "japanese.table"), encoding="UTF-8") as f:
        for l in f:
            s = l.strip().split()
            key = jaconv.hira2kata(s[0])
            kiritan_phone_mapping[key] = s[1:]
    sinsy_phone_mapping = {}
    with open(join(config["sinsy_dic"], "japanese.utf_8.table"), encoding="UTF-8") as f:
        for l in f:
            s = l.strip().split()
            key = jaconv.hira2kata(s[0])
            sinsy_phone_mapping[key] = s[1:]

    ph2num = {}
    counter = 0
    for p in ["sil", "pau", "br"]:
        ph2num[p] = counter
        counter += 1
    for k, v in sinsy_phone_mapping.items():
        for p in v:
            if p not in ph2num:
                ph2num[p] = counter
                counter += 1
    for k, v in kiritan_phone_mapping.items():
        for p in v:
            if p not in ph2num:
                ph2num[p] = counter
                counter += 1
    # undef
    ph2num["xx"] = counter

    return ph2num
def unify_text(texts):
    unification_dict = {}
    for text in texts:
        for i, word in enumerate(text):
            if jaconv.kata2hira(word) in unification_dict:
                text[i] = jaconv.kata2hira(word)
            elif jaconv.hira2kata(word) in unification_dict:
                text[i] = jaconv.hira2kata(word)
            else:
                unification_dict[word] = True
    return texts
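# A minimal usage sketch for unify_text above (hypothetical data): the first
# spelling seen wins, and later hiragana/katakana variants of it are folded
# back onto that spelling, in place.
import jaconv

texts = [["りんご", "バナナ"], ["リンゴ", "ばなな"]]
print(unify_text(texts))  # [['りんご', 'バナナ'], ['りんご', 'バナナ']]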
def ja_parser(ja_recipe):
    # Organize the Japanese recipes and prepare data for plotting and
    # word-vector generation
    p1_data = ja_recipe
    p1_data = p1_data.set_index('recipename')
    # Strip odd characters; convert between strings and lists
    kako = re.compile(r'([((].+?[))])')
    delid = re.compile(r'ID\d{7},')
    graphstr = re.compile(r'\*|〇|▲|□|◇|☆|●|◆|○|◎|┗|★')
    p1_data.loc[:, 'ingredientlist'] = p1_data.loc[:, 'ingredient'].str.replace(
        kako, '').str.replace(
        delid, '').str.replace(
        graphstr, '').str.replace(
        r'\n', '').str.replace(
        r',,', '')
    p1_data.loc[:, 'ingredientlist'] = p1_data.loc[:, 'ingredientlist'].apply(
        jaconv.hira2kata)
    p1_data.loc[:, 'ingredientlist'] = p1_data.loc[:, 'ingredientlist'].str.split(',')
    # Prepare the data for the word cloud and word-vector generation
    bigsoup = p1_data.loc[:, 'ingredientlist'].to_string(
        index=False, header=False).replace('、', ',').replace('[', '').replace(
        '\n', '').replace(']', '').replace(' ', '').replace(
        '　', '').replace('しょうが', '生姜').replace('☆', '').replace(
        'しょうゆ', '醤油').replace('オリーブ油', 'オリーブオイル').replace(
        'にんじん', '人参').replace('葱', 'ねぎ')
    littlesoup = bigsoup.split(',')
    littlesoup = pd.Series(littlesoup)
    frequency = (littlesoup.value_counts()[:500]) / len(ja_recipe.index)
    # Convert hiragana to katakana in the index
    kataindex = []
    for x in frequency.index:
        y = jaconv.hira2kata(x)
        kataindex.append(y)
    frequency.index = kataindex
    jacloudsource = frequency.to_dict()
    frequency = pd.Series(frequency)
    # Return data for the ranking, the word cloud, and word-vector generation
    return frequency, jacloudsource, p1_data
def get_yomi(word, yomi=''):
    def to_yomi(word):
        return nolmcb.parse(word).strip()

    word = re_symbol.sub('', word)
    word = jaconv.hira2kata(word)
    if re_katakana.search(word):
        return word
    elif re_katakana.search(yomi):
        return yomi
    yomi = jaconv.hira2kata(to_yomi(word))
    if re_katakana.search(yomi):
        return yomi
    return '*'
def g2p(input_yomi):
    # Convert everything to full-width katakana
    input_yomi = jaconv.h2z(input_yomi)
    input_yomi = jaconv.hira2kata(input_yomi)
    output_yomi = []
    for i, item in enumerate(input_yomi):
        # A long-vowel mark at the head of the string is not read
        if i == 0 and (item == "ー" or item == "〜"):
            pass
        # If not at the end of the string, check whether the next character
        # is a small kana (sutegana)
        elif i < len(input_yomi) - 1:
            if input_yomi[i + 1] in sutegana:
                youon = item + input_yomi[i + 1]
                # Output the phoneme of the contracted sound (youon)
                if youon in g2p_list:
                    output_yomi.append(g2p_list[youon])
                # Not a contracted sound: output the phonemes of the plain kana
                else:
                    output_yomi += nonyouon_before_st(input_yomi, i)
                    output_yomi += nonyouon_before_st(input_yomi, i + 1)
            else:
                output_yomi += nonyouon(input_yomi, i, item)
        # End of the string
        else:
            output_yomi += nonyouon(input_yomi, i, item)
    output_str = " ".join(output_yomi)
    output_yomi = output_str.split()
    # Return the phonemes
    return output_yomi
def zen2han(input):
    # First convert special characters that have no half-width kana equivalent
    buf = []
    for x in input:
        if x in ('ゐ', 'ヰ'):
            y = '\u0010'
        elif x in ('ゑ', 'ヱ'):
            y = '\u0011'
        elif x == 'ヵ':
            y = '\u0012'
        elif x == 'ヶ':
            y = '\u0013'
        elif x in ('ゎ', 'ヮ'):
            y = '\u0014'
        else:
            y = x
        buf.append(y)
    output = "".join(buf)
    # Convert to half-width katakana
    output = jaconv.z2h(jaconv.hira2kata(output), kana=True, digit=True, ascii=True)
    output = output.replace('゛', '゙')  # full-width dakuten to half-width dakuten
    output = output.replace('゜', '゚')  # full-width handakuten to half-width handakuten
    return output
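# The jaconv calls zen2han builds on, in isolation: hira2kata first, then z2h
# with kana/digit/ascii enabled yields half-width output, with voiced kana
# emitted as base character plus a separate sound mark; standalone full-width
# marks are what the two replace() fixes above handle.
import jaconv

print(jaconv.z2h(jaconv.hira2kata('がんばれ１２３'), kana=True, digit=True, ascii=True))
# -> 'ｶﾞﾝﾊﾞﾚ123'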
async def on_message(self, message):
    if len(message.content) == 0:
        return
    content = message.content.lower()
    content_normalized = jaconv.hira2kata(content[0].upper() + content[1:])
    if content_normalized == 'No.???':
        return
    # filter the pokemon list
    match = self.table[
        self.table.drop(columns='No.').applymap(
            lambda name: name == content_normalized
        ).any(axis=1)
    ]
    if len(match) != 0:
        # display the first pokemon of the filtered list
        # (only one item is expected to have been extracted)
        match = match.drop(columns='No.str')
        matchseq = match.iloc[0]
        embed = format_pokeinfo(zip(matchseq.index, matchseq))
        await message.channel.send(embed=embed)
    # filter the move (waza) list
    content_normalized_lower = content_normalized.lower()
    match_moves = self.table_moves[
        self.table_moves_katakana_lower.applymap(
            lambda name: name == content_normalized_lower
        ).any(axis=1)
    ]
    if len(match_moves) != 0:
        # display the first move of the filtered list
        # (only one item is expected to have been extracted)
        matchseq_moves = match_moves.iloc[0]
        embed = format_pokeinfo(zip(matchseq_moves.index, matchseq_moves))
        await message.channel.send(embed=embed)
async def on_message(self, ctx):
    if ctx.author.bot:
        return
    if all([
            ctx.channel.id != cs.Zyanken_room,
            ctx.channel.id != cs.Test_room
    ]):
        return
    for hand in ["グー", "チョキ", "パー"]:
        # Check, in this order, whether the message contains rock (グー),
        # scissors (チョキ), or paper (パー)
        if hand not in jaconv.hira2kata(jaconv.h2z(ctx.content)):
            continue
        # img, hand, msg, emoji1, emoji2 = zf.honda_to_zyanken(hand, ctx.author.id)
        img, hand, msg, emoji1, emoji2 = zf.honda_to_zyanken_breaktime(
            hand, ctx.author.id)
        if str(ctx.author.id) not in zf.No_reply:
            await ctx.add_reaction(emoji1)
            await ctx.add_reaction(emoji2)
            await ctx.channel.send(f"{ctx.author.mention} {hand}\n{msg}",
                                   file=discord.File(img),
                                   delete_after=5)
        if cs.Zyanken not in [roles.id for roles in ctx.author.roles]:
            guild = self.bot.get_guild(ctx.guild.id)
            await guild.get_member(ctx.author.id
                                   ).add_roles(get_role(guild, cs.Zyanken))
        """
        if emoji2 == "🎉" and len(zf.Former_winner) <= 5:
            guild = self.bot.get_guild(ctx.guild.id)
            await guild.get_member(ctx.author.id).add_roles(get_role(guild, cs.Winner))
            if ctx.author.id not in zf.Former_winner:
                zf.Former_winner.append(ctx.author.id)
        """
        break
def str_cleanUp(st):
    st = st.replace(" ", "")
    st = st.replace("・", "")
    st = st.replace("&", "アンド")
    st = jaconv.h2z(st, kana=True)
    st = jaconv.hira2kata(st)
    return st
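# A hypothetical call showing the whole normalization chain of str_cleanUp:
# spaces and '・' are dropped, '&' is spelled out, half-width kana becomes
# full-width, and hiragana becomes katakana.
print(str_cleanUp('あいう・ｶﾞ A&B'))  # -> 'アイウガAアンドB'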
def reverse_hirakana(string):
    import jaconv
    if is_hiragana(string):
        string = jaconv.hira2kata(string)
    elif is_katakana(string):
        string = jaconv.kata2hira(string)
    return string
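# reverse_hirakana assumes is_hiragana/is_katakana helpers that are not shown.
# A plausible sketch based on the Unicode kana blocks (assumptions, not the
# original definitions):
def is_hiragana(s):
    return all('\u3041' <= ch <= '\u309f' for ch in s)

def is_katakana(s):
    return all('\u30a1' <= ch <= '\u30ff' for ch in s)

print(reverse_hirakana('ねこ'))  # -> 'ネコ'
print(reverse_hirakana('ネコ'))  # -> 'ねこ'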
def replaceName(string):
    string = hira2kata(z2h(string, digit=True, kana=False))
    for tmp_string in trans_string_table:
        string = string.replace(tmp_string[0], tmp_string[1])
    string = string.translate(trans_table)
    string = re.sub(replace_string, "", string)
    return string
def prep_ph2num(dic_path):
    if isdir(dic_path):
        _dic_path = join(dic_path, "japanese.utf_8.table")
    elif isfile(dic_path):
        _dic_path = dic_path
    phone_mapping = {}
    with open(_dic_path, encoding="UTF-8") as f:
        for label in f:
            s = label.strip().split()
            key = jaconv.hira2kata(s[0])
            phone_mapping[key] = s[1:]

    ph2num = {}
    counter = 0
    for p in ["sil", "pau", "br"]:
        ph2num[p] = counter
        counter += 1
    for _, v in phone_mapping.items():
        for p in v:
            if p not in ph2num:
                ph2num[p] = counter
                counter += 1
    # undef
    ph2num["xx"] = counter

    return ph2num
def tester():
    # only completes once the user correctly identifies every kanji in learned_list
    mastered = {}
    while mastered != learned_list:
        for i in range(0, len(learned_list)):
            current_card = random.choice(list(learned_list.items()))
            if current_card[0] in mastered.keys():
                # choose only kanji that are not yet mastered
                continue
            user_answer = input(
                'What does {} read as? Type in Romaji: '.format(current_card[0]))
            # using the jaconv module: convert Romaji to hiragana and katakana
            hiragana_answer = jaconv.alphabet2kana(user_answer)   # Romaji -> hiragana
            katakana_answer = jaconv.hira2kata(hiragana_answer)   # hiragana -> katakana
            # check both the on and kun readings
            if hiragana_answer in current_card[1] or katakana_answer in current_card[1]:
                print('Correct!')
                mastered[current_card[0]] = current_card[1]
            else:
                print('Incorrect. This reads as {}'.format(current_card[1]))
    print('Congratulations! You have mastered the learned_list')
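# The jaconv conversion chain the quiz relies on, in isolation:
import jaconv

hiragana_answer = jaconv.alphabet2kana('sensei')     # -> 'せんせい'
katakana_answer = jaconv.hira2kata(hiragana_answer)  # -> 'センセイ'
print(hiragana_answer, katakana_answer)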
def get_kanji(letter):
    base_url = "https://mojikiban.ipa.go.jp/mji/q"
    kanjis = []
    # hiragana part
    query = {"読み": letter}
    r = requests.get(url=base_url, params=query)
    if r.json()["find"] is False:
        print("NO RESULT FOUND")
        results = []
    else:
        results = r.json()["results"]
    for i in results:
        moji = "\\" + i['UCS']["対応するUCS"].replace("+", "").replace(
            "^", "\\").lower()
        kanji = chr(int(i['UCS']["対応するUCS"][2:], 16))
        kanjis.append([kanji, i["総画数"]])
    # katakana part
    another_letter = jaconv.hira2kata(letter)
    query = {"読み": another_letter}
    r = requests.get(url=base_url, params=query)
    if r.json()["find"] is False:
        print("NO RESULT FOUND")
        results = []
    else:
        results = r.json()["results"]
    for i in results:
        kanji = chr(int(i['UCS']["対応するUCS"][2:], 16))
        kanjis.append([kanji, i["総画数"]])
    return kanjis
def sentence_to_tokens(sentence, is_katakana=False):
    '''
    Parses one sentence into tokens using MeCab.
    Assumes the UniDic CWJ 2.2.0 version of the dictionary is set as default.
    If is_katakana is set, converts hiragana to katakana before passing the
    string to MeCab, and finally reverts the change on the surface form.
    '''
    unidic_features = [
        'pos1', 'pos2', 'pos3', 'pos4', 'cType', 'cForm', 'lForm', 'lemma',
        'orth', 'pron', 'orthBase', 'pronBase', 'goshu', 'iType', 'iForm',
        'fType', 'fForm', 'iConType', 'fConType', 'type', 'kana', 'kanaBase',
        'form', 'formBase', 'aType', 'aConType', 'aModType', 'lid', 'lemma_id'
    ]
    tokens = []
    if is_katakana:
        sentence = jaconv.kata2hira(sentence)
    with MeCab() as mecab:
        for node in mecab.parse(sentence, as_nodes=True):
            if not node.is_eos():
                token = dict(zip(unidic_features, node.feature.split(',')))
                token['pos'] = token['pos1']
                if token['pos2'] != '*':
                    token['pos'] += '-' + token['pos2']
                if token['pos3'] != '*':
                    token['pos'] += '-' + token['pos3']
                if token['pos4'] != '*':
                    token['pos'] += '-' + token['pos4']
                if len(token) == 7:  # OOV
                    if is_katakana:
                        token['orth'] = jaconv.hira2kata(node.surface)
                    else:
                        token['orth'] = node.surface
                    token['orthBase'] = node.surface
                    token['lemma'] = node.surface
                    token['oov'] = True
                    tokens.append(token)
                else:
                    if is_katakana:
                        token['orth'] = jaconv.hira2kata(token['orth'])
                    token['oov'] = False
                    tokens.append(token)
    return tokens
def get_words_by_kana(kana_word):
    kana_word = jaconv.hira2kata(kana_word)  # convert hiragana to katakana
    words = words_list[words_list["読み"] == kana_word]
    # So that a path always exists regardless of the input, a single character
    # that is missing from the word list is returned as its own reading, at a
    # sufficiently high cost
    if len(kana_word) == 1 and len(words) == 0:
        words = create_empty_word(kana_word, 9999999)
    return words
def _kanji_to_kana(self, char):
    glyph = self.c.lookup_char(char).first()
    if glyph is None:
        return None
    # on readings come from kJapaneseOn (katakana by convention),
    # kun readings from kJapaneseKun (hiragana by convention)
    romaji_on = glyph.kJapaneseOn.lower()
    romaji_kun = glyph.kJapaneseKun.lower()
    jp_on = jaconv.hira2kata(jaconv.alphabet2kana(romaji_on)).split(' ')
    jp_kun = jaconv.alphabet2kana(romaji_kun).split(' ')
    return jp_on, jp_kun, glyph.kDefinition
def open_ust(file_name):
    song = []
    instance = {}
    bpm = 0.0
    for strm in open(file_name, "r"):
        if strm.strip().startswith("["):
            if len(instance) > 0:
                if instance.get("Tempo", None):
                    bpm = float(".".join(instance["Tempo"].split(",")))
                if instance.get("Lyric", None):
                    if len(instance["Lyric"].split(" ")) > 1:
                        instance["Lyric"] = instance["Lyric"].split(" ")[-1]
                    if "R" in instance["Lyric"] or "息" in instance["Lyric"]:
                        instance["Lyric"] = ""
                    if hira_p.search(instance["Lyric"]):
                        instance["Lyric"] = re.sub("[A-Za-z]+", "",
                                                   instance["Lyric"])
                    else:
                        instance["Lyric"] = romkan.to_katakana(instance["Lyric"])
                    instance["Lyric"] = re.sub("[^ァ-ン]", "", instance["Lyric"])
                instance["Tempo"] = str(bpm)
                if "NoteNum" in instance:
                    if instance["Lyric"] == "":
                        instance["NoteNum"] = "rest"
                    song.append(instance)
            instance = {"InstanceIdx": strm.strip().lstrip("[#").rstrip("]")}
        else:
            if len(strm.strip().split("=")) < 2:
                continue
            key, value = strm.strip().split("=", 1)
            if key == "Lyric":
                value = jaconv.hira2kata(value)
            if key in ("NoteNum", "Lyric", "Length", "Tempo"):
                instance[key] = value
    # flush the final instance with the same normalization as above
    if len(instance) > 0:
        if instance.get("Tempo", None):
            bpm = float(".".join(instance["Tempo"].split(",")))
        if instance.get("Lyric", None):
            if len(instance["Lyric"].split(" ")) > 1:
                instance["Lyric"] = instance["Lyric"].split(" ")[-1]
            if "R" in instance["Lyric"] or "息" in instance["Lyric"]:
                instance["Lyric"] = ""
            if hira_p.search(instance["Lyric"]):
                instance["Lyric"] = re.sub("[A-Za-z]+", "", instance["Lyric"])
            else:
                instance["Lyric"] = romkan.to_katakana(instance["Lyric"])
            instance["Lyric"] = re.sub("[^ァ-ン]", "", instance["Lyric"])
        instance["Tempo"] = str(bpm)
        if "NoteNum" in instance:
            if instance["Lyric"] == "":
                instance["NoteNum"] = "rest"
            song.append(instance)
    return song
def get_names(filename: str, gender: str):
    with open(filename, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            name = row['name']
            name_simplified = simplify_swedish_name(row['name'])
            count = normalize_int(row['count'])
            katakana = jaconv.hira2kata(jaconv.alphabet2kana(name_simplified))
            if is_simple_katakana(katakana):
                yield {
                    'name': name,
                    'katakana': katakana,
                    'count': count,
                    'gender': gender
                }
def add_ojisan(parsed: list):
    word_list = []
    for i, word in enumerate(parsed):
        if '人名' in word[3] and '固有名詞' in word[3]:
            word_list.append(word[0])
            word_list.append('チャン')
            if random.randint(0, 2) == 0:
                word_list.append(random_emote())
        elif '係助詞' in word[3]:
            word_list.append(word[0])
            word_list.append(random_emote())
            word_list.append('、')
        elif '助動詞' in word[3] and '特殊' not in word[4]:
            # catches ない so that it is not converted to katakana
            word_list.append(jaconv.hira2kata(word[0]))
            word_list.append(random_emote())
        elif '終助詞' in word[3]:
            word_list.append(jaconv.hira2kata(word[0]))
            word_list.append(random_emote())
        elif '接尾' in word[3] and '人名' in word[3]:
            pass
        elif '接続助詞' in word[3]:
            word_list.append(word[0])
            word_list.append(random_emote())
        elif word[0] in ('?', '?'):
            word_list.append('❓')
        elif word[0] in ('!', '!'):
            word_list.append('❗')
        else:
            word_list.append(word[0])
    return ''.join(word_list)
def convert_hira_to_kana(user_message_hira):
    re_hira = re.compile(r'[\u3041-\u3093]+')
    remove_bar = user_message_hira.replace('ー', '')
    if re_hira.fullmatch(remove_bar):
        converted_kana = jaconv.hira2kata(user_message_hira)
        print(converted_kana)
        return converted_kana
    else:
        print("ひらがなを入力してください")  # "Please enter hiragana"
        return None
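# Hypothetical calls: pure hiragana (long-vowel marks allowed) converts;
# anything else is rejected with the prompt above.
convert_hira_to_kana('らーめん')   # prints 'ラーメン' and returns it
convert_hira_to_kana('ラーメン')   # prints the hiragana prompt, returns None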
def C(self, word):
    try:
        H = float(self.df_hira[word])
    except KeyError:
        H = 0.0
    try:
        K = float(self.df_kata[jaconv.hira2kata(word)])
    except KeyError:
        K = 0.0
    n = 10  # tuning parameter
    self.Cvalue = 2 / (1 + math.exp(-n * K / (H + 1))) - 1
    return self.Cvalue
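# A worked example of the score above: with H = 2.0 hiragana hits,
# K = 1.0 katakana hits, and the tuning parameter n = 10,
# C = 2 / (1 + exp(-10 * 1.0 / (2.0 + 1))) - 1 ≈ 0.931,
# i.e. the score saturates towards 1 once the katakana form carries any weight.
import math

H, K, n = 2.0, 1.0, 10
print(2 / (1 + math.exp(-n * K / (H + 1))) - 1)  # ≈ 0.9311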
def talk():
    # Generate a reply from the received msg
    msg = jaconv.hira2kata(urllib.parse.unquote(request.args.get('msg')))
    msg_data = np.zeros(
        (1, lstm.max_encoder_seq_length, lstm.num_encoder_tokens),
        dtype='float32')
    for i, char in enumerate(msg):
        msg_data[0, i, lstm.input_token_index[char]] = 1.
    response = lstm.decode_sequence(msg_data)
    response = re.sub(r'[「」]', '', response)
    response = jaconv.kata2hira(response)
    if response.find('…………') != -1:
        response = 'ぶわーーーー!=3 =3\n'
    return response
def line_info_n_k_func(self):
    for look_up_file, f_name in zip(self.look_up_files, self.f_names):
        ono_lis = []
        with open(self.b_name, "r") as f:
            for line in f:
                ono_lis.append(line.rstrip("\n"))
        ono_lis = list(set(ono_lis))
        ono_lis = [jaconv.hira2kata(i) for i in ono_lis]
        ono_counter = {}
        ono_lis_st = "|".join(ono_lis)
        file_list = glob(look_up_file.rstrip("/"))
        ono = []
        print(file_list)
        for file in tqdm(file_list):
            story_list = file + "/*"
            for story in tqdm(glob(story_list)):
                for i in ono_lis:
                    ono_counter[i] = []
                for data in tqdm(glob(story + "/*")):
                    if f_name + story.lstrip(
                            look_up_file) + "line_info_n_k.csv" in glob(
                            f_name + "*.csv"):
                        pass
                    elif len(glob(story + "/*")) == 1:
                        ono_counter = search_inside_sentence(
                            data, ono_lis_st, ono_counter, "line_info_n_k", True)
                    else:
                        if self.exclude_stirngs in data:
                            pass
                        else:
                            ono_counter = search_inside_sentence(
                                data, ono_lis_st, ono_counter, "line_info_n_k",
                                False)
                if f_name + story.lstrip(
                        look_up_file) + "line_info_n_k.csv" in glob(f_name +
                                                                    "*.csv"):
                    print(f_name + story.lstrip(look_up_file) +
                          "line_info_n_k.csv")
                    pass
                else:
                    df = pd.DataFrame(
                        [[i for i in ono_counter.values() if len(i) > 0]],
                        index=[story],
                        columns=[
                            i for i in ono_counter.keys()
                            if len(ono_counter[i]) > 0
                        ])
                    df.to_csv(f_name + story.lstrip(look_up_file) +
                              "line_info_n_k.csv",
                              index=False)
def convert_kana(strings):
    """ kana """
    result = morphological_analysis(strings, _KBM_MODEL, _KYTEA_PATH)
    result_strings = result.split(' ')
    target_array = [s.split('/')[2] for s in result_strings]
    join_strings = ''.join(target_array)
    regex = re.compile('[\u3041-\u309F]+')
    process_strings = regex.findall(join_strings)
    hiragana = ''.join(process_strings)
    katakana = jaconv.hira2kata(hiragana)
    zenkaku = jaconv.h2z(katakana, digit=True, ascii=True)
    return zenkaku
def talkA3rt(message):
    apikey = ''
    client = pya3rt.TalkClient(apikey)
    api_response = client.talk(message.body['text'])
    # When the response is ok, send back a reply
    if api_response['message'] == 'ok':
        reply_message = api_response['results'][0]['reply']
        message.reply(jaconv.hira2kata(reply_message) + '…ロボ')
        # plain reply:
        # message.reply(reply_message)
    # On an API error, reply with the error message
    else:
        message.reply('エラー、ウマク返事ガデキマセン [ERROR:' +
                      api_response['message'] + ']')
def checkSite(name, yomi, soup):
    if not CheckKATAKANA.checkKatakana(name):
        try:
            soup_yomi = soup.find('h2').span.string
            soup_yomi = soup_yomi.replace('−', '-')
            soup_yomi = soup_yomi.replace('-', '-')
            soup_yomi = jaconv.hira2kata(
                soup_yomi[soup_yomi.index('(') + 1:soup_yomi.index(')')])
        except ValueError:
            print('サイト側に読みがありません')  # the site has no reading
            return False
        except AttributeError:
            print('サイト側に読みがありません')  # the site has no reading
            return False
        yomi = jaconv.hira2kata(yomi)
        if yomi == soup_yomi:
            print('TRUE')
            return True
        else:
            print('FALSE')
            return False
    else:
        return True
def search_morpheme(
        m: MultiMorpheme,
        match_reading=True) -> List[Tuple[jmdict.JMDEntry, List[int]]]:
    pos = m.part_of_speech()
    has_kanji = re.search(kanji_re, m.surface())
    ids = set()
    entries: List[jmdict.JMDEntry] = []
    reading = m.reading_form()
    dict_reading = "".join(mm.reading_form() for mm in parse(m.dictionary_form()))
    for entry in jmdict_lookup(m.dictionary_form()).entries:
        if entry.idseq not in ids:
            ids.add(entry.idseq)
            entries.append(entry)
    matches: List[Tuple[jmdict.JMDEntry, List[int]]] = []
    reading_matches: List[Tuple[jmdict.JMDEntry, List[int]]] = []
    for entry in entries:
        if match_reading and not any(
                jaconv.hira2kata(r.text) in (reading, dict_reading)
                for r in entry.kana_forms):
            continue
        match_senses = list()
        senses = list()
        reading_matches.append((entry, list(range(len(entry.senses)))))
        for i, sense in enumerate(entry.senses):
            if not sense.pos:
                senses.append(i)
            elif any(sudachi_jmdict_pos_match(pos, p) for p in sense.pos):
                senses.append(i)
                match_senses.append(i)

        def sense_key(i):
            sense = entry.senses[i]
            # prefer senses whose "usually kana" tag disagrees with the
            # surface containing kanji (kanji surface -> tag absent,
            # kana surface -> tag present)
            uk_match = (bool(has_kanji) !=
                        ("word usually written using kana alone" in sense.misc))
            common = any(
                ("common" in p or "futsuumeishi" in p) for p in sense.pos)
            has_pos = bool(sense.pos)
            return (uk_match, common, has_pos)

        senses.sort(key=sense_key, reverse=True)
        if match_senses:
            matches.append((entry, senses))
    if not matches:
        return reading_matches
    return matches
def text_to_sequence(text, p=0.0):
    for c in [" ", " ", "「", "」", "『", "』", "・", "【", "】", "(", ")", "(", ")"]:
        text = text.replace(c, "")
    text = text.replace("!", "!")
    text = text.replace("?", "?")
    text = normalize_delimitor(text)
    text = jaconv.normalize(text)
    if p > 0:
        text = mix_pronunciation(text, p)
    text = jaconv.hira2kata(text)
    text = add_punctuation(text)
    return [ord(c) for c in text] + [_eos]  # EOS
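# The return value is just Unicode code points plus an EOS id. The katakana
# conversion step alone, for illustration:
import jaconv

text = jaconv.hira2kata('こんにちは')  # -> 'コンニチハ'
print([ord(c) for c in text])          # [12467, 12531, 12491, 12481, 12495]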
def sy2a(self, s, y):
    # preprocess strings
    s = s.strip()
    y = jaconv.normalize(y, "NFKC")
    y = jaconv.hira2kata(y)
    # encode
    s_np, y_np = self.encode_sy(s, y)
    s_np, y_np = self.add_batch_dim(s_np, y_np)
    # inference
    accent = self.infer(s_np, y_np)[0]
    yomi_and_accent = self.zip_ya(y, accent)
    return yomi_and_accent
def convert(mozc_map, mozc_dir, output_dir):
    with open(os.path.join(output_dir, 'mozc.csv'), 'w', encoding='utf8') as out_fd:
        for f in glob.glob(os.path.join(
                mozc_dir, 'src/data/dictionary_oss/dictionary*.txt')):
            with open(f, encoding='utf8') as in_fd:
                for l in in_fd:
                    l = l.strip().split('\t')
                    (yomi, lid, rid, cost, surface) = l[:5]
                    if lid not in mozc_map:
                        continue
                    (new_id, pos) = mozc_map[lid]
                    yomi = jaconv.hira2kata(yomi)
                    line = ','.join([surface, new_id, new_id, '0', pos,
                                     surface, yomi, yomi])
                    line += '\n'
                    out_fd.write(line)
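# Given a hypothetical Mozc dictionary row "わーど\t1847\t1847\t4000\tワード" and
# mozc_map = {'1847': ('1288', '名詞')}, the loop above would emit the CSV line:
# ワード,1288,1288,0,名詞,ワード,ワード,ワード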
def encode_katakana(text):
    """I don't think this quite works yet."""
    encoded = []
    for char in text:
        if jaconv:
            # try to convert Japanese text to half-width katakana
            char = jaconv.z2h(jaconv.hira2kata(char))
            # TODO: "the conversion may result in multiple characters".
            # If that can really happen (I am not sure it can), the string would
            # have to be split and every single character passed through the
            # following lines.
        if char in TXT_ENC_KATAKANA_MAP:
            encoded.append(TXT_ENC_KATAKANA_MAP[char])
        else:
            # TODO: doesn't this discard everything that is not in the map? Can
            # we be sure the input contains only encodable characters? We could
            # at least raise an exception when encoding is not possible.
            pass
    return b"".join(encoded)
def normalize(s):
    s = jaconv.hira2kata(s).replace('・', '')
    s = re_symbol.sub('', s)
    return re_tyouon.sub('ー', s)
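# A sketch of module-level patterns that would make normalize above runnable
# (assumptions, not the original definitions):
import re
import jaconv

re_symbol = re.compile(r'[!-/:-@\[-`{-~\s]')  # ASCII symbols and whitespace
re_tyouon = re.compile(r'[〜~―ー]+')           # long-vowel / dash variants

print(normalize('すーぱー・まーけっと!'))  # -> 'スーパーマーケット'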
def test_hira2kata():
    assert_equal(jaconv.hira2kata('ともえまみ'), 'トモエマミ')
    assert_equal(jaconv.hira2kata('まどまぎ', ignore='ど'), 'マどマギ')
    _compare(jaconv.hira2kata, HIRAGANA, FULL_KANA)
def normalize_yomi(self, yomi):
    yomi = jaconv.hira2kata(yomi)
    return yomi.replace('ウ゛', 'ヴ').replace(' ', '')
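# The replace above exists because hira2kata leaves a standalone dakuten mark
# untouched: 'う゛' maps to the two-codepoint 'ウ゛', not to the single 'ヴ'.
import jaconv

print(jaconv.hira2kata('う゛'))  # -> 'ウ゛' (U+30A6 + U+309B), then fixed to 'ヴ'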