예제 #1
0
def en_to_kana(msg_src):
    """Replace runs of ASCII letters in ``msg_src`` with their kana reading.

    The message is split into alternating non-letter / letter chunks. Each
    chunk is looked up with ``alkana.get_kana``; chunks for which the lookup
    returns ``None`` are kept unchanged.

    Args:
        msg_src: Text that may contain English words.

    Returns:
        The text with every chunk that has a reading replaced by that reading.
    """
    # The capturing group makes re.split keep the letter runs in the result.
    # Only letters belong in the class: parentheses inside [...] are literal
    # characters, so "(hello)" used to be looked up as one unknown token.
    msg_list = re.split(r'([a-zA-Z]+)', msg_src)
    msg_list_fmt = []
    for word in msg_list:
        kana = alkana.get_kana(word)
        msg_list_fmt.append(word if kana is None else kana)
    return ''.join(msg_list_fmt)
예제 #2
0
파일: merge.py 프로젝트: taisei-s/utanohi
def countMora(m_token):
    """Return the mora count of a single token.

    ``m_token`` is indexed as: ``[0]`` the token text, ``[1]`` a
    part-of-speech label, ``[-1]`` the reading, or ``'*'`` when no reading
    is available (presumably a morphological-analyser row; confirm against
    the caller).

    Tokens labelled '記号' or '補助記号' (symbols) count as 0. Otherwise the
    count is the length of the reading with small ャ/ュ/ョ removed; when no
    reading can be determined, the token text itself is used with small
    ゃ/ゅ/ょ removed.
    """
    reading = m_token[-1]
    if reading == '*':
        # No reading supplied: fall back to the alkana lookup of the text.
        kana = alkana.get_kana(m_token[0])
        reading = kana if kana else ''

    if m_token[1] in ('記号', '補助記号'):
        return 0

    if reading == '':
        return len(re.sub(r'[ゃゅょ]+?', '', m_token[0]))
    return len(re.sub(r'[ャュョ]+?', '', reading))
예제 #3
0
    def replace_eng_to_kana(cls, msg: str) -> str:
        """Replace English words in ``msg`` with their kana reading.

        Every string returned by ``cls.re_eng.findall(msg)`` is looked up
        (lower-cased) with ``alkana.get_kana``; words whose lookup returns
        ``None`` are left as they are.

        Args:
            msg: Text that may contain English words.

        Returns:
            The text with each word that has a reading replaced by it.
        """
        parts = []
        cursor = 0
        for word in cls.re_eng.findall(msg):
            # Locate the word at or after the end of the previous one.
            # A plain ``msg.replace(word, read, 1)`` rewrites the first
            # occurrence anywhere in the string, which may sit inside an
            # earlier word that had no reading (e.g. "wood" inside "xwood").
            start = msg.find(word, cursor)
            if start < 0:
                continue
            # alkana is queried with the lower-cased word.
            read = alkana.get_kana(word.lower())
            parts.append(msg[cursor:start])
            parts.append(word if read is None else read)
            cursor = start + len(word)
        parts.append(msg[cursor:])

        return ''.join(parts)
예제 #4
0
 def test_add_external_data(self):
     # The sample CSV sits next to this test module.
     word = 'mmmmm'
     here = os.path.dirname(__file__)
     csv_path = os.path.join(here, 'sample_external_data.csv')

     # Before the external data is loaded the word has no reading.
     reading_before = alkana.get_kana(word)
     self.assertEqual(None, reading_before)

     # After loading, the same lookup is expected to return 'テスト'.
     alkana.add_external_data(csv_path)
     reading_after = alkana.get_kana(word)
     self.assertEqual('テスト', reading_after)
예제 #5
0
 def test_get_kana_upper_case(self):
     # A word containing an upper-case letter should still get a reading.
     expected = 'ワールド'
     actual = alkana.get_kana("World")
     self.assertEqual(expected, actual)
예제 #6
0
 def test_get_kana(self):
     # Basic lookup of a lower-case word.
     expected = 'ハロー'
     actual = alkana.get_kana('hello')
     self.assertEqual(expected, actual)
예제 #7
0
 def test_get_kana_none_case(self):
     # A string with no reading is expected to yield None.
     actual = alkana.get_kana("abcdefg")
     self.assertEqual(None, actual)
예제 #8
0
def _count_rhyme_points(criteria_vowel_r, candidate_vowel_r):
    """Count how many leading positions of the two vowel lists agree."""
    points = 0
    # zip stops at the shorter list, so a short candidate cannot raise
    # IndexError.
    for expected, actual in zip(criteria_vowel_r, candidate_vowel_r):
        if expected != actual:
            break
        points += 1
    return points


def search_synonym(word, criteria_vowel_r):
    """Find the synonym of ``word`` that best matches a reversed vowel list.

    The score ("rhyme_pt") of a candidate is the number of leading positions
    at which its reversed vowel sequence equals ``criteria_vowel_r``. The
    word itself is the initial best candidate; a synonym replaces it only
    when its score is strictly higher.

    Args:
        word: The word to look up in the WordNet tables reached via ``conn``.
        criteria_vowel_r: Vowel sequence, last vowel first, to rhyme with.

    Returns:
        ``{'original': word, 'synonym': {'word': ..., 'rhyme_pt': ...}}``.
    """
    synonym_data = {'original': word}

    word_phoneme = g2p(mecab_get_yomi(word))
    # Reverse the phonemes so the comparison starts from the end of the word.
    word_phoneme.reverse()
    word_vowel_r = [x for x in word_phoneme if x in vowel]
    synonym_best = {
        'word': word,
        'rhyme_pt': _count_rhyme_points(criteria_vowel_r, word_vowel_r),
    }

    # Check whether the word exists in WordNet.
    # Bound parameters are used instead of string formatting so a lemma that
    # contains a quote can neither break nor alter the statement.
    # NOTE(review): "?" placeholders assume ``conn`` is a sqlite3 connection.
    word_id = None
    for row in conn.execute("select wordid from word where lemma=?", (word,)):
        # When several rows match, the last one wins.
        word_id = row[0]

    # Not in WordNet: the word itself stays the best candidate.
    if word_id is None:
        synonym_data['synonym'] = synonym_best
        return synonym_data

    # Collect every concept (synset) that contains the word.
    synsets = [
        row[0]
        for row in conn.execute(
            "select synset from sense where wordid=?", (word_id,))
    ]

    # Score every other word that belongs to one of those concepts.
    for synset in synsets:
        sense_rows = conn.execute(
            "select wordid from sense where (synset=? and wordid!=?)",
            (synset, word_id))
        for sense_row in sense_rows:
            lemma_rows = conn.execute(
                "select lemma from word where wordid=?", (sense_row[0],))
            for lemma_row in lemma_rows:
                synonym = lemma_row[0]
                # Lemmas containing an underscore are skipped.
                if '_' in synonym:
                    continue
                if isalpha(synonym):
                    synonym = alkana.get_kana(synonym)
                # No kana reading available for an alphabetic lemma.
                if synonym is None:
                    continue

                synonym_phoneme = g2p(mecab_get_yomi(synonym))
                if '<unk>' in synonym_phoneme:
                    continue

                # Reverse the phonemes, as for the input word.
                synonym_phoneme.reverse()
                synonym_vowel_r = [x for x in synonym_phoneme if x in vowel]
                rhyme_pt = _count_rhyme_points(
                    criteria_vowel_r, synonym_vowel_r)

                if rhyme_pt > synonym_best['rhyme_pt']:
                    synonym_best = {
                        'word': synonym,
                        'rhyme_pt': rhyme_pt,
                    }

    synonym_data['synonym'] = synonym_best

    return synonym_data