def main():
    """Show the four ways fugashi returns results: parse() vs __call__,
    each in default mode and in wakati (-Owakati) mode."""
    default_tagger = Tagger()
    wakati = Tagger('-Owakati')
    sentence = '私はご飯を食べます。'

    # parse() in wakati mode -> space-separated string
    output = wakati.parse(sentence)
    print('result1(parse + wakati):')
    print(output)
    print(type(output))
    print()

    # parse() in default mode -> full MeCab feature dump as a string
    output = default_tagger.parse(sentence)
    print('result2(parse):')
    print(output)
    print(type(output))
    print()

    # calling the tagger directly -> list of node objects, even in wakati mode
    output = wakati(sentence)
    print('result3(_call_+wakati):')
    print(output)
    print(type(output))
    print(inspect.getmembers(output[0]))
    print(type(output[0]))
    print()

    output = default_tagger(sentence)
    print('result4(_call_):')
    print(output)
    print(type(output))
    print(inspect.getmembers(output[0]))
    print(type(output[0]))
    print()

    print('DONE')
def main():
    """Compare the stock unidic dictionary with unidic-neologd on a
    sentence containing a neologism ('渋谷ストリーム')."""
    base = Tagger()
    neologd = Tagger('-d /usr/lib/x86_64-linux-gnu/mecab/dic/mecab-unidic-neologd')
    sample = '私は、渋谷ストリームでランチを食べる。'

    print('unidic:')
    print(base.parse(sample))
    print()

    print('unidic-neologd:')
    print(neologd.parse(sample))
    print('DONE')
class Tokenizer:
    """Thin wrapper around a wakati-mode fugashi Tagger.

    Provides a single ``tokenize`` method that turns a sentence into a
    list of surface-form tokens.
    """

    def __init__(self):
        # -Owakati makes parse() return a space-separated token string.
        self.tagger = Tagger("-Owakati")

    def tokenize(self, text):
        """Return *text* split into tokens.

        Uses bare ``split()`` rather than ``split(" ")`` so that an empty
        input yields ``[]`` (not ``['']``) and any stray extra whitespace
        or trailing newline in the tagger output cannot produce empty
        tokens.
        """
        return self.tagger.parse(text).split()
def main():
    """Contrast Tagger and GenericTagger output for the same input:
    print the raw parse, then each token's surface form and features."""
    sample = 'softbank'
    # (label, tagger) pairs, printed in the same order as before.
    for label, tg in (('Tagger:', Tagger()), ('GenericTagger:', GenericTagger())):
        print(label)
        print(tg.parse(sample))
        for token in tg(sample):
            print(token.surface)
            print(token.feature)
        print()
    print('DONE')
def test_wakati(text, wakati):
    """Assert that wakati-mode segmentation of *text* equals *wakati*."""
    segmented = Tagger('-Owakati').parse(text)
    assert segmented == wakati
text = 'でないと'

# Strip punctuation and spaces. Chain every replacement on filter_text:
# the original reassigned from `text` each time, so only the final
# replace (')') ever took effect — a bug.
filter_text = text
for _ch in (' ', '?', '!', '...', '(', ')'):
    filter_text = filter_text.replace(_ch, '')

text_binary = np.zeros(len(filter_text))  # 1 if words were found at this position, else 0
text_kana = text  # text that will be transliterated to kana

# NOTE(review): `tagger` is not defined in this chunk — presumably a
# fugashi Tagger created elsewhere in the file; confirm.
tagger.parse(text)  # => '麩 菓子 は 、 麩 を 主材 料 と し た 日本 の 菓子 。'


def data_preparation(csv):
    """Parse semicolon-separated lines into two parallel lists.

    Each line is expected to look like ``rule;explanation``. Lines where
    any field is exactly ``'null'`` are skipped.

    Parameters:
        csv: iterable of text lines (e.g. an open file object).

    Returns:
        (regles, explications): two lists of equal length — the rules
        and their explanations.
    """
    regles = []
    explications = []
    for l in csv:
        # str.strip() returns a new string; the original discarded the
        # result, leaving a trailing newline on the last field.
        l = l.strip()
        ligne = l.split(';')
        if 'null' not in ligne:
            regles.append(ligne[0])
            explications.append(ligne[1])
    return regles, explications
#!/usr/bin/env python
"""Count token frequencies in wagahai.txt with a wakati-mode tagger."""
from collections import Counter

from fugashi import Tagger

tt = Tagger('-Owakati')
wc = Counter()

# Context manager closes the file deterministically; the original relied
# on the garbage collector to release the handle.
with open('wagahai.txt') as fh:
    for line in fh:
        for word in tt.parse(line.strip()).split(' '):
            wc[word] += 1