def test_analyzer_kuromoji_explain(self):
    # Analyze a Japanese sentence with the 'kuromoji' analyzer while requesting
    # the 'partOfSpeech' attribute, then check the tokens and their
    # part-of-speech tags. A comment is used instead of a docstring so the
    # test runner's reported description stays unchanged.

    def extract_token_and_pos(response):
        """Reduce the raw response to a list of {"token", "pos"} dicts."""
        token_entries = response.get('detail').get('analyzer').get('tokens')
        return [
            {"token": entry.get('token'), "pos": entry.get('partOfSpeech')}
            for entry in token_entries
        ]

    analyzed = esanpy.analyzer(
        '今日の天気は晴れです。',
        analyzer='kuromoji',
        attributes=['partOfSpeech'],
        converter=extract_token_and_pos,
    )

    surface_forms = [item.get('token') for item in analyzed]
    self.assertEqual(surface_forms, ['今日', '天気', '晴れ'])

    pos_tags = [item.get('pos') for item in analyzed]
    self.assertEqual(pos_tags, ['名詞-副詞可能', '名詞-一般', '名詞-一般'])
def test_analysis_case1(self):
    # End-to-end check of a custom analysis namespace ('case1'):
    #   1. create it with a mapping char filter, a kuromoji tokenizer using a
    #      user dictionary, a ja_stop token filter and a custom analyzer;
    #   2. verify that every component is reported by get_analysis();
    #   3. analyze a sentence through the custom analyzer;
    #   4. delete the namespace and verify it is gone.
    # A comment is used instead of a docstring so the test runner's reported
    # description stays unchanged.

    # NOTE(review): these paths are resolved against the current working
    # directory, so the test presumably has to be run from the repository
    # root -- confirm.
    mapping_file = os.path.abspath("tests/resources/case1_mapping_ja.txt")
    userdict_file = os.path.abspath("tests/resources/case1_userdict.txt")

    esanpy.create_analysis(
        'case1',
        char_filter={
            "mapping_ja_filter": {
                "type": "mapping",
                "mappings_path": mapping_file
            }
        },
        tokenizer={
            "kuromoji_user_dict": {
                "type": "kuromoji_tokenizer",
                "mode": "normal",
                "user_dictionary": userdict_file,
                "discard_punctuation": False
            }
        },
        token_filter={
            "ja_stopword": {
                "type": "ja_stop",
                "stopwords": [
                    "行く"
                ]
            }
        },
        analyzer={
            "kuromoji_analyzer": {
                "type": "custom",
                "char_filter": ["mapping_ja_filter"],
                "tokenizer": "kuromoji_user_dict",
                "filter": ["ja_stopword"]
            }
        }
    )

    # Everything that relies on the namespace runs inside try/finally so that
    # a failing assertion cannot leave 'case1' behind for later tests or runs.
    try:
        analysis = esanpy.get_analysis('case1')

        # assertIn reports the missing key and the container on failure,
        # which a bare assertTrue(... in ...) does not.
        self.assertIn('filter', analysis, "filter exists.")
        self.assertIn('ja_stopword', analysis.get('filter'), "ja_stopword exists.")
        self.assertIn('tokenizer', analysis, "tokenizer exists.")
        self.assertIn('kuromoji_user_dict', analysis.get('tokenizer'), "kuromoji_user_dict exists.")
        self.assertIn('char_filter', analysis, "char_filter exists.")
        self.assertIn('mapping_ja_filter', analysis.get('char_filter'), "mapping_ja_filter exists.")
        self.assertIn('analyzer', analysis, "analyzer exists.")
        self.assertIn('kuromoji_analyzer', analysis.get('analyzer'), "kuromoji_analyzer exists.")

        result = esanpy.analyzer('①東京スカイツリーに行く',
                                 analyzer="kuromoji_analyzer",
                                 namespace='case1')
        self.assertEqual(result, ['1', '東京スカイツリー', 'に'])
    finally:
        esanpy.delete_analysis('case1')

    # After deletion the namespace must no longer be retrievable.
    analysis = esanpy.get_analysis('case1')
    self.assertIsNone(analysis, "analysis is None.")
def test_analyzer_standard_explain(self):
    # Analyze an English sentence with the 'standard' analyzer while requesting
    # the 'keyword' attribute, then check each per-token field collected by
    # the converter. A comment is used instead of a docstring so the test
    # runner's reported description stays unchanged.
    field_names = ('token', 'start_offset', 'end_offset', 'position', 'type', 'keyword')

    def pick_fields(response):
        """Keep only the fields in ``field_names`` from each token entry."""
        token_entries = response.get('detail').get('analyzer').get('tokens')
        return [
            {name: entry.get(name) for name in field_names}
            for entry in token_entries
        ]

    analyzed = esanpy.analyzer(
        'This is a pen.',
        analyzer='standard',
        attributes=['keyword'],
        converter=pick_fields,
    )

    # (field, expected per-token values), asserted in this order.
    expectations = (
        ('token', ['this', 'is', 'a', 'pen']),
        ('start_offset', [0, 5, 8, 10]),
        ('end_offset', [4, 7, 9, 13]),
        ('position', [0, 1, 2, 3]),
        ('type', ['<ALPHANUM>', '<ALPHANUM>', '<ALPHANUM>', '<ALPHANUM>']),
        ('keyword', [None, None, None, None]),
    )
    for field, expected in expectations:
        self.assertEqual([item.get(field) for item in analyzed], expected)
def test_analyzer_kuromoji(self):
    # With analyzer='kuromoji' and no converter, the call is expected to
    # return exactly these three token strings for the sentence below.
    expected_tokens = ['今日', '天気', '晴れ']
    actual_tokens = esanpy.analyzer('今日の天気は晴れです。', analyzer='kuromoji')
    self.assertEqual(actual_tokens, expected_tokens)
def test_analyzer_standard(self):
    # With analyzer='standard' the sentence is expected to come back as
    # lower-cased words without the trailing period.
    expected_tokens = ['this', 'is', 'a', 'pen']
    actual_tokens = esanpy.analyzer('This is a pen.', analyzer='standard')
    self.assertEqual(actual_tokens, expected_tokens)
def test_analyzer_default(self):
    # No analyzer is named here; the expected tokens are the same ones asserted
    # for analyzer='standard' (presumably the library default -- confirm).
    expected_tokens = ['this', 'is', 'a', 'pen']
    actual_tokens = esanpy.analyzer('This is a pen.')
    self.assertEqual(actual_tokens, expected_tokens)
# Minimal usage example: start the esanpy server, analyze one Japanese
# sentence with the 'kuromoji' analyzer, print the tokens, stop the server.
import esanpy

print("hello,world")
esanpy.start_server()
try:
    print("server start...")
    tokens = esanpy.analyzer("今日の天気はハレです。", analyzer="kuromoji")
    print(tokens)
finally:
    # Always stop the server, even when the analysis call raises; otherwise
    # the server started above would be left running.
    esanpy.stop_server()