def test_parse_dictionary_path(self):
    """Tokenize using a dictionary given by explicit filesystem path.

    The check only runs when the neologd dictionary directory exists at its
    hard-coded location; otherwise the test returns without asserting
    anything.
    """
    neologd_dir = '/usr/local/lib/mecab/dic/mecab-ipadic-neologd'
    if not os.path.exists(neologd_dir):
        # Dictionary is not installed at this location: nothing to verify.
        return

    tokenizer = MecabWrapper(dictType=None, path_dictionary=neologd_dir)
    # The wrapper is expected to keep the path it was given unchanged.
    assert tokenizer._path_dictionary == neologd_dir

    result = tokenizer.tokenize(sentence=self.test_senetence, return_list=False)
    assert isinstance(result, TokenizedSenetence)
def test_init_userdict(self):
    """Verify tokenization when every dictionary is enabled.

    Builds a wrapper with ``dictType='all'`` plus the user dictionary CSV at
    ``self.path_user_dict`` and checks that the list output contains the
    word 'さくらまな' (presumably an entry of the user dictionary -- confirm
    against the fixture file).
    """
    tokenizer = MecabWrapper(dictType='all', pathUserDictCsv=self.path_user_dict)
    assert isinstance(tokenizer, MecabWrapper)

    words = tokenizer.tokenize(sentence=self.test_senetence, return_list=True)
    assert isinstance(words, list)
    assert 'さくらまな' in words
def test_parse_jumandic(self):
    """Tokenize with the Jumandic dictionary and check its domain tag.

    Every token whose stem is '女優' must carry the 'ドメイン:文化・芸術'
    marker in its analyzed line. If no token has that stem, nothing beyond
    the type checks is asserted.
    """
    tokenizer = MecabWrapper(dictType='jumandic')
    assert isinstance(tokenizer, MecabWrapper)

    result = tokenizer.tokenize(sentence=self.test_senetence, return_list=False)
    assert isinstance(result, TokenizedSenetence)

    actress_tokens = (
        token for token in result.tokenized_objects
        if token.word_stem == '女優'
    )
    # The 'ドメイン:文化・芸術' annotation is special output only in Jumandic.
    assert all(
        'ドメイン:文化・芸術' in token.analyzed_line
        for token in actress_tokens
    )
def test_init_userdict(self):
    """Check that ipadic plus a user dictionary yields the stem 'さくらまな'.

    Tokenizes ``self.test_senetence`` into a ``TokenizedSenetence`` and
    requires at least one token object whose ``word_stem`` equals
    'さくらまな'.
    """
    # NOTE(review): the assertions require this combination to succeed
    # (no error response) -- confirm that this is the intended contract.
    tokenizer = MecabWrapper(dictType='ipadic', pathUserDictCsv=self.path_user_dict)
    assert isinstance(tokenizer, MecabWrapper)

    result = tokenizer.tokenize(sentence=self.test_senetence, return_list=False)
    assert isinstance(result, TokenizedSenetence)

    assert any(
        token.word_stem == 'さくらまな'
        for token in result.tokenized_objects
    )
def test_neologd_parse(self):
    """Tokenize the two sample sentences with the neologd dictionary.

    For each sentence, checks that ``tokenize`` returns a
    ``TokenizedSenetence`` and that its ``convert_list_object()`` result is
    a list made up only of ``str`` morphemes.
    """
    mecab_obj = MecabWrapper(dictType='neologd')
    for sentence in (self.test_senetence, self.test_sentence2):
        parsed_obj = mecab_obj.tokenize(sentence=sentence)
        # assertTrue(obj, cls) would treat ``cls`` as the failure message and
        # only test truthiness; assertIsInstance does the real type check.
        self.assertIsInstance(parsed_obj, TokenizedSenetence)

        morphs = parsed_obj.convert_list_object()
        self.assertIsInstance(morphs, list)
        self.assertTrue(all(isinstance(mrph, str) for mrph in morphs))
def test_neologd_parse(self):
    """Verify that sentences are split correctly with the neologd dictionary.

    Both sample sentences are tokenized; each result must be a
    ``TokenizedSenetence`` whose ``convert_list_object()`` output is a list
    containing only ``str`` items.
    """
    mecab_obj = MecabWrapper(dictType='neologd')
    for sentence in (self.test_senetence, self.test_sentence2):
        parsed_obj = mecab_obj.tokenize(sentence=sentence)
        # The second positional argument of assertTrue is the failure
        # message, so passing a class there never checked the type.
        self.assertIsInstance(parsed_obj, TokenizedSenetence)

        morphs = parsed_obj.convert_list_object()
        self.assertIsInstance(morphs, list)
        self.assertTrue(all(isinstance(mrph, str) for mrph in morphs))
def test_init_userdict(self):
    """Check list-mode tokenization when a user dictionary is supplied.

    Uses the ipadic dictionary together with the CSV at
    ``self.path_user_dict`` and requires the morpheme 'さくらまな' to appear
    among the returned items.
    """
    tokenizer = MecabWrapper(dictType='ipadic', pathUserDictCsv=self.path_user_dict)
    assert isinstance(tokenizer, MecabWrapper)

    morphs = tokenizer.tokenize(sentence=self.test_senetence, return_list=True)
    found = any(u'さくらまな' == item for item in morphs)
    assert found
def test_default_parse(self):
    """Check list-mode tokenization with the ipadic dictionary.

    Both sample sentences are tokenized with ``return_list=True``; each
    result must be a list whose items are all ``str``.
    """
    tokenizer = MecabWrapper(dictType="ipadic")
    assert isinstance(tokenizer, MecabWrapper)

    def check_morphs(morphs):
        # Shared verification for each tokenized sentence.
        assert isinstance(morphs, list)
        assert all(isinstance(item, str) for item in morphs)

    check_morphs(tokenizer.tokenize(sentence=self.test_senetence, return_list=True))
    check_morphs(tokenizer.tokenize(sentence=self.test_sentence2, return_list=True))
def test_default_parse(self):
    """Verify behaviour in the default configuration (ipadic dictionary).

    Tokenizes ``self.test_senetence`` with ``return_list=True`` and checks
    that the result is a list of strings. The string type checked depends
    on the running interpreter version.
    """
    tokenizer = MecabWrapper(dictType="ipadic")
    assert isinstance(tokenizer, MecabWrapper)

    morphs = tokenizer.tokenize(sentence=self.test_senetence, return_list=True)
    assert isinstance(morphs, list)

    # ``str`` on Python 3 and later; ``string_types`` on older interpreters.
    # The conditional expression evaluates only the branch that is selected.
    expected_type = str if python_version >= (3, 0, 0) else string_types
    for item in morphs:
        assert isinstance(item, expected_type)
def test_init_alldict(self):
    """Verify behaviour when all dictionaries are requested.

    Constructing ``MecabWrapper`` with ``dictType='all'`` and the user
    dictionary CSV at ``self.path_user_dict`` is expected to raise.
    """
    # Only the constructor call belongs inside the context manager. An
    # assertion placed after it would never run when the constructor raises,
    # and its AssertionError would itself satisfy assertRaises(Exception) if
    # the constructor unexpectedly returned an object of the wrong type.
    with self.assertRaises(Exception):
        MecabWrapper(dictType='all', pathUserDictCsv=self.path_user_dict)
def test_parse_jumandic(self):
    """Expect ``MecabWrapper`` construction with ``dictType='jumandic'`` to raise."""
    # Only the constructor call belongs inside the context manager. An
    # assertion placed after it would never run when the constructor raises,
    # and its AssertionError would itself satisfy assertRaises(Exception) if
    # the constructor unexpectedly returned an object of the wrong type.
    with self.assertRaises(Exception):
        MecabWrapper(dictType='jumandic')