def test_retokenize(self, conjugation):
    """Check ``Doc.retokenize`` both without and with a replacement text.

    Without an argument the document text must still equal ``TEXT``; with a
    new string, ``get_text()`` must return exactly that string.

    ``conjugation`` is presumably a pytest fixture defined outside this view.
    """
    document = Doc(TEXT, conjugation)

    # No argument: the text read back is expected to be unchanged.
    document.retokenize()
    assert document.get_text() == TEXT

    # Explicit argument: the document is expected to now hold the new text.
    replacement = "今日はいい天気ですね。"
    document.retokenize(replacement)
    assert document.get_text() == replacement
def test():
    """Demonstrate tokenizing, deleting and updating words in a ``Doc``.

    The trailing comments record the output the original author expected
    from each ``print`` call.
    """
    doc = Doc("本を書きました。")

    # Print the surface forms of the tokens.
    print("/".join(token.surface for token in doc.words))  # 本/を/書き/まし/た/。

    # Print the plain text.
    print(doc.get_text())  # 本を書きました。

    # Delete a word.
    doc.delete(3)
    # Word conjugation will be done as needed.
    print(doc.get_text())  # 本を書いた。

    # Update a word.
    replacement = doc.conjugation.tokenize("読む")
    # In addition to conjugation, the peripheral words are transformed as needed.
    doc.update(2, replacement)
    print(doc.get_text())  # 本を読んだ。
def test_delete(self, conjugation, text, interval, expect):
    """Check that ``Doc.delete(interval)`` leaves the text equal to ``expect``.

    The arguments are presumably supplied by pytest fixtures/parametrization
    defined outside this view.
    """
    document = Doc(text, conjugation)
    document.delete(interval)

    remaining_text = document.get_text()
    assert remaining_text == expect
def test_insert(self, conjugation, text, surfaces, i, expect):
    """Check that inserting tokenized ``surfaces`` at ``i`` yields ``expect``.

    ``surfaces`` is tokenized with ``conjugation.tokenize`` and the result is
    handed to ``Doc.insert`` together with ``i``.

    The arguments are presumably supplied by pytest fixtures/parametrization
    defined outside this view.
    """
    document = Doc(text, conjugation)
    new_words = conjugation.tokenize(surfaces)

    document.insert(i, new_words)

    resulting_text = document.get_text()
    assert resulting_text == expect
def test_insert_a_single_word(self, conjugation):
    """Check that ``Doc.insert`` accepts a single word rather than a list.

    The first token of "公園" is inserted at position 1; the resulting text
    must be the first entry of ``SURFACES``, then the inserted text, then the
    remaining entries of ``SURFACES``.
    """
    document = Doc(TEXT, conjugation)
    inserted = "公園"
    single_word = conjugation.tokenize(inserted)[0]

    document.insert(1, single_word)

    actual = document.get_text()
    expected = SURFACES[0] + inserted + "".join(SURFACES[1:])
    assert actual == expected
def test_conjugate_word_in_doc(self, conjugation, text, i, c_form, expect):
    """Check that ``Doc.conjugate(i, c_form)`` changes the text to ``expect``.

    The arguments are presumably supplied by pytest fixtures/parametrization
    defined outside this view.
    """
    document = Doc(text, conjugation)
    document.conjugate(i, c_form)

    conjugated_text = document.get_text()
    assert conjugated_text == expect
def test_should_be_able_to_extract_text(self, conjugation, interval, expect):
    """Check that ``Doc.get_text(interval)`` returns ``expect`` for ``TEXT``.

    The arguments are presumably supplied by pytest fixtures/parametrization
    defined outside this view.
    """
    document = Doc(TEXT, conjugation)

    extracted = document.get_text(interval)
    assert extracted == expect
def test_should_be_able_to_initialize_with_only_a_string(self):
    """Check that a ``Doc`` can be built from ``TEXT`` alone.

    No conjugation object is passed; the test only requires that construction
    succeeds and yields a non-``None`` object.
    """
    assert Doc(TEXT) is not None
def test_should_be_able_to_initialize_with_string(self, conjugation):
    """Check that a ``Doc`` built from ``TEXT`` tokenizes into ``SURFACES``.

    The ``surface`` attribute of every entry in ``doc.words`` is collected, in
    order, and compared against the ``SURFACES`` constant.
    """
    document = Doc(TEXT, conjugation)

    observed_surfaces = [token.surface for token in document.words]
    assert observed_surfaces == SURFACES
def test_to_word_list(self):
    """Check that every item yielded by ``Doc.to_word_list()`` is a dict.

    ``isinstance`` is used instead of an exact ``type(...) ==`` comparison, so
    dict subclasses are accepted as well.
    """
    document = Doc(TEXT)

    for entry in document.to_word_list():
        assert isinstance(entry, dict)
def test_simple_view(self):
    """Check that ``Doc.simple_view()`` returns something of non-zero length."""
    document = Doc(TEXT)

    view = document.simple_view()
    assert len(view) > 0
def test_update_surfaces(self, conjugation, text, interval, surfaces, expect):
    """Check that ``Doc.update_surfaces(interval, surfaces)`` yields ``expect``.

    Unlike ``test_update``, ``surfaces`` is passed to the document as-is,
    without being tokenized first.

    The arguments are presumably supplied by pytest fixtures/parametrization
    defined outside this view.
    """
    document = Doc(text, conjugation)
    document.update_surfaces(interval, surfaces)

    updated_text = document.get_text()
    assert updated_text == expect
def test_update(self, conjugation, text, interval, surfaces, expect):
    """Check that updating ``interval`` with tokenized ``surfaces`` yields ``expect``.

    ``surfaces`` is tokenized with ``conjugation.tokenize`` and the result is
    handed to ``Doc.update`` together with ``interval``.

    The arguments are presumably supplied by pytest fixtures/parametrization
    defined outside this view.
    """
    document = Doc(text, conjugation)
    new_words = conjugation.tokenize(surfaces)

    document.update(interval, new_words)

    updated_text = document.get_text()
    assert updated_text == expect
def test_update_a_single_word(self, conjugation):
    """Check that ``Doc.update`` accepts a single word rather than a list.

    The first token of "毎週" replaces position 0; the resulting text must be
    "毎週" followed by every entry of ``SURFACES`` after the first.
    """
    document = Doc(TEXT, conjugation)
    replacement = "毎週"
    single_word = conjugation.tokenize(replacement)[0]

    document.update(0, single_word)

    actual = document.get_text()
    expected = "毎週" + "".join(SURFACES[1:])
    assert actual == expected