def test_DocxParser(self):
    """Parse ``files/Test.docx`` and compare its processed stems to the expected list."""
    expected_stems = [
        'i', 'test', 'poop',
        'test', 'anim', 'test',
        'anim', 'googl', 'link',
    ]

    parser = DocxParser()
    parser.parse('files/Test.docx')

    assert parser.get_processed_stems() == expected_stems
def test_get_processed_stems(self):
    """Check ``get_processed_stems()`` against the known output for ``files/Test.docx``."""
    document = DocxParser()
    document.parse('files/Test.docx')
    actual = document.get_processed_stems()

    # Expected stems, in document order.
    wanted = ['i', 'test', 'poop', 'test', 'anim', 'test', 'anim', 'googl', 'link']
    assert actual == wanted
def test_get_stems(self):
    """Check ``get_stems()`` against the known output for ``files/Test.docx``."""
    # Some expected entries carry trailing punctuation ('poops,', 'animals,',
    # 'something.'); the comparison is exact, so they must match verbatim.
    expected = [
        'i', 'am', 'test', 'here',
        'poops,', 'test', 'animals,', 'test',
        'anim', 'something.', 'googl', 'link',
    ]

    parser = DocxParser()
    parser.parse('files/Test.docx')
    stems = parser.get_stems()

    assert stems == expected
def docx_test():
    """Parse the document at a hard-coded path and print its processed stems."""
    reader = DocxParser()
    reader.parse(r'D:\Test22.docx')
    processed = reader.get_processed_stems()
    print(processed)
def docx():
    """Print the processed stems of ``files/Test2.docx`` followed by their count."""
    reader = DocxParser()
    reader.parse(r'files/Test2.docx')

    # The parser is queried a second time for the count, so
    # get_processed_stems() runs twice per call, in this order.
    stems = reader.get_processed_stems()
    stem_count = len(reader.get_processed_stems())

    print('docx parser', stems, stem_count)
def test_get_processed_text(self):
    """Check ``get_processed_text()`` against the known output for ``files/Test.docx``."""
    expected_text = "I test poops Test animals tests animal google link"

    parser = DocxParser()
    parser.parse('files/Test.docx')

    assert parser.get_processed_text() == expected_text
def test_get_text(self):
    """Check ``get_text()`` against the known output for ``files/Test.docx``."""
    document = DocxParser()
    document.parse('files/Test.docx')
    extracted = document.get_text()

    # Exact match, including punctuation and capitalisation.
    wanted = "I am test here poops, Test animals, tests animal something. google link"
    assert extracted == wanted