def test_textframe():
    """Check ``TextFrame.word_tokens`` on a sentence with an e-mail, a URL and an emoji.

    The assertion expects punctuation-free word tokens: the e-mail address and
    the URL each collapse into a single token and the emoji is kept as its own
    token.
    """
    docx = TextFrame()
    # The address has to be the real "example@gmail.com": the expected token
    # 'examplegmailcom' below is exactly that address without its punctuation.
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.word_tokens()
    assert result == [
        'This',
        'is',
        'the',
        'mail',
        'examplegmailcom',
        'our',
        'WEBSITE',
        'is',
        'httpsexamplecom',
        '😊',
    ]
def test_textframe_remove_shortwords():
    """Check ``TextFrame.remove_shortwords(length=3)`` on the sample sentence.

    The assertion expects the short words ("is", "the", "our", "com") and the
    emoji to be gone, and the remaining words to be joined by single spaces.
    """
    docx = TextFrame()
    # The address has to be the real "example@gmail.com": the expected text
    # contains the words "example" and "gmail" taken from it.
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.remove_shortwords(length=3)
    assert result.text == "This mail example gmail WEBSITE https example"
def test_textframe_remove_userhandles():
    """``remove_userhandles`` is expected to drop the ``@jesuslives`` handle."""
    frame = TextFrame()
    frame.text = "This is the tag @jesuslives use wisely "
    expected_text = "This is the tag use wisely "

    cleaned = frame.remove_userhandles()

    # The method's return value exposes the processed string via ``.text``.
    assert cleaned.text == expected_text
def test_textframe_remove_puncts():
    """Check ``TextFrame.remove_puncts`` on the sample sentence.

    The assertion expects the dots and the comma to be removed while ``@``,
    ``:`` and ``/`` (and the emoji) stay in place.
    """
    docx = TextFrame()
    # The address has to be the real "example@gmail.com": the expected text
    # contains "example@gmailcom", i.e. that address minus its dot.
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.remove_puncts()
    assert result.text == "This is the mail example@gmailcom our WEBSITE is https://examplecom 😊"
def test_textframe_remove_stopwords():
    """Check ``TextFrame.remove_stopwords(lang='en')`` on the sample sentence.

    The assertion expects "This", "is" and "the" to be removed, while the
    e-mail address, the URL, the emoji and the punctuation-prefixed ",our"
    are left untouched.
    """
    docx = TextFrame()
    # Use a real e-mail address in both the input and the expected text so the
    # test actually exercises an address token rather than a scrubbed
    # placeholder; the assertion expects that token to pass through unchanged.
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.remove_stopwords(lang='en')
    assert result.text == "mail example@gmail.com ,our WEBSITE https://example.com 😊."
def test_textframe_remove_html():
    """``remove_html_tags`` is expected to strip the ``<h2>`` pair and keep its content."""
    frame = TextFrame()
    frame.text = "This is the <h2>example for html tags</h2>"
    expected_text = "This is the example for html tags"

    stripped = frame.remove_html_tags()

    # The method's return value exposes the processed string via ``.text``.
    assert stripped.text == expected_text