コード例 #1
0
def test_textframe():
    """Check that ``TextFrame.word_tokens()`` returns the expected token list.

    In the expected list the e-mail address and the URL each appear as a
    single token with their punctuation removed, and the emoji is kept as
    its own token.
    """
    docx = TextFrame()
    # The address must be a literal e-mail: the expected token
    # 'examplegmailcom' below corresponds to "example@gmail.com".
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.word_tokens()
    assert result == [
        'This', 'is', 'the', 'mail', 'examplegmailcom', 'our', 'WEBSITE', 'is',
        'httpsexamplecom', '😊'
    ]
コード例 #2
0
def test_textframe_remove_shortwords():
    """Check the text produced by ``remove_shortwords(length=3)``.

    The expected text omits 'is', 'the', 'our', 'com' and the emoji, and
    the e-mail address and URL appear split into separate words.
    """
    docx = TextFrame()
    # The address must be a literal e-mail: the expected words
    # "example gmail" below correspond to "example@gmail.com".
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.remove_shortwords(length=3)
    assert result.text == "This mail example gmail WEBSITE https example"
コード例 #3
0
def test_textframe_remove_userhandles():
    """Check that ``remove_userhandles()`` drops the ``@jesuslives`` handle.

    The expected text keeps the surrounding whitespace, which is why three
    consecutive spaces remain where the handle used to be.
    """
    expected_text = "This is the tag   use wisely "
    frame = TextFrame()
    frame.text = "This is the tag @jesuslives use wisely "
    cleaned = frame.remove_userhandles()
    assert cleaned.text == expected_text
コード例 #4
0
def test_textframe_remove_puncts():
    """Check the text produced by ``remove_puncts()``.

    In the expected text the '.' and ',' characters are gone, while '@',
    ':' and '/' are still present in the address and the URL.
    """
    docx = TextFrame()
    # The address must be a literal e-mail: the expected
    # "example@gmailcom" below corresponds to "example@gmail.com".
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.remove_puncts()
    assert result.text == "This is the mail example@gmailcom our WEBSITE is https://examplecom 😊"
コード例 #5
0
def test_textframe_remove_stopwords():
    """Check the text produced by ``remove_stopwords(lang='en')``.

    The expected text omits 'This', 'the' and both occurrences of 'is';
    the e-mail address, ',our', the URL and the emoji are left as they are.
    """
    docx = TextFrame()
    # Input and expected output both carry the literal address
    # "example@gmail.com", matching the other tests that use this sentence.
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.remove_stopwords(lang='en')
    assert result.text == "mail example@gmail.com ,our WEBSITE https://example.com 😊."
コード例 #6
0
def test_textframe_remove_html():
    """Check that ``remove_html_tags()`` strips the ``<h2>`` markup.

    Only the opening and closing tags are expected to disappear; the text
    between them stays in place.
    """
    expected_text = "This is the example for html tags"
    frame = TextFrame()
    frame.text = "This is the <h2>example for html tags</h2>"
    stripped = frame.remove_html_tags()
    assert stripped.text == expected_text