예제 #1
0
def test_textframe():
    """Check that ``TextFrame.word_tokens`` returns the expected token list.

    The sample mixes plain words, an e-mail address, a URL and an emoji.
    Per the expected list, the address and the URL each come back as one
    punctuation-free token and the emoji is kept as its own token.
    """
    docx = TextFrame()
    # The address has to be a real one: the expected token 'examplegmailcom'
    # can only be produced from "example@gmail.com".
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.word_tokens()
    assert result == [
        'This', 'is', 'the', 'mail', 'examplegmailcom', 'our', 'WEBSITE', 'is',
        'httpsexamplecom', '😊',
    ]
예제 #2
0
def read_txt(filename, encoding=None):
    """
    Read a Text File and Create A TextFrame From it

    Parameters
    ----------
    filename : path of the text file to read
    encoding : optional text encoding passed to ``open``; ``None`` (the
        default) keeps the platform's default encoding

    Returns
    ----------
    Returns a TextFrame built from the full contents of the file
    """
    with open(filename, "r", encoding=encoding) as f:
        text_read = f.read()
    # Only the text is needed from here on, so the frame is built after the
    # file handle has been released.
    return TextFrame(text_read)
예제 #3
0
def test_textframe_remove_shortwords():
    """Check ``TextFrame.remove_shortwords(length=3)`` against a mixed sample.

    Per the expected string, the words "is", "the" and "our" are dropped and
    the e-mail address and URL are reduced to their longer word parts.
    """
    docx = TextFrame()
    # The address has to be a real one: the expected "example gmail" can only
    # be produced from "example@gmail.com".
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.remove_shortwords(length=3)
    assert result.text == "This mail example gmail WEBSITE https example"
예제 #4
0
def test_textframe_remove_userhandles():
    """Verify the text produced by ``remove_userhandles`` for an @handle sample."""
    frame = TextFrame()
    frame.text = "This is the tag @jesuslives use wisely "
    cleaned = frame.remove_userhandles()
    # The expected text keeps three spaces where the handle used to be.
    assert cleaned.text == "This is the tag   use wisely "
예제 #5
0
def test_textframe_remove_puncts():
    """Check ``TextFrame.remove_puncts`` against a mixed sample.

    Per the expected string, commas and full stops are removed while the
    ``@`` of the address and the ``://`` of the URL are kept.
    """
    docx = TextFrame()
    # The address has to be a real one: the expected "example@gmailcom" can
    # only be produced from "example@gmail.com".
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.remove_puncts()
    assert result.text == "This is the mail example@gmailcom our WEBSITE is https://examplecom 😊"
예제 #6
0
def test_textframe_remove_stopwords():
    """Check ``TextFrame.remove_stopwords(lang='en')`` against a mixed sample.

    Per the expected string, "This", "is" and "the" are dropped while the
    address, ",our", the URL and the emoji pass through unchanged.
    """
    docx = TextFrame()
    # Use a real e-mail address in both the input and the expected text; a
    # bracketed placeholder is not a meaningful address sample.
    docx.text = "This is the mail example@gmail.com ,our WEBSITE is https://example.com 😊."
    result = docx.remove_stopwords(lang='en')
    assert result.text == "mail example@gmail.com ,our WEBSITE https://example.com 😊."
예제 #7
0
def test_textframe_remove_html():
    """Verify the text produced by ``remove_html_tags`` for an ``<h2>`` sample."""
    frame = TextFrame()
    frame.text = "This is the <h2>example for html tags</h2>"
    stripped = frame.remove_html_tags()
    assert stripped.text == "This is the example for html tags"