コード例 #1
0
ファイル: methods.py プロジェクト: divisia/imajiner
    def clean(cls, text):
        """Strip URLs and e-mail addresses from text before language classification.

        Args:
            text (str): Source text.

        Returns:
            str: The text with every URL and e-mail address replaced by an
                empty string.
        """
        import cleantext

        # URLs are removed first; the e-mail pass then runs on that result.
        without_urls = cleantext.replace_urls(text, replace_with='')
        return cleantext.replace_emails(without_urls, replace_with='')
コード例 #2
0
ファイル: test_clean.py プロジェクト: raoden1/clean-text
def test_replace_emails():
    """Verify that replace_emails swaps an embedded address for the given token."""
    # The input must contain a syntactically valid address; an obfuscation
    # placeholder such as "[email protected]" has nothing for the replacer to match.
    text = "I can be reached at username@example.com through next Friday."
    proc_text = "I can be reached at *EMAIL* through next Friday."
    assert cleantext.replace_emails(text, "*EMAIL*") == proc_text
コード例 #3
0
# Install the dependency first: pip install clean-text[gpl]

import cleantext

# Replace URLs with a placeholder token.
text = "www.stackoverflow.com is an amzing website"
cleantext.replace_urls(text, "<URL>")
# Result: '<URL> is an amzing website'

# Replace e-mail addresses with a placeholder token.
# The sample must hold a real-looking address for the replacement to apply.
text = "My email id is user@example.com"
cleantext.replace_emails(text, "<EMAIL>")
# Result: 'My email id is <EMAIL>'
コード例 #4
0
def test_not_email_addresses():
    """Check that no sample in not_email_addresses collapses to the bare e-mail token.

    not_email_addresses is defined outside this snippet; each of its items is
    passed through replace_emails and the result must differ from "*EMAIL*".
    """
    placeholder = "*EMAIL*"
    for candidate in not_email_addresses:
        replaced = cleantext.replace_emails(candidate, placeholder)
        assert replaced != placeholder