Ejemplo n.º 1
0
    def clean(cls, text):
        """Strip URLs and e-mail addresses from text before language classification.

        Args:
            text (str): Source text

        Returns:
            str: The text with URLs, then e-mail addresses, replaced by
                empty strings.
        """
        import cleantext

        # URLs are removed first; e-mail removal then runs on that result.
        without_urls = cleantext.replace_urls(text, replace_with='')
        return cleantext.replace_emails(without_urls, replace_with='')
Ejemplo n.º 2
0
def test_replace_emails():
    """Check that replace_emails swaps an embedded e-mail address for the placeholder."""
    # The input must contain a real address: an obfuscation placeholder such as
    # "[email protected]" holds nothing for replace_emails to match.
    text = "I can be reached at username@example.com through next Friday."
    proc_text = "I can be reached at *EMAIL* through next Friday."
    assert cleantext.replace_emails(text, "*EMAIL*") == proc_text
# !pip install clean-text[gpl]

import cleantext

# Replace URLs with a placeholder token.
text = "www.stackoverflow.com is an amzing website"
cleantext.replace_urls(text, "<URL>")
# Expected result: '<URL> is an amzing website'

# Replace e-mail addresses with a placeholder token.
# The input needs a real address for replace_emails to find.
text = "My email id is username@example.com"
cleantext.replace_emails(text, "<EMAIL>")
# Expected result: 'My email id is <EMAIL>'
Ejemplo n.º 4
0
def test_not_email_addresses():
    """Check that no sample in not_email_addresses is replaced wholesale by the placeholder."""
    placeholder = "*EMAIL*"
    for candidate in not_email_addresses:
        replaced = cleantext.replace_emails(candidate, placeholder)
        # A whole-string match would mean the sample was treated as an address.
        assert replaced != placeholder