Example #1
0
def test_preprocessing_handles_hashtags():
    """
    A CamelCase hashtag is expected to come out as lowercase, space-separated words.
    """
    expected = "esto es una genialidad"
    processed = preprocess_tweet("esto es #UnaGenialidad")

    assert processed == expected
Example #2
0
def test_preprocessing_replaces_users():
    """
    A leading @handle is expected to be swapped for the "[USER]" token.
    """
    expected = "[USER] debería cambiar esto"
    processed = preprocess_tweet("@perezjotaeme debería cambiar esto")

    assert processed == expected
Example #3
0
def test_shortens_laughters():
    """
    With lang="en", long "haha"-style laughter is expected to collapse to "haha".
    """
    tweet = "hahahhahaha can't believe it ahahahahahah"
    expected = "haha can't believe it haha"

    processed = preprocess_tweet(tweet, lang="en")

    assert processed == expected
Example #4
0
def test_shortens_laughters():
    """
    With the default language, long "jaja"-style laughter is expected to
    collapse to "jaja".
    """
    tweet = "jajajajaajjajaajajaja no lo puedo creer ajajaj"
    expected = "jaja no lo puedo creer jaja"

    processed = preprocess_tweet(tweet)

    assert processed == expected
Example #5
0
def test_shortens_repeated_characters():
    """
    With shorten=2, long runs of a repeated character are expected to be
    cut down to two occurrences.
    """
    expected = "no entiendo naadaa"
    processed = preprocess_tweet("no entiendo naaaaaaaadaaaaaaaa", shorten=2)

    assert processed == expected
Example #6
0
def test_preprocessing_replaces_urls():
    """
    A trailing URL is expected to be swapped for the "[URL]" token.
    """
    expected = "esto es muy bueno [URL]"
    processed = preprocess_tweet("esto es muy bueno http://bit.ly/sarasa")

    assert processed == expected
def my_preprocess(*args):
    """
    Call ``preprocess_tweet`` with a fixed set of token overrides.

    All positional arguments are forwarded unchanged; the user, URL and
    hashtag tokens and the emoji wrapper are always set to the values below.
    """
    return preprocess_tweet(
        *args,
        user_token="USUARIO",
        url_token="URL",
        hashtag_token="hashtag",
        emoji_wrapper="",
    )
Example #8
0
def preprocess(tweet):
    """
    Preprocess a tweet and flatten it into one whitespace-normalized line.

    Runs ``preprocess_tweet`` with the externally defined ``preprocess_args``,
    turns each run of newlines into ". ", squeezes every whitespace run into
    a single space and strips both ends of the result.
    """
    cleaned = preprocess_tweet(tweet, **preprocess_args)
    without_newlines = re.sub("\n+", ". ", cleaned)
    single_spaced = re.sub(r"\s+", " ", without_newlines)
    return single_spaced.strip()
Example #9
0
def test_replaces_emoji_in_english():
    """
    With lang="en", an emoji is expected to become its English description
    wrapped in "[EMOJI]" markers.
    """
    expected = ' [EMOJI] rolling on the floor laughing [EMOJI] '
    processed = preprocess_tweet("🤣", lang="en")

    assert processed == expected
Example #10
0
def test_replaces_emoji():
    """
    With the default language, an emoji is expected to become its Spanish
    description wrapped in "[EMOJI]" markers.
    """
    expected = ' [EMOJI] cara revolviéndose de la risa [EMOJI] '
    processed = preprocess_tweet("🤣")

    assert processed == expected
Example #11
0
def test_replaces_odd_quotation_marks():
    """
    Curly quotation marks are expected to be normalized to plain double quotes.
    """
    tweet = "Pero pará un poco, “loquita”"
    expected = 'Pero pará un poco, "loquita"'

    processed = preprocess_tweet(tweet)

    assert processed == expected
def my_preprocess(tweet):
    """
    Preprocess a tweet and return it as a single tidy line.

    The tweet goes through ``preprocess_tweet`` (configured by the externally
    defined ``preprocess_args``); newline runs then become ". ", whitespace
    runs become one space, and the result is stripped.
    """
    text = preprocess_tweet(tweet, **preprocess_args)
    # Order matters: newlines must be rewritten before whitespace is squeezed.
    for pattern, replacement in (("\n+", ". "), (r"\s+", " ")):
        text = re.sub(pattern, replacement, text)
    return text.strip()