Esempio n. 1
0
def replace_urls_w_placeholder(s: TextSeries) -> TextSeries:
    """Replace every URL in *s* with its placeholder.

    A URL is any run of non-whitespace characters starting with ``http``.
    Each distinct URL is passed to ``_add_url_placeholder`` exactly once and
    every occurrence of it receives that same result.

    The substitution happens in a single regex pass rather than one literal
    replace per URL.  That way a URL that is a prefix of another one
    (``http://a.com`` vs. ``http://a.com/page``) cannot corrupt the longer
    match, and placeholder text that was just inserted is never rescanned.

    Args:
        s: Series of strings to process.

    Returns:
        A new Series with the URLs substituted; *s* itself is not modified.
    """
    url_pattern = r"(http\S+)"
    # Column 0 holds the single capture group of every match in every row.
    urls_found = s.str.extractall(url_pattern)[0].unique()
    # Build the placeholders up front: one helper call per distinct URL, in
    # order of first appearance.
    placeholders = {url: _add_url_placeholder(url) for url in urls_found}
    if not placeholders:
        return s.copy()
    # The same pattern drives extraction and substitution, so every match
    # seen here has an entry in the mapping.
    return s.str.replace(
        url_pattern,
        lambda match: placeholders[match.group(0)],
        regex=True,
    )
Esempio n. 2
0
def replace_mentions_w_placeholder(s: TextSeries) -> TextSeries:
    """Replace every ``@mention`` in *s* with its placeholder.

    A mention is ``@`` followed by one or more ASCII letters or digits.
    Each distinct mention is passed to ``_add_mention_placeholder`` exactly
    once and every occurrence of it receives that same result.

    The substitution happens in a single regex pass rather than one literal
    replace per mention.  That way a mention that is a prefix of another one
    (``@bob`` vs. ``@bobby``) cannot corrupt the longer match, and
    placeholder text that was just inserted is never rescanned.

    Args:
        s: Series of strings to process.

    Returns:
        A new Series with the mentions substituted; *s* itself is not
        modified.
    """
    # NOTE(review): unlike the hashtag pattern used elsewhere in this file,
    # this one does not accept "_" -- confirm whether that is intentional.
    mention_pattern = r"(@[a-zA-Z0-9]+)"
    # Column 0 holds the single capture group of every match in every row.
    mentions_found = s.str.extractall(mention_pattern)[0].unique()
    # Build the placeholders up front: one helper call per distinct mention,
    # in order of first appearance.
    placeholders = {
        mention: _add_mention_placeholder(mention)
        for mention in mentions_found
    }
    if not placeholders:
        return s.copy()
    # The same pattern drives extraction and substitution, so every match
    # seen here has an entry in the mapping.
    return s.str.replace(
        mention_pattern,
        lambda match: placeholders[match.group(0)],
        regex=True,
    )
Esempio n. 3
0
def replace_hashtags_w_placeholder(s: TextSeries) -> TextSeries:
    """Replace every ``#hashtag`` in *s* with its placeholder.

    A hashtag is ``#`` followed by one or more ASCII letters, digits or
    underscores.  Each distinct hashtag is passed to
    ``_add_hashtag_placeholder`` exactly once and every occurrence of it
    receives that same result.

    The substitution happens in a single regex pass rather than one literal
    replace per hashtag.  That way a hashtag that is a prefix of another one
    (``#py`` vs. ``#python``) cannot corrupt the longer match, and
    placeholder text that was just inserted is never rescanned.

    Args:
        s: Series of strings to process.

    Returns:
        A new Series with the hashtags substituted; *s* itself is not
        modified.
    """
    hashtag_pattern = r"(#[a-zA-Z0-9_]+)"
    # Column 0 holds the single capture group of every match in every row.
    hashtags_found = s.str.extractall(hashtag_pattern)[0].unique()
    # Build the placeholders up front: one helper call per distinct hashtag,
    # in order of first appearance.
    placeholders = {
        hashtag: _add_hashtag_placeholder(hashtag)
        for hashtag in hashtags_found
    }
    if not placeholders:
        return s.copy()
    # The same pattern drives extraction and substitution, so every match
    # seen here has an entry in the mapping.
    return s.str.replace(
        hashtag_pattern,
        lambda match: placeholders[match.group(0)],
        regex=True,
    )