Beispiel #1
0
def feature_number_of_emojis_in_tweet(df : pd.DataFrame, df_timeline : pd.DataFrame, user_id_str_col_name : str = "user_id_str", in_col_name : str = "text", out_col_name : str = "number_of_emoji") -> pd.DataFrame:
    """Attach per-user distribution features of per-tweet emoji counts.

    For every distinct user id in ``df``, the rows of ``df_timeline`` that
    belong to that user are selected, ``emoji_count`` is applied to each value
    of ``in_col_name``, and the resulting list is passed to
    ``distribution_feature``. The entries of each result are turned into
    columns named ``"<out_col_name>.<key>"`` and joined back onto ``df``.

    Args:
        df: One row per user; must contain ``user_id_str_col_name``.
        df_timeline: One row per tweet; must contain ``user_id_str_col_name``
            and ``in_col_name``.
        user_id_str_col_name: Name of the user-id column present in both frames.
        in_col_name: Column of ``df_timeline`` handed to ``emoji_count``.
        out_col_name: Prefix for the generated feature columns.

    Returns:
        A new DataFrame: ``df`` merged with the feature columns on the user-id
        column. When ``df`` contains no user ids, a copy of ``df`` is returned
        without feature columns, because their names are only known from the
        output of ``distribution_feature``.
    """
    per_user_rows = []
    for user_id_str in df[user_id_str_col_name].unique():
        belongs_to_user = df_timeline[user_id_str_col_name] == user_id_str
        nums = df_timeline.loc[belongs_to_user, in_col_name].apply(lambda x: emoji_count(x)).tolist()
        # NOTE(review): distribution_feature is assumed to return a mutable
        # mapping of statistic name -> value; confirm against its definition.
        res = distribution_feature(nums)
        res[user_id_str_col_name] = user_id_str
        per_user_rows.append(res)

    if not per_user_rows:
        # An empty feature frame has no columns at all, so merging it would
        # raise MergeError; there is nothing to add for an empty input.
        return df.copy()

    df_ress = pd.DataFrame(per_user_rows)
    df_ress.columns = [c if c == user_id_str_col_name else f"{out_col_name}.{c}" for c in df_ress.columns]

    # Join explicitly on the user id. Without ``on=`` pandas joins on every
    # column the two frames share, so a feature column already present in
    # ``df`` would silently become part of the join key and could drop rows.
    return df.merge(df_ress, on=user_id_str_col_name)
Beispiel #2
0
def feature_number_of_emoji_in_description(df : pd.DataFrame, in_col_name : str = "description", out_col_name : str = "number_of_emoji_in_description") -> pd.DataFrame:
    """Store the result of ``emoji_count`` for each ``in_col_name`` value.

    Args:
        df: Frame holding the ``in_col_name`` column; modified in place.
        in_col_name: Column whose values are passed to ``emoji_count``.
        out_col_name: Column that receives the per-row results.

    Returns:
        The same ``df`` object, now carrying ``out_col_name``.
    """
    def _count(value):
        return emoji_count(value)

    descriptions = df[in_col_name]
    emoji_totals = descriptions.apply(_count)
    df[out_col_name] = emoji_totals
    return df
Beispiel #3
0
def feature_ratio_of_emoji_in_description(df : pd.DataFrame, in_col_name : str = "description", out_col_name : str = "ratio_of_emoji_in_description") -> pd.DataFrame:
    """Store ``emoji_count(value) / len(value)`` for each ``in_col_name`` value.

    Values of length zero yield ``0`` instead of dividing by zero.

    Args:
        df: Frame holding the ``in_col_name`` column; modified in place.
        in_col_name: Column whose values are measured.
        out_col_name: Column that receives the per-row ratios.

    Returns:
        The same ``df`` object, now carrying ``out_col_name``.
    """
    def _emoji_ratio(value):
        # Guard first so an empty value never reaches the division.
        if not len(value) > 0:
            return 0
        return emoji_count(value) / len(value)

    ratios = df[in_col_name].apply(_emoji_ratio)
    df[out_col_name] = ratios
    return df
Beispiel #4
0
def feature_number_of_emoji_in_user_name(df : pd.DataFrame, in_col_name : str = "name", out_col_name : str = "number_of_emoji_in_user_name") -> pd.DataFrame:
    """Store the result of ``emoji_count`` for each ``in_col_name`` value.

    Args:
        df: Frame holding the ``in_col_name`` column; modified in place.
        in_col_name: Column whose values are passed to ``emoji_count``.
        out_col_name: Column that receives the per-row results.

    Returns:
        The same ``df`` object, now carrying ``out_col_name``.
    """
    def _emoji_total(user_name):
        return emoji_count(user_name)

    source_column = df[in_col_name]
    df[out_col_name] = source_column.apply(_emoji_total)
    return df