def main(result_dict, df, chat_members, running_all_functions):
    """Append every member's mean per-message word length to ``result_dict``.

    Args:
        result_dict: Mutated in place; ``'average word length'`` is reset to
            an empty list and receives one rounded value per member.
        df: Message table. Reads ``'text'``, ``'sender'`` and
            ``'is reaction?'`` (not created here -- presumably supplied by
            ``initialize_result_dict`` or the caller; confirm). Gains a
            ``'word length'`` column, plus ``'is attachment?'`` and
            ``'is link?'`` when ``running_all_functions`` is falsy.
        chat_members: Iterable of sender names, processed in order.
        running_all_functions: When truthy, ``'is attachment?'`` and
            ``'is link?'`` are expected to exist on ``df`` already.
    """
    result_dict['average word length'] = []

    if not running_all_functions:
        # Standalone run: nobody has built the filter columns yet.
        df['is attachment?'] = df['type'].apply(helpers.is_attachment)
        df['is link?'] = df['text'].apply(helpers.is_link)

    df['word length'] = df['text'].apply(helpers.average_word_length)

    for member_name in chat_members:
        initialize_result_dict(member_name, df, result_dict)

        from_member = df['sender'] == member_name
        not_reaction = ~df['is reaction?']
        not_attachment = ~df['is attachment?']
        not_link = ~df['is link?']
        eligible = df[from_member & not_reaction & not_attachment & not_link]

        mean_length = eligible['word length'].mean()
        # The mean of an empty selection is NaN; report 0 for that member.
        mean_length = 0 if math.isnan(mean_length) else mean_length
        result_dict['average word length'].append(round(mean_length, 1))
Beispiel #2
0
def main(result_dict, df, chat_members):
    """Tally game messages and game starts for every chat member.

    Args:
        result_dict: Mutated in place. Four lists are (re)created, each
            receiving one entry per member: the game-message count, that
            count as a percentage of the member's non-reaction messages, the
            game-start count, and the member's percentage of all game starts.
        df: Message table. Reads ``'text'``, ``'type'`` and ``'sender'``;
            gains ``'is game message?'`` and ``'is game start?'``.
        chat_members: Iterable of sender names, processed in order.
    """
    for key in ('messages that are games',
                '% of messages that are games',
                'messages that are game starts',
                '% of game starts that are by this person'):
        result_dict[key] = []

    df['is game message?'] = df.apply(
        lambda row: helpers.is_game_message(row.text, row.type), axis=1)
    df['is game start?'] = df.apply(
        lambda row: helpers.is_game_start(row.text, row.type), axis=1)

    for member_name in chat_members:
        # Second element is used below as the member's non-reaction count.
        _, non_reaction_total = initialize_result_dict(
            member_name, df, result_dict)

        member_games = df[(df['is game message?'])
                          & (df['sender'] == member_name)]
        member_starts = df[(df['is game start?'])
                           & (df['sender'] == member_name)]
        game_count = len(member_games)
        start_count = len(member_starts)

        result_dict['messages that are games'].append(game_count)
        games_ratio = helpers.safe_divide(game_count, non_reaction_total)
        result_dict['% of messages that are games'].append(
            round(games_ratio * 100, 2))
        result_dict['messages that are game starts'].append(start_count)

    # Shares of game starts need the chat-wide total, so they come last.
    starts_per_member = result_dict['messages that are game starts']
    all_starts = sum(starts_per_member)
    result_dict['% of game starts that are by this person'].extend(
        round(helpers.safe_divide(count, all_starts) * 100, 2)
        for count in starts_per_member)
def main(result_dict, df, chat_members, minutes_threshold):
    """Count how many conversations each member started.

    Args:
        result_dict: Mutated in place; gains ``'conversation starters'`` and
            ``'% of all conversation starters that are by this person'``,
            each with one entry per member.
        df: Message table. Reads ``'time'``, ``'sender'`` and
            ``'is reaction?'`` (not created here); gains
            ``'is conversation starter?'``.
        chat_members: Iterable of sender names, processed in order.
        minutes_threshold: Forwarded to ``helpers.is_conversation_starter``
            together with the gap to the previous row.
    """
    result_dict['conversation starters'] = []
    result_dict['% of all conversation starters that are by this person'] = []

    gaps = df['time'].diff()
    df['is conversation starter?'] = gaps.apply(
        lambda gap: helpers.is_conversation_starter(gap, minutes_threshold))
    # The first row has no predecessor, so it is flagged unconditionally.
    starter_col = df.columns.get_loc('is conversation starter?')
    df.iloc[0, starter_col] = True

    for member_name in chat_members:
        initialize_result_dict(member_name, df, result_dict)
        started_by_member = df[(df['is conversation starter?'])
                               & (df['sender'] == member_name)
                               & (~df['is reaction?'])]
        result_dict['conversation starters'].append(len(started_by_member))

    # Percentages need the chat-wide total, so they are filled in afterwards.
    starter_counts = result_dict['conversation starters']
    all_starters = sum(starter_counts)
    result_dict[
        '% of all conversation starters that are by this person'].extend(
            round(helpers.safe_divide(count, all_starters) * 100, 2)
            for count in starter_counts)
def main(result_dict, df, chat_members):
    """Record each member's message total and share of all messages.

    Args:
        result_dict: Mutated in place; gains ``'total messages'`` and
            ``'% of all messages that are by this person'``, each with one
            entry per member.
        df: Message table, passed through to ``initialize_result_dict``.
        chat_members: Iterable of sender names, processed in order.
    """
    result_dict['total messages'] = []
    result_dict['% of all messages that are by this person'] = []

    for member_name in chat_members:
        # First element is used as the member's total message count.
        member_total, _ = initialize_result_dict(member_name, df,
                                                 result_dict)
        result_dict['total messages'].append(member_total)

    per_member_totals = result_dict['total messages']
    grand_total = sum(per_member_totals)
    result_dict['% of all messages that are by this person'].extend(
        round(helpers.safe_divide(count, grand_total) * 100, 2)
        for count in per_member_totals)
Beispiel #5
0
def main(result_dict, df, chat_members):
    """Count tweet messages per member, absolute and as a percentage.

    Args:
        result_dict: Mutated in place; gains ``'messages that are tweets'``
            and ``'% of messages that are tweets'``, one entry per member.
        df: Message table. Reads ``'text'`` and ``'sender'``; gains
            ``'is tweet?'``.
        chat_members: Iterable of sender names, processed in order.
    """
    result_dict['messages that are tweets'] = []
    result_dict['% of messages that are tweets'] = []

    df['is tweet?'] = df['text'].apply(helpers.is_tweet)

    for member_name in chat_members:
        # Second element is used as the denominator for the percentage.
        _, non_reaction_total = initialize_result_dict(
            member_name, df, result_dict)

        member_tweets = df[(df['is tweet?']) & (df['sender'] == member_name)]
        tweet_count = len(member_tweets)

        result_dict['messages that are tweets'].append(tweet_count)
        tweet_ratio = helpers.safe_divide(tweet_count, non_reaction_total)
        result_dict['% of messages that are tweets'].append(
            round(tweet_ratio * 100, 2))
def main(result_dict, df, chat_members):
    """Count messages containing emoji per member, absolute and as a percentage.

    Args:
        result_dict: Mutated in place; gains ``'messages that contain emoji'``
            and ``'% of messages that include emoji'``, one entry per member.
        df: Message table. Reads ``'text'`` and ``'sender'``; gains
            ``'includes emoji?'``.
        chat_members: Iterable of sender names, processed in order.
    """
    result_dict['messages that contain emoji'] = []
    result_dict['% of messages that include emoji'] = []

    df['includes emoji?'] = df['text'].apply(helpers.includes_emoji)

    for member_name in chat_members:
        # Second element is used as the denominator for the percentage.
        _, non_reaction_total = initialize_result_dict(
            member_name, df, result_dict)

        with_emoji = df[(df['includes emoji?'])
                        & (df['sender'] == member_name)]
        emoji_count = len(with_emoji)

        result_dict['messages that contain emoji'].append(emoji_count)
        emoji_ratio = helpers.safe_divide(emoji_count, non_reaction_total)
        result_dict['% of messages that include emoji'].append(
            round(emoji_ratio * 100, 2))
Beispiel #7
0
def main(result_dict, df, chat_members):
    """Count all-caps messages per member, absolute and as a percentage.

    Args:
        result_dict: Mutated in place; gains ``'all caps messages'`` and
            ``'% of messages that are all caps'``, one entry per member.
        df: Message table. Reads ``'text'`` and ``'sender'``; gains
            ``'is all caps?'``.
        chat_members: Iterable of sender names, processed in order.
    """
    result_dict['all caps messages'] = []
    result_dict['% of messages that are all caps'] = []

    df['is all caps?'] = df['text'].apply(helpers.is_all_caps)

    for member_name in chat_members:
        # Second element is used as the denominator for the percentage.
        _, non_reaction_total = initialize_result_dict(
            member_name, df, result_dict)

        shouted = df[(df['is all caps?']) & (df['sender'] == member_name)]
        caps_count = len(shouted)

        result_dict['all caps messages'].append(caps_count)
        caps_ratio = helpers.safe_divide(caps_count, non_reaction_total)
        result_dict['% of messages that are all caps'].append(
            round(caps_ratio * 100, 2))
def main(result_dict, df, chat_members):
    """Count attachment messages per member, absolute and as a percentage.

    Args:
        result_dict: Mutated in place; gains ``'attachment messages'`` and
            ``'% of messages that are attachments'``, one entry per member.
        df: Message table. Reads ``'type'`` and ``'sender'``; gains
            ``'is attachment?'``.
        chat_members: Iterable of sender names, processed in order.
    """
    result_dict['attachment messages'] = []
    result_dict['% of messages that are attachments'] = []

    df['is attachment?'] = df['type'].apply(helpers.is_attachment)

    for member_name in chat_members:
        # Second element is used as the denominator for the percentage.
        _, non_reaction_total = initialize_result_dict(
            member_name, df, result_dict)

        attachments = df[(df['is attachment?'])
                         & (df['sender'] == member_name)]
        attachment_count = len(attachments)

        result_dict['attachment messages'].append(attachment_count)
        attachment_ratio = helpers.safe_divide(attachment_count,
                                               non_reaction_total)
        result_dict['% of messages that are attachments'].append(
            round(attachment_ratio * 100, 2))
Beispiel #9
0
def main(result_dict, df, chat_members, phrase, case_sensitive, separate,
         regex):
    """Count, per member, the messages that contain ``phrase``.

    Args:
        result_dict: Mutated in place; gains
            ``'messages that contain <phrase>'`` and
            ``'% of messages that contain <phrase>'``, one entry per member.
        df: Message table. Reads ``'text'`` and ``'sender'``; gains an
            ``'includes <phrase>?'`` column.
        chat_members: Iterable of sender names, processed in order.
        phrase: Text to look for; must not be ``None``.
        case_sensitive: Forwarded unchanged to ``helpers.is_phrase_in``.
        separate: Forwarded unchanged to ``helpers.is_phrase_in``.
        regex: Forwarded unchanged to ``helpers.is_phrase_in``.

    Raises:
        Exception: If ``phrase`` is ``None``.
    """
    if phrase is None:
        raise Exception('Function is phrase but not given a phrase')

    count_key = f'messages that contain {phrase}'
    share_key = f'% of messages that contain {phrase}'
    flag_column = f'includes {phrase}?'

    result_dict[count_key] = []
    result_dict[share_key] = []

    def contains_phrase(text):
        return helpers.is_phrase_in(phrase, text, case_sensitive, separate,
                                    regex)

    df[flag_column] = df['text'].apply(contains_phrase)

    for member_name in chat_members:
        # Second element is used as the denominator for the percentage.
        _, non_reaction_total = initialize_result_dict(
            member_name, df, result_dict)

        matching = df[(df[flag_column]) & (df['sender'] == member_name)]
        match_count = len(matching)

        result_dict[count_key].append(match_count)
        match_ratio = helpers.safe_divide(match_count, non_reaction_total)
        result_dict[share_key].append(round(match_ratio * 100, 2))
def main(result_dict, df, chat_members, running_all_functions,
         minutes_threshold):
    """Record how many message series each member sent and their mean size.

    A row opens a new series when its sender differs from the previous row's
    sender, or when it is flagged in ``'is conversation starter?'``.

    Args:
        result_dict: Mutated in place; gains ``'total # of message series'``,
            ``'total messages'`` and ``'average messages per series'``, each
            with one entry per member.
        df: Message table. Reads ``'sender'`` (and ``'time'`` when
            ``running_all_functions`` is falsy); gains
            ``'is new message series?'``. Must have at least one row, since
            the first row is addressed positionally.
        chat_members: Iterable of sender names, processed in order.
        running_all_functions: When truthy, ``'is conversation starter?'``
            is expected to exist on ``df`` already; otherwise it is built
            here.
        minutes_threshold: Forwarded to ``helpers.is_conversation_starter``
            together with the gap to the previous row.
    """
    result_dict['total # of message series'] = []
    result_dict['total messages'] = []
    result_dict['average messages per series'] = []
    if not running_all_functions:
        df['is conversation starter?'] = df['time'].diff().apply(
            lambda diff: helpers.is_conversation_starter(
                diff, minutes_threshold))
        # The first row has no predecessor, so it always starts a conversation.
        df.iloc[0, df.columns.get_loc('is conversation starter?')] = True

    # Compare each sender with the previous row's sender directly. This gives
    # the same flags as the earlier constant-column/shift()/where() detour,
    # but as a boolean column instead of an object column that briefly
    # held NaN.
    df['is new message series?'] = df['sender'].shift() != df['sender']
    # Keep the explicit first-row flag so the result does not depend on how
    # the missing "previous sender" compares for a given dtype.
    df.iloc[0, df.columns.get_loc('is new message series?')] = True

    for member_name in chat_members:
        # First element is used as the member's total message count.
        total_messages, _ = initialize_result_dict(member_name, df,
                                                   result_dict)
        message_series = len(df[((df['is new message series?'])
                                 | (df['is conversation starter?']))
                                & (df['sender'] == member_name)])
        result_dict['total # of message series'].append(message_series)
        result_dict['total messages'].append(total_messages)
        result_dict['average messages per series'].append(
            round(helpers.safe_divide(total_messages, message_series), 2))
Beispiel #11
0
def main(result_dict, df, chat_members):
    """Break each member's messages down into reactions and reaction kinds.

    Args:
        result_dict: Mutated in place. Gains per-member lists for total,
            non-reaction and reaction message counts, the member's share of
            each chat-wide total, the percentage of the member's messages
            that are reactions, the summed ``'reactions'`` value, and for
            each kind (like, love, dislike, laugh, emphasis, question) a
            summed value plus its percentage of ``'reactions'``.
        df: Message table. Reads ``'text'`` and ``'sender'``; gains
            ``'reaction action'`` and one ``'<kind> react action'`` column
            per kind, filled by the matching ``helpers`` function.
        chat_members: Iterable of sender names, processed in order.
    """
    # (kind, helper) pairs; the order fixes both key and column creation order.
    react_kinds = (
        ('like', helpers.like_react_action),
        ('love', helpers.love_react_action),
        ('dislike', helpers.dislike_react_action),
        ('laugh', helpers.laugh_react_action),
        ('emphasis', helpers.emphasis_react_action),
        ('question', helpers.question_react_action),
    )

    for key in ('total messages',
                'non-reaction messages',
                '% of all non-reaction messages that are by this person',
                'reaction messages',
                '% of all reaction messages that are by this person',
                '% of messages that are reactions',
                'reactions'):
        result_dict[key] = []
    for kind, _ in react_kinds:
        result_dict[f'{kind} reacts'] = []
        result_dict[f'% of reactions that are {kind} reacts'] = []

    df['reaction action'] = df['text'].apply(helpers.reaction_action)
    for kind, action in react_kinds:
        df[f'{kind} react action'] = df['text'].apply(action)

    for member_name in chat_members:
        total_messages, non_reaction_messages = initialize_result_dict(
            member_name, df, result_dict)
        result_dict['total messages'].append(total_messages)
        result_dict['reaction messages'].append(total_messages -
                                                non_reaction_messages)
        result_dict['non-reaction messages'].append(non_reaction_messages)
        non_reaction_ratio = helpers.safe_divide(non_reaction_messages,
                                                 total_messages)
        result_dict['% of messages that are reactions'].append(
            round((1 - non_reaction_ratio) * 100, 2))

        # Select the member's rows once; every sum below reads from it.
        sent_by_member = df[df['sender'] == member_name]
        reactions = int(sent_by_member['reaction action'].sum())
        result_dict['reactions'].append(reactions)

        for kind, _ in react_kinds:
            kind_total = int(sent_by_member[f'{kind} react action'].sum())
            result_dict[f'{kind} reacts'].append(kind_total)
            result_dict[f'% of reactions that are {kind} reacts'].append(
                round(helpers.safe_divide(kind_total, reactions) * 100, 2))

    # Chat-wide shares need the totals, so they are filled in afterwards.
    non_reaction_counts = result_dict['non-reaction messages']
    reaction_counts = result_dict['reaction messages']
    total_non_reaction_messages = sum(non_reaction_counts)
    total_reaction_messages = sum(reaction_counts)

    for non_reaction_count, reaction_count in zip(non_reaction_counts,
                                                  reaction_counts):
        result_dict[
            '% of all non-reaction messages that are by this person'].append(
                round(
                    helpers.safe_divide(non_reaction_count,
                                        total_non_reaction_messages) * 100,
                    2))
        result_dict[
            '% of all reaction messages that are by this person'].append(
                round(
                    helpers.safe_divide(reaction_count,
                                        total_reaction_messages) * 100, 2))
Beispiel #12
0
def main(result_dict, df, chat_members, args):
    """Build line-graph data of message counts per time period.

    The result is stored under ``result_dict['graphData']`` as a dict with
    ``'labels'`` (one per time period) and ``'datasets'`` (one per plotted
    series, each with ``'label'``, ``'data'``, ``'fill'`` and
    ``'borderColor'``).

    Args:
        result_dict: Mutated in place; gains ``'graphData'``.
        df: Message table. Reads ``'time'`` and ``'sender'``; gains
            ``'time_period'``. The first and last rows supply the start and
            end of the plotted range, so rows are presumably in
            chronological order -- confirm with the caller.
        chat_members: Iterable of sender names; only consulted when
            ``args.graph_individual`` is truthy.
        args: Object with ``day``, ``week``, ``month``, ``year`` and
            ``graph_individual`` attributes. The first truthy one of
            day/week/month/year (in that order) selects the period length.

    Raises:
        Exception: If none of ``args.day``, ``args.week``, ``args.month``
            or ``args.year`` is set.
    """
    if not (args.day or args.week or args.month or args.year):
        raise Exception('Must give time period length for graph')

    message_freqs = {}
    if args.graph_individual:
        # One series per member, skipping members without any messages.
        members = []
        for member_name in chat_members:
            total_messages, _ = initialize_result_dict(
                member_name, df, result_dict)
            if total_messages > 0:
                members.append(member_name)
        for member in members:
            message_freqs[member] = []
    else:
        message_freqs['Total Messages'] = []

    if args.day:
        df['time_period'] = df['time'].apply(helpers.get_day)
        time_period_name = 'day'
    elif args.week:
        df['time_period'] = df['time'].apply(helpers.get_week)
        time_period_name = 'week'
    elif args.month:
        df['time_period'] = df['time'].apply(helpers.get_month)
        time_period_name = 'month'
    elif args.year:
        df['time_period'] = df['time'].apply(helpers.get_year)
        time_period_name = 'year'

    # Period values are parsed as month/day/two-digit-year strings.
    day_fmt = '%m/%d/%y'
    begin_date = datetime.datetime.strptime(df['time_period'].iloc[0], day_fmt)
    end_date = datetime.datetime.strptime(df['time_period'].iloc[-1], day_fmt)
    time_periods = helpers.get_time_periods(begin_date, end_date, time_period_name)
    for time_period in time_periods:
        if args.graph_individual:
            for member_name in members:
                message_freqs[member_name].append(len(
                    df[(df['time_period'] == time_period) & (df['sender'] == member_name)]
                ))
        else:
            message_freqs['Total Messages'].append(len(
                df[df['time_period'] == time_period]
            ))

    colors = [
        'rgba(31, 120, 180, 1)',
        'rgba(51, 160, 44, 1)',
        'rgba(227, 26, 28, 1)',
        'rgba(255, 127, 0, 1)',
        'rgba(106, 61, 154, 1)',
        'rgba(177, 89, 40, 1)',
        'rgba(166, 206, 227, 1)',
        'rgba(178, 223, 138, 1)',
        'rgba(251, 154, 153, 1)',
        'rgba(253, 191, 111, 1)',
        'rgba(202, 178, 214, 1)',
        'rgba(255, 255, 153, 1)'
    ]

    result_dict['graphData'] = {}

    if args.day or args.week:
        result_dict['graphData']['labels'] = time_periods
    elif args.month:
        # Month labels drop the day component: "MM/YY".
        result_dict['graphData']['labels'] = [
            f'{time_period.split("/")[0]}/{time_period.split("/")[2]}'
            for time_period in time_periods
        ]
    elif args.year:
        # Two-digit years are expanded with a fixed "20" prefix.
        result_dict['graphData']['labels'] = [
            f'20{time_period.split("/")[2]}'
            for time_period in time_periods
        ]

    result_dict['graphData']['datasets'] = [
        {
            'label': name,
            'data': message_freqs[name],
            'fill': False,
            # Wrap around the palette. The modulus must be the palette size,
            # not the number of datasets, or more series than colours would
            # index past the end of the list.
            'borderColor': colors[i % len(colors)]
        }
        for i, name in enumerate(message_freqs)
    ]