def test_merge():
    """Merge two chats, with and without a ``rename_users`` mapping."""
    first = WhatsAppChat.from_source(filename1)
    second = WhatsAppChat.from_source(filename2)

    # Plain merge
    merged = first.merge(second)
    assert isinstance(merged.df, pd.DataFrame)

    # Merge while passing a user-renaming mapping
    merged = first.merge(second, rename_users={'J': ['John']})
    assert isinstance(merged.df, pd.DataFrame)
def test_object_from_sources(tmpdir):
    """Build a chat from several sources under different argument combinations.

    Every combination must produce a chat whose ``df`` is a pandas DataFrame.
    """
    # Each case is (list of sources, extra keyword arguments for from_sources).
    cases = (
        ([filename1, filename2], {}),
        ([filename2, filename1], {}),
        ([filename2, filename1], {'auto_header': True}),
        (
            [filename2, filename1],
            {'auto_header': False, 'hformat': [hformat_merge, hformat_merge]},
        ),
    )
    for sources, options in cases:
        loaded = WhatsAppChat.from_sources(sources, **options)
        assert isinstance(loaded.df, pd.DataFrame)
def test_interventions_date_cumsum():
    """Check cumulative per-date intervention counts against the source chat.

    The same assertions are run for the ``cumulative`` keyword and for the
    ``cummulative`` spelling, which the original test marks as to be deprecated.
    """
    chat = WhatsAppChat.from_source(filename)

    def _check(result):
        assert isinstance(result, pd.DataFrame)
        # Assert chat df and counts df have same users
        assert set(chat.users) == set(result.columns)
        assert len(chat.users) == result.shape[1]
        # Assert chat df and counts df have same date window
        assert chat.df.index.max().date() == result.index.max().date()
        assert chat.df.index.min().date() == result.index.min().date()

    _check(get_interventions_count(
        chat=chat, date_mode='date', msg_length=False, cumulative=True
    ))

    # TO BE DEPRECATED
    _check(get_interventions_count(
        chat=chat, date_mode='date', msg_length=False, cummulative=True
    ))
def test_get_response_matrix_2():
    """Response matrix computed with ``zero_own=False`` is square over the chat users."""
    chat = WhatsAppChat.from_source(filename)
    matrix = get_response_matrix(chat=chat, zero_own=False)

    # Check shape and colnames of returned dataframe
    user_count = len(chat.users)
    assert matrix.shape == (user_count, user_count)
    assert set(matrix.columns) == set(chat.users)
def test_interventions_error_1():
    """An unknown ``date_mode`` raises ValueError for both ``msg_length`` settings."""
    chat = WhatsAppChat.from_source(filename)
    for use_length in (False, True):
        with pytest.raises(ValueError):
            get_interventions_count(chat=chat, date_mode='error', msg_length=use_length)
def test_get_response_matrix_5():
    """Response matrix with ``norm='receiver'`` has every column summing to 1."""
    chat = WhatsAppChat.from_source(filename)
    matrix = get_response_matrix(chat=chat, norm='receiver')

    # Check shape and colnames of returned dataframe
    user_count = len(chat.users)
    assert matrix.shape == (user_count, user_count)
    assert set(matrix.columns) == set(chat.users)

    # Check scaling has been done correctly: sums along axis 0 are all ~1
    column_totals = matrix.sum(axis=0)
    assert all([math.isclose(total, 1) for total in column_totals])
def test_get_response_matrix_1():
    """Response matrix computed with ``zero_own=True`` has an all-zero diagonal."""
    chat = WhatsAppChat.from_source(filename)
    matrix = get_response_matrix(chat=chat, zero_own=True)

    # Check shape and colnames of returned dataframe
    user_count = len(chat.users)
    assert matrix.shape == (user_count, user_count)
    assert set(matrix.columns) == set(chat.users)

    # Check diagonal of returned dataframe is zero
    diagonal_is_zero = [matrix.loc[name, name] == 0 for name in matrix.columns]
    assert all(diagonal_is_zero)
def test_get_response_matrix_3():
    """Response matrix with ``norm='joint'`` sums to 1 over all cells."""
    chat = WhatsAppChat.from_source(filename)
    matrix = get_response_matrix(chat=chat, norm='joint')

    # Check shape and colnames of returned dataframe
    user_count = len(chat.users)
    assert matrix.shape == (user_count, user_count)
    assert set(matrix.columns) == set(chat.users)

    # Check scaling has been done correctly: grand total is ~1
    grand_total = matrix.sum().sum()
    assert math.isclose(grand_total, 1)
def generate(self, filepath=None, hformat=None, last_timestamp=None):
    """Generate random chat as :func:`WhatsAppChat <whatstk.whatsapp.objects.WhatsAppChat>`.

    The chat is built from the DataFrame returned by ``self._generate_df`` and is
    additionally written to disk when a truthy ``filepath`` is given.

    Args:
        filepath (str): If given, generated chat is saved with name ``filepath`` (must be a local path).
        hformat (str, optional): :ref:`Format of the header <The header format>`, e.g.
                                    ``'[%y-%m-%d %H:%M:%S] - %name:'``. Only used when saving.
        last_timestamp (datetime, optional): Datetime of last message. If `None`, defaults to current date.

    Returns:
        WhatsAppChat: Chat with random messages.

    ..  seealso::

        * :func:`WhatsAppChat.to_txt <whatstk.whatsapp.objects.WhatsAppChat.to_txt>`

    """
    generated = WhatsAppChat(self._generate_df(last_timestamp=last_timestamp))
    if not filepath:
        # Nothing to export
        return generated
    generated.to_txt(filepath=filepath, hformat=hformat)
    return generated
def main():
    """Main script: load a chat, build the requested figure and plot it.

    Reads the parsed command-line arguments, loads the chat from
    ``args.input_filename`` and builds a figure according to ``args.type``:

    - ``"interventions_count"``: line chart of user interventions count.
    - ``"msg_length"``: boxplot of user message lengths.

    The figure is then passed to ``plot`` with ``args.output_filename``.

    Raises:
        ValueError: If ``args.type`` is none of the supported figure types.

    """
    args = _parse_args()
    chat = WhatsAppChat.from_source(filepath=args.input_filename, hformat=args.hformat)
    if args.type == "interventions_count":
        fig = FigureBuilder(chat=chat).user_interventions_count_linechart(
            date_mode=args.icount_date_mode,
            msg_length=False,
            cumulative=args.icount_cumulative,
        )
    elif args.type == "msg_length":
        fig = FigureBuilder(chat=chat).user_msg_length_boxplot()
    else:
        # Without this branch ``fig`` would be unbound below and the script
        # would die with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported figure type {!r}. Use 'interventions_count' or 'msg_length'.".format(args.type)
        )
    plot(fig, filename=args.output_filename)
def test_interventions_hour_msg_length():
    """Counts with ``date_mode='hour'`` and ``msg_length=True`` match the chat's users and hours."""
    chat = WhatsAppChat.from_source(filename)
    result = get_interventions_count(chat=chat, date_mode='hour', msg_length=True)
    assert isinstance(result, pd.DataFrame)

    # Assert chat df and counts df have same users
    assert set(chat.users) == set(result.columns)
    assert len(chat.users) == result.shape[1]

    # Check range hours
    chat_hours = chat.df.index.hour
    assert result.index.max() == chat_hours.max()
    assert result.index.min() == chat_hours.min()
def test_interventions_date_all():
    """With ``all_users=True`` the counts have one ``'interventions count'`` column."""
    chat = WhatsAppChat.from_source(filename)
    result = get_interventions_count(
        chat=chat, date_mode='date', msg_length=False, all_users=True
    )
    assert isinstance(result, pd.DataFrame)

    # Assert counts df has exactly one, aggregated, column
    assert len(result.columns) == 1
    assert result.columns == ['interventions count']

    # Assert chat df and counts df have same date window
    chat_index = chat.df.index
    assert chat_index.max().date() == result.index.max().date()
    assert chat_index.min().date() == result.index.min().date()
def test_object_to_csv_1(tmpdir):
    """Export a loaded chat to ``export.csv`` inside the pytest temporary directory."""
    chat = WhatsAppChat.from_source(filename)
    target = str(tmpdir.join("export.csv"))
    chat.to_csv(filepath=target)
def main():
    """Main script: load the chat named by the parsed arguments and write it as CSV."""
    cli = _parse_args()
    loaded = WhatsAppChat.from_source(
        filepath=cli.input_filename,
        hformat=cli.hformat,
    )
    loaded.to_csv(cli.output_filename)
def test_object_hformat():
    """Load the chat twice and check that ``df`` is a DataFrame each time.

    NOTE(review): both loads are identical and neither passes ``hformat``,
    despite the test name -- confirm whether an explicit header format was
    meant to be exercised here.
    """
    for _ in range(2):
        loaded = WhatsAppChat.from_source(filename)
        assert isinstance(loaded.df, pd.DataFrame)
def test_len():
    """``len(chat)`` returns an ``int``."""
    size = len(WhatsAppChat.from_source(filename))
    assert isinstance(size, int)
def get_response_matrix(df=None, chat=None, zero_own=True, norm=NORMS.ABSOLUTE):
    """Get response matrix for given chat.

    Obtains a DataFrame of shape `[n_users, n_users]` counting the number of responses between members. Responses can
    be counted in different ways, e.g. using absolute values or normalised values. Responses are counted based solely
    on consecutive messages. That is, if :math:`user_i` sends a message right after :math:`user_j`, it will be counted
    as a response from :math:`user_i` to :math:`user_j`.

    Axis 0 lists senders and axis 1 lists receivers. That is, the value in cell (i, j) denotes the number of times
    :math:`user_i` responded to a message from :math:`user_j`.

    **Note**: Either ``df`` or ``chat`` must be provided.

    Args:
        df (pandas.DataFrame, optional): Chat data. Attribute `df` of a chat loaded using Chat. If a value is given,
                                            ``chat`` is ignored.
        chat (Chat, optional): Chat data. Object obtained when chat loaded using Chat. Required if ``df`` is None.
        zero_own (bool, optional): Set to True to avoid counting own responses. Defaults to True.
        norm (str, optional): Specifies the type of normalization used for response count. Can be:

                                - ``'absolute'``: Absolute count of responses.
                                - ``'joint'``: Normalized by the total number of counted responses, so the whole
                                  matrix sums to 1.
                                - ``'sender'``: Normalized per sender, so every row sums to 1.
                                - ``'receiver'``: Normalized per receiver, so every column sums to 1.

    Returns:
        pandas.DataFrame: Response matrix.

    Raises:
        ValueError: If ``norm`` is not one of the values listed above.

    Example:
            Get absolute count on responses (consecutive messages) between users.

            ..  code-block:: python

                >>> from whatstk import WhatsAppChat
                >>> from whatstk.analysis import get_response_matrix
                >>> from whatstk.data import whatsapp_urls
                >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON)
                >>> responses = get_response_matrix(chat=chat)
                >>> responses
                                Ash Ketchum  Brock  ...  Raichu  Wobbuffet
                Ash Ketchum               0      0  ...       1          0
                Brock                     1      0  ...       0          0
                Jessie & James            0      1  ...       0          0
                Meowth                    0      0  ...       0          0
                Misty                     2      1  ...       1          0
                Prof. Oak                 0      1  ...       0          0
                Raichu                    1      0  ...       0          0
                Wobbuffet                 0      0  ...       0          0

    """
    # Get chat df
    df = _get_df(df=df, chat=chat)

    # Reject unknown normalizations before doing any counting work
    if norm not in (NORMS.ABSOLUTE, NORMS.JOINT, NORMS.RECEIVER, NORMS.SENDER):
        raise ValueError("norm not valid. See NORMS variable in whatstk.analysis.responses")

    users = WhatsAppChat(df).users

    # Get list of username transitions and initialize dictionary with counts
    user_transitions = df[COLNAMES_DF.USERNAME].tolist()
    responses = {user: dict.fromkeys(users, 0) for user in users}

    # Fill count dictionary: each message is a response to the one right before it
    for receiver, sender in zip(user_transitions, user_transitions[1:]):
        # Self-responses are only counted when zero_own is disabled
        if not zero_own or sender != receiver:
            responses[sender][receiver] += 1
    # Outer keys (senders) become the index, inner keys (receivers) the columns
    responses = pd.DataFrame.from_dict(responses, orient='index')

    # Normalize (NORMS.ABSOLUTE leaves the raw counts untouched)
    if norm == NORMS.JOINT:
        responses /= responses.sum().sum()
    elif norm == NORMS.RECEIVER:
        responses /= responses.sum(axis=0)
    elif norm == NORMS.SENDER:
        responses = responses.divide(responses.sum(axis=1), axis=0)
    return responses
def test_rename_users():
    """Renaming users with a ``{new_name: [old_names]}`` mapping returns a chat with a DataFrame."""
    original = WhatsAppChat.from_source(filename)
    renamed = original.rename_users(mapping={'J': ['John']})
    assert isinstance(renamed.df, pd.DataFrame)
def test_rename_users_error():
    """A mapping whose value is a plain string instead of a list raises ValueError."""
    loaded = WhatsAppChat.from_source(filename)
    with pytest.raises(ValueError):
        loaded.rename_users(mapping={'J': 'John'})
def test_properties():
    """``start_date`` and ``end_date`` of a loaded chat are ``datetime`` instances."""
    loaded = WhatsAppChat.from_source(filepath)
    for boundary in (loaded.start_date, loaded.end_date):
        assert isinstance(boundary, datetime)
def test_object_error():
    """Loading with ``auto_header=False`` and no ``hformat`` raises ValueError."""
    with pytest.raises(ValueError):
        WhatsAppChat.from_source(filename, auto_header=False)
def test_object_from_source_error(tmpdir):
    """Loading with the header format ``"%y%name"`` raises HFormatError or KeyError."""
    expected_errors = (HFormatError, KeyError)
    with pytest.raises(expected_errors):
        WhatsAppChat.from_source(filename, hformat="%y%name")
def load_chat_as_df():
    """Load the chat stored at ``filename`` and return its ``df`` attribute."""
    loaded = WhatsAppChat.from_source(filename)
    return loaded.df
def test_get_response_matrix_error():
    """An unknown ``norm`` value makes ``get_response_matrix`` raise ValueError."""
    loaded = WhatsAppChat.from_source(filename)
    with pytest.raises(ValueError):
        get_response_matrix(chat=loaded, norm='error')
def test_object_to_txt(tmpdir):
    """``to_txt`` raises ValueError for the target path ``export``.

    NOTE(review): presumably rejected because the path has no ``.txt``
    extension -- confirm against ``WhatsAppChat.to_txt``.
    """
    loaded = WhatsAppChat.from_source(filename)
    target = str(tmpdir.join("export"))
    with pytest.raises(ValueError):
        loaded.to_txt(filepath=target)
def load_chat():
    """Load and return the chat stored at ``filename``."""
    loaded = WhatsAppChat.from_source(filename)
    return loaded