Beispiel #1
0
def test_merge():
    """Merge two chats, with and without user renaming, and check the result holds a DataFrame."""
    first = WhatsAppChat.from_source(filename1)
    second = WhatsAppChat.from_source(filename2)

    # First a plain merge, then a merge that also passes a user-renaming mapping.
    for merge_kwargs in ({}, {'rename_users': {'J': ['John']}}):
        merged = first.merge(second, **merge_kwargs)
        assert isinstance(merged.df, pd.DataFrame)
Beispiel #2
0
def test_object_from_sources(tmpdir):
    """Build a chat from two sources using different orderings and header settings."""
    # Each scenario is (list of sources, extra keyword arguments), run in this order.
    scenarios = [
        ([filename1, filename2], {}),
        ([filename2, filename1], {}),
        ([filename2, filename1], {'auto_header': True}),
        ([filename2, filename1], {'auto_header': False, 'hformat': [hformat_merge, hformat_merge]}),
    ]
    for sources, options in scenarios:
        loaded = WhatsAppChat.from_sources(sources, **options)
        assert isinstance(loaded.df, pd.DataFrame)
Beispiel #3
0
def test_interventions_date_cumsum():
    """Check cumulative per-date intervention counts, requested through both keyword spellings."""
    chat = WhatsAppChat.from_source(filename)

    # 'cummulative' is the spelling flagged as to-be-deprecated; it is exercised after the current one.
    for flag_name in ('cumulative', 'cummulative'):
        counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False, **{flag_name: True})
        assert isinstance(counts, pd.DataFrame)

        # The counts frame must have exactly one column per chat user
        assert set(counts.columns) == set(chat.users)
        assert counts.shape[1] == len(chat.users)

        # The chat frame and the counts frame must cover the same date window
        chat_index = chat.df.index
        assert chat_index.max().date() == counts.index.max().date()
        assert chat_index.min().date() == counts.index.min().date()
Beispiel #4
0
def test_get_response_matrix_2():
    """With ``zero_own=False`` the response matrix is square with one column per user."""
    chat = WhatsAppChat.from_source(filename)
    matrix = get_response_matrix(chat=chat, zero_own=False)

    # Shape and column labels must match the chat users
    user_count = len(chat.users)
    assert matrix.shape == (user_count, user_count)
    assert set(matrix.columns) == set(chat.users)
Beispiel #5
0
def test_interventions_error_1():
    """An unknown ``date_mode`` must raise ValueError whether or not message length is used."""
    chat = WhatsAppChat.from_source(filename)
    for use_length in (False, True):
        with pytest.raises(ValueError):
            get_interventions_count(chat=chat, date_mode='error', msg_length=use_length)
Beispiel #6
0
def test_get_response_matrix_5():
    """With ``norm='receiver'`` every column of the response matrix sums to one."""
    chat = WhatsAppChat.from_source(filename)
    matrix = get_response_matrix(chat=chat, norm='receiver')

    # Shape and column labels must match the chat users
    user_count = len(chat.users)
    assert matrix.shape == (user_count, user_count)
    assert set(matrix.columns) == set(chat.users)

    # Scaling check: each column total is (approximately) 1
    column_totals = matrix.sum(axis=0)
    assert all(math.isclose(total, 1) for total in column_totals)
Beispiel #7
0
def test_get_response_matrix_1():
    """With ``zero_own=True`` the diagonal of the response matrix is all zeros."""
    chat = WhatsAppChat.from_source(filename)
    matrix = get_response_matrix(chat=chat, zero_own=True)

    # Shape and column labels must match the chat users
    user_count = len(chat.users)
    assert matrix.shape == (user_count, user_count)
    assert set(matrix.columns) == set(chat.users)

    # No user may be counted as responding to themselves
    assert all(matrix.loc[user, user] == 0 for user in matrix.columns)
Beispiel #8
0
def test_get_response_matrix_3():
    """With ``norm='joint'`` all entries of the response matrix add up to one."""
    chat = WhatsAppChat.from_source(filename)
    matrix = get_response_matrix(chat=chat, norm='joint')

    # Shape and column labels must match the chat users
    user_count = len(chat.users)
    assert matrix.shape == (user_count, user_count)
    assert set(matrix.columns) == set(chat.users)

    # Scaling check: the grand total is (approximately) 1
    grand_total = matrix.sum().sum()
    assert math.isclose(grand_total, 1)
Beispiel #9
0
    def generate(self, filepath=None, hformat=None, last_timestamp=None):
        """Create a random chat as :func:`WhatsAppChat <whatstk.whatsapp.objects.WhatsAppChat>`.

        Args:
            filepath (str): Local path where the generated chat is also saved as text. Nothing is written when
                            omitted or empty.
            hformat (str, optional): :ref:`Format of the header <The header format>` used when saving, e.g.
                                    ``'[%y-%m-%d %H:%M:%S] - %name:'``.
            last_timestamp (datetime, optional): Datetime of last message. If `None`, defaults to current date.

        Returns:
            WhatsAppChat: Chat with random messages.

        ..  seealso::

            * :func:`WhatsAppChat.to_txt <whatstk.whatsapp.objects.WhatsAppChat.to_txt>`

        """
        generated = WhatsAppChat(self._generate_df(last_timestamp=last_timestamp))
        # Without a target path there is nothing to export; hand the chat back directly.
        if not filepath:
            return generated
        generated.to_txt(filepath=filepath, hformat=hformat)
        return generated
Beispiel #10
0
def main():
    """Load a chat and plot the figure selected on the command line.

    The chat is read from ``args.input_filename`` (with header format ``args.hformat``), a figure is built
    according to ``args.type`` and it is rendered to ``args.output_filename`` via ``plot``.

    Raises:
        ValueError: If ``args.type`` is neither ``"interventions_count"`` nor ``"msg_length"``.

    """
    args = _parse_args()
    chat = WhatsAppChat.from_source(filepath=args.input_filename,
                                    hformat=args.hformat)

    if args.type == "interventions_count":
        fig = FigureBuilder(chat=chat).user_interventions_count_linechart(
            date_mode=args.icount_date_mode,
            msg_length=False,
            cumulative=args.icount_cumulative)
    elif args.type == "msg_length":
        fig = FigureBuilder(chat=chat).user_msg_length_boxplot()
    else:
        # Without this branch `fig` would be unbound and `plot` would fail with an opaque UnboundLocalError.
        raise ValueError("Unsupported plot type: {!r}".format(args.type))
    plot(fig, filename=args.output_filename)
Beispiel #11
0
def test_interventions_hour_msg_length():
    """Check per-hour intervention counts computed with ``msg_length=True``."""
    chat = WhatsAppChat.from_source(filename)
    counts = get_interventions_count(chat=chat, date_mode='hour', msg_length=True)
    assert isinstance(counts, pd.DataFrame)

    # The counts frame must have exactly one column per chat user
    assert set(counts.columns) == set(chat.users)
    assert counts.shape[1] == len(chat.users)

    # The hour range of the counts must match the hours present in the chat index
    chat_hours = chat.df.index.hour
    assert counts.index.max() == chat_hours.max()
    assert counts.index.min() == chat_hours.min()
Beispiel #12
0
def test_interventions_date_all():
    """Check per-date intervention counts requested with ``all_users=True``."""
    chat = WhatsAppChat.from_source(filename)
    counts = get_interventions_count(chat=chat, date_mode='date', msg_length=False, all_users=True)
    assert isinstance(counts, pd.DataFrame)

    # A single column named 'interventions count' is expected instead of one per user
    assert len(counts.columns) == 1
    assert (counts.columns == ['interventions count'])

    # The chat frame and the counts frame must cover the same date window
    chat_index = chat.df.index
    assert chat_index.max().date() == counts.index.max().date()
    assert chat_index.min().date() == counts.index.min().date()
Beispiel #13
0
def test_object_to_csv_1(tmpdir):
    """Export a loaded chat to ``export.csv`` inside the temporary directory."""
    target = str(tmpdir.join("export.csv"))
    loaded = WhatsAppChat.from_source(filename)
    loaded.to_csv(filepath=target)
Beispiel #14
0
def main():
    """Load the chat given on the command line and export it as CSV."""
    cli_args = _parse_args()
    source_options = {'filepath': cli_args.input_filename, 'hformat': cli_args.hformat}
    loaded = WhatsAppChat.from_source(**source_options)
    loaded.to_csv(cli_args.output_filename)
Beispiel #15
0
def test_object_hformat():
    """Load the chat twice and check each load yields a DataFrame."""
    # NOTE(review): both loads use identical arguments; presumably one was meant to pass an explicit
    # hformat - confirm.
    for _ in range(2):
        loaded = WhatsAppChat.from_source(filename)
        assert isinstance(loaded.df, pd.DataFrame)
Beispiel #16
0
def test_len():
    """``len()`` of a chat returns an integer."""
    size = len(WhatsAppChat.from_source(filename))
    assert isinstance(size, int)
Beispiel #17
0
def get_response_matrix(df=None, chat=None, zero_own=True, norm=NORMS.ABSOLUTE):
    """Get response matrix for given chat.

    Obtains a DataFrame of shape `[n_users, n_users]` counting the number of responses between members. Responses can
    be counted in different ways, e.g. using absolute values or normalised values. Responses are counted based solely
    on consecutive messages. That is, if :math:`user_i` sends a message right after :math:`user_j`, it will be counted
    as a response from :math:`user_i` to :math:`user_j`.

    Axis 0 lists senders and axis 1 lists receivers. That is, the value in cell (i, j) denotes the number of times
    :math:`user_i` responded to a message from :math:`user_j`.

    **Note**: Either ``df`` or ``chat`` must be provided.

    Args:
        df (pandas.DataFrame, optional): Chat data. Attribute `df` of a chat loaded using Chat. If a value is given,
                                            ``chat`` is ignored.
        chat (Chat, optional): Chat data. Object obtained when chat loaded using Chat. Required if ``df`` is None.
        zero_own (bool, optional): Set to True to avoid counting own responses. Defaults to True.
        norm (str, optional): Specifies the type of normalization used for response count. Can be:

                                - ``'absolute'``: Absolute count of responses.
                                - ``'joint'``: Normalized by the total number of counted responses.
                                - ``'sender'``: Normalized per sender, i.e. each row is divided by the total number of
                                  responses sent by that user.
                                - ``'receiver'``: Normalized per receiver, i.e. each column is divided by the total
                                  number of responses received by that user.

    Returns:
        pandas.DataFrame: Response matrix.

    Raises:
        ValueError: If ``norm`` is not one of the values listed above.

    Example:
            Get absolute count on responses (consecutive messages) between users.

            ..  code-block:: python

                >>> from whatstk import WhatsAppChat
                >>> from whatstk.analysis import get_response_matrix
                >>> from whatstk.data import whatsapp_urls
                >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON)
                >>> responses = get_response_matrix(chat=chat)
                >>> responses
                                Ash Ketchum  Brock  ...  Raichu  Wobbuffet
                Ash Ketchum               0      0  ...       1          0
                Brock                     1      0  ...       0          0
                Jessie & James            0      1  ...       0          0
                Meowth                    0      0  ...       0          0
                Misty                     2      1  ...       1          0
                Prof. Oak                 0      1  ...       0          0
                Raichu                    1      0  ...       0          0
                Wobbuffet                 0      0  ...       0          0

    """
    # Get chat df
    df = _get_df(df=df, chat=chat)

    # Reject an invalid normalization mode before doing any counting work
    if norm not in (NORMS.ABSOLUTE, NORMS.JOINT, NORMS.RECEIVER, NORMS.SENDER):
        raise ValueError("norm not valid. See NORMS variable in whatstk.analysis.responses")

    users = WhatsAppChat(df).users
    # Get list of username transitions and initialize dictionary with counts (outer key: sender, inner: receiver)
    user_transitions = df[COLNAMES_DF.USERNAME].tolist()
    responses = {user: dict.fromkeys(users, 0) for user in users}
    # Fill count dictionary: each message is treated as a response to the message right before it
    for receiver, sender in zip(user_transitions, user_transitions[1:]):
        # Self-responses are only counted when zero_own is disabled
        if sender != receiver or not zero_own:
            responses[sender][receiver] += 1
    responses = pd.DataFrame.from_dict(responses, orient='index')

    # Normalize (absolute counts are returned untouched)
    if norm == NORMS.JOINT:
        responses /= responses.sum().sum()
    elif norm == NORMS.RECEIVER:
        # Column-wise: divide every receiver column by its own total
        responses /= responses.sum(axis=0)
    elif norm == NORMS.SENDER:
        # Row-wise: divide every sender row by its own total
        responses = responses.divide(responses.sum(axis=1), axis=0)
    return responses
Beispiel #18
0
def test_rename_users():
    """Renaming users with a list-valued mapping returns a chat that holds a DataFrame."""
    original = WhatsAppChat.from_source(filename)
    renamed = original.rename_users(mapping={'J': ['John']})
    assert isinstance(renamed.df, pd.DataFrame)
Beispiel #19
0
def test_rename_users_error():
    """A mapping whose value is a plain string instead of a list must raise ValueError."""
    loaded = WhatsAppChat.from_source(filename)
    bad_mapping = {'J': 'John'}
    with pytest.raises(ValueError):
        loaded.rename_users(mapping=bad_mapping)
Beispiel #20
0
def test_properties():
    """``start_date`` and ``end_date`` of a chat are both ``datetime`` instances."""
    loaded = WhatsAppChat.from_source(filepath)

    for boundary in (loaded.start_date, loaded.end_date):
        assert isinstance(boundary, datetime)
Beispiel #21
0
def test_object_error():
    """Loading with ``auto_header=False`` and no header format must raise ValueError."""
    expect_failure = pytest.raises(ValueError)
    with expect_failure:
        WhatsAppChat.from_source(filename, auto_header=False)
Beispiel #22
0
def test_object_from_source_error(tmpdir):
    """Loading with the header format ``"%y%name"`` must raise HFormatError or KeyError."""
    accepted_errors = (HFormatError, KeyError)
    with pytest.raises(accepted_errors):
        WhatsAppChat.from_source(filename, hformat="%y%name")
Beispiel #23
0
def load_chat_as_df():
    """Load the chat stored at ``filename`` and return its ``df`` attribute."""
    loaded = WhatsAppChat.from_source(filename)
    return loaded.df
Beispiel #24
0
def test_get_response_matrix_error():
    """An unknown ``norm`` value must make ``get_response_matrix`` raise ValueError."""
    loaded = WhatsAppChat.from_source(filename)
    expect_failure = pytest.raises(ValueError)
    with expect_failure:
        get_response_matrix(chat=loaded, norm='error')
Beispiel #25
0
def test_object_to_txt(tmpdir):
    """Exporting to a path named just ``export`` is expected to raise ValueError."""
    target = str(tmpdir.join("export"))
    loaded = WhatsAppChat.from_source(filename)
    with pytest.raises(ValueError):
        loaded.to_txt(filepath=target)
Beispiel #26
0
def load_chat():
    """Load the chat stored at ``filename`` and return the resulting chat object."""
    loaded = WhatsAppChat.from_source(filename)
    return loaded