예제 #1
0
def test_add_recipients():
    """Check the recipient column produced by ``add_recipients``.

    Builds example data with sender ids ``a, a, b, c`` and verifies that
    the ``KEY_RECIPIENT_IDS`` column of the result matches the expected
    per-row recipient lists, and that the unrelated ``other_field``
    column is still present in the result.
    """
    senders = ['a', 'a', 'b', 'c']
    frame = _add_recipient_test_example_data(sender_ids=senders)
    result = add_recipients(frame)

    recipients_per_row = result[KEY_RECIPIENT_IDS].tolist()
    assert_equal([['b'], ['a'], ['a', 'b'], []], recipients_per_row)

    # The extra column from the example data must still be there.
    assert_true('other_field' in result.columns)
def dump2interactions(input_path, output_path):
    """Load a dump, clean it step by step, and write it out as JSON records.

    The dataframe built by ``make_dataframe(input_path)`` is passed, in
    order, through ``add_recipients``, ``drop_thread_with_no_comments``,
    ``fillna_subject_and_body`` and
    ``remove_messages_post_by_certain_users`` (for the user list
    ``['coveralls']``). Progress information (frame shapes, and the shape
    of the non-null ``body`` column around the fill step) is printed to
    stdout before and after each cleaning step.

    Args:
        input_path: Passed unchanged to ``make_dataframe``.
        output_path: Destination handed to ``to_json``; the frame is
            serialized with ``orient="records"``.
    """
    frame = add_recipients(make_dataframe(input_path))

    # Step 1: drop threads without comments.
    print('before drop empty thread: ', frame.shape)
    frame = drop_thread_with_no_comments(frame)
    print('after drop empty thread: ', frame.shape)

    # Step 2: fill missing subject/body; report non-null body counts.
    print('fillna subject/body before: ', frame['body'].dropna().shape)
    frame = fillna_subject_and_body(frame)
    print('fillna subject/body after: ', frame['body'].dropna().shape)

    # Step 3: remove messages posted by the listed users.
    print('before remove coverall:', frame.shape)
    frame = remove_messages_post_by_certain_users(frame, ['coveralls'])
    print('after remove coverall:', frame.shape)

    frame.to_json(output_path, orient="records")
def test_add_recipients():
    """Verify ``add_recipients`` output on a four-row example.

    With sender ids ``a, a, b, c`` the ``KEY_RECIPIENT_IDS`` column must
    equal ``[['b'], ['a'], ['a', 'b'], []]``, and the ``other_field``
    column must still be among the result's columns.
    """
    example = _add_recipient_test_example_data(
        sender_ids=['a', 'a', 'b', 'c'],
    )
    with_recipients = add_recipients(example)

    want = [['b'], ['a'], ['a', 'b'], []]
    got = with_recipients[KEY_RECIPIENT_IDS].tolist()
    assert_equal(want, got)

    has_other_field = 'other_field' in with_recipients.columns
    assert_true(has_other_field)