def test_add_recipients():
    """Check the recipient lists ``add_recipients`` produces for four senders.

    The example data is built with sender ids ``a, a, b, c``. The test expects
    the ``KEY_RECIPIENT_IDS`` column of the result to be
    ``[['b'], ['a'], ['a', 'b'], []]`` and the result to have an
    ``'other_field'`` column.
    """
    expected_recipients = [['b'], ['a'], ['a', 'b'], []]

    example = _add_recipient_test_example_data(
        sender_ids=['a', 'a', 'b', 'c']
    )
    result = add_recipients(example)

    assert_equal(expected_recipients, result[KEY_RECIPIENT_IDS].tolist())
    assert_true('other_field' in result.columns)
def dump2interactions(input_path, output_path):
    """Build the interactions frame from ``input_path`` and dump it as JSON.

    The frame returned by ``make_dataframe(input_path)`` is passed, in order,
    through ``add_recipients``, ``drop_thread_with_no_comments``,
    ``fillna_subject_and_body`` and ``remove_messages_post_by_certain_users``
    (called with ``['coveralls']``). Shapes are printed before and after each
    of the last three steps as a progress report.

    Args:
        input_path: Passed unchanged to ``make_dataframe``.
        output_path: Destination handed to ``DataFrame.to_json``; the frame is
            written with ``orient="records"``.
    """
    with_recipients = add_recipients(make_dataframe(input_path))

    # Step 1: drop threads that have no comments.
    print('before drop empty thread: ', with_recipients.shape)
    non_empty = drop_thread_with_no_comments(with_recipients)
    print('after drop empty thread: ', non_empty.shape)

    # Step 2: fill missing subject/body; report the non-null 'body' count
    # on each side of the fill.
    print('fillna subject/body before: ', non_empty['body'].dropna().shape)
    filled = fillna_subject_and_body(non_empty)
    print('fillna subject/body after: ', filled['body'].dropna().shape)

    # Step 3: remove messages posted by the listed users.
    print('before remove coverall:', filled.shape)
    excluded_users = ['coveralls']
    cleaned = remove_messages_post_by_certain_users(filled, excluded_users)
    print('after remove coverall:', cleaned.shape)

    cleaned.to_json(output_path, orient="records")
def test_add_recipients():
    """Verify ``add_recipients`` on example data with senders a, a, b, c.

    Asserts that the ``KEY_RECIPIENT_IDS`` column, converted to a list, equals
    ``[['b'], ['a'], ['a', 'b'], []]`` and that ``'other_field'`` is among the
    columns of the returned frame.
    """
    # NOTE(review): a function with this same name appears earlier in the
    # visible source; if both live in one module, this later definition
    # shadows the earlier one - confirm.
    senders = ['a', 'a', 'b', 'c']
    output = add_recipients(
        _add_recipient_test_example_data(sender_ids=senders)
    )

    recipient_lists = output[KEY_RECIPIENT_IDS].tolist()
    assert_equal([['b'], ['a'], ['a', 'b'], []], recipient_lists)
    assert_true('other_field' in output.columns)