def test_more_visits_A(self):
    """Check covisit output when one IP hits domain A once and domain B four times.

    Expected rows: A->B with covisit 1.0 and B->A with covisit 0.25,
    each with a visits value of 1.
    """
    # NOTE(review): the timestamp column is called 'time' here but
    # 'date_time' in test_multiple_visits_both -- confirm covisit does not
    # depend on that column's name.
    visit_rows = [
        ['A', 'x', '1'],
        ['B', 'x', '2'],
        ['B', 'x', '3'],
        ['B', 'x', '4'],
        ['B', 'x', '5'],
    ]
    visits = Test.spark.createDataFrame(visit_rows, ['domain', 'ip', 'time'])

    actual = co_visit.covisit(visits, 0.0, 0, False).toPandas()

    # np.array turns every cell of these mixed-type rows into a string, so
    # 'covisit' is cast back to float below; 'visits' is left as built.
    # NOTE(review): confirm the comparison helper tolerates that for 'visits'.
    expected_cells = np.array([
        ['A', 'B', 1.0, 1],
        ['B', 'A', 0.25, 1],
    ])
    expected = pd.DataFrame(
        expected_cells,
        columns=['domain', 'domain2', 'covisit', 'visits'],
    ).astype({'covisit': float})

    assert_frame_equal_with_sort(
        actual, expected, ['domain', 'domain2', 'covisit'])
def test_multiple_visits_both(self):
    """Check covisit output when two IPs each hit domains A and B twice.

    Expected rows: A->B and B->A, both with covisit 1.0 and a visits
    value of 4.
    """
    # NOTE(review): column order and the timestamp column name
    # ('date_time') differ from test_more_visits_A, which uses
    # ['domain', 'ip', 'time'] -- confirm covisit selects columns by name.
    visit_rows = [
        ['x', 'A', '10'],
        ['x', 'A', '20'],
        ['x', 'B', '11'],
        ['x', 'B', '21'],
        ['y', 'A', '30'],
        ['y', 'A', '40'],
        ['y', 'B', '31'],
        ['y', 'B', '41'],
    ]
    visits = Test.spark.createDataFrame(
        visit_rows, ['ip', 'domain', 'date_time'])

    actual = co_visit.covisit(visits, 0.0, 0, False).toPandas()

    # np.array turns every cell of these mixed-type rows into a string, so
    # 'covisit' is cast back to float below; 'visits' is left as built.
    # NOTE(review): confirm the comparison helper tolerates that for 'visits'.
    expected_cells = np.array([
        ['A', 'B', 1.0, 4],
        ['B', 'A', 1.0, 4],
    ])
    expected = pd.DataFrame(
        expected_cells,
        columns=['domain', 'domain2', 'covisit', 'visits'],
    ).astype({'covisit': float})

    assert_frame_equal_with_sort(
        actual, expected, ['domain', 'domain2', 'covisit'])