Exemple #1
0
    def test_more_visits_A(self):
        """Check covisit output when ip 'x' has one 'A' row and four 'B' rows.

        The expected frame contains the two directed pairs A->B (covisit 1.0)
        and B->A (covisit 0.25), each with a 'visits' value of 1.
        """
        visit_rows = [
            ['A', 'x', '1'],
            ['B', 'x', '2'],
            ['B', 'x', '3'],
            ['B', 'x', '4'],
            ['B', 'x', '5'],
        ]
        visits_sdf = Test.spark.createDataFrame(visit_rows, ['domain', 'ip', 'time'])

        # The three trailing positional arguments are forwarded as-is; their
        # meaning is defined by co_visit.covisit, which is not shown here.
        actual = co_visit.covisit(visits_sdf, 0.0, 0, False).toPandas()

        # np.array coerces the mixed-type rows to strings, which is why
        # 'covisit' is cast back to float below.
        # NOTE(review): 'visits' is left string-typed -- confirm that
        # assert_frame_equal_with_sort tolerates that.
        expected_cells = np.array([
            ['A', 'B', 1.0, 1],
            ['B', 'A', 0.25, 1],
        ])
        expected = pd.DataFrame(
            expected_cells,
            columns=['domain', 'domain2', 'covisit', 'visits'],
        )
        expected['covisit'] = expected['covisit'].astype(float)

        sort_keys = ['domain', 'domain2', 'covisit']
        assert_frame_equal_with_sort(actual, expected, sort_keys)
Exemple #2
0
    def test_multiple_visits_both(self):
        """Check covisit output when two ips each have two 'A' and two 'B' rows.

        The expected frame contains the directed pairs A->B and B->A, both
        with covisit 1.0 and a 'visits' value of 4.
        """
        visit_rows = [
            ['x', 'A', '10'],
            ['x', 'A', '20'],
            ['x', 'B', '11'],
            ['x', 'B', '21'],
            ['y', 'A', '30'],
            ['y', 'A', '40'],
            ['y', 'B', '31'],
            ['y', 'B', '41'],
        ]
        # NOTE(review): the third column is named 'date_time' here -- confirm
        # this matches the schema co_visit.covisit expects.
        visits_sdf = Test.spark.createDataFrame(visit_rows, ['ip', 'domain', 'date_time'])

        # The three trailing positional arguments are forwarded as-is; their
        # meaning is defined by co_visit.covisit, which is not shown here.
        actual = co_visit.covisit(visits_sdf, 0.0, 0, False).toPandas()

        # np.array coerces the mixed-type rows to strings, which is why
        # 'covisit' is cast back to float below.
        # NOTE(review): 'visits' is left string-typed -- confirm that
        # assert_frame_equal_with_sort tolerates that.
        expected_cells = np.array([
            ['A', 'B', 1.0, 4],
            ['B', 'A', 1.0, 4],
        ])
        expected = pd.DataFrame(
            expected_cells,
            columns=['domain', 'domain2', 'covisit', 'visits'],
        )
        expected['covisit'] = expected['covisit'].astype(float)

        sort_keys = ['domain', 'domain2', 'covisit']
        assert_frame_equal_with_sort(actual, expected, sort_keys)