def test_detect_sequences_case4(self):
        """
        Zero-second time period: every event is more than 0 seconds away
        from its neighbour, so the expected result holds one sequence per
        event, each containing that single event with a sequence length of 0.
        :return:
        """
        events = [
            ("2018-12-03 12:17:38 +0000", "G"),
            ("2018-12-03 12:37:13 +0000", "J"),
            ("2018-12-03 13:06:41 +0000", "K"),
            ("2018-12-03 13:20:15 +0000", "F"),
        ]
        data_points = [
            DataPoint(timestamp, action) for timestamp, action in events
        ]

        detected = detect_sequences(data_points, 0)

        # One stand-alone, zero-length sequence per input event, in input order.
        expected = [
            Sequence(0, [DataPoint(timestamp, action)])
            for timestamp, action in events
        ]
        self.assertEqual(detected, expected)
 def test_to_data_point(self):
     """
     Happy path for to_data_point(): a raw "TS:..., Action:..." record is
     expected to yield a DataPoint with the timestamp (without the
     trailing "GMT") and the action.
     :return:
     """
     raw_record = "TS:2018-12-03 12:17:38 +0000 GMT, Action:G"
     wanted_point = DataPoint("2018-12-03 12:17:38 +0000", "G")

     parsed_point = to_data_point(raw_record)

     self.assertEqual(parsed_point, wanted_point)
 def test_select_case1(self):
     """
     select() receives four sequences but is asked for only the top two.
     The expectation is the entries with lengths 1200 and 600, in that
     order, expressed as plain tuples rather than Sequence/DataPoint
     objects.
     :return:
     """

     def two_point_sequence(duration_secs, start_ts, end_ts):
         # Every fixture sequence is a "G" event followed by a "J" event.
         return Sequence(
             duration_secs,
             [DataPoint(start_ts, "G"), DataPoint(end_ts, "J")],
         )

     candidates = [
         two_point_sequence(
             300, "2018-12-03 12:17:39 +0000", "2018-12-03 12:22:39 +0000"),
         two_point_sequence(
             1200, "2018-12-03 12:17:38 +0000", "2018-12-03 12:37:38 +0000"),
         two_point_sequence(
             60, "2018-12-03 13:17:01 +0000", "2018-12-03 13:18:01 +0000"),
         two_point_sequence(
             600, "2018-12-03 13:17:38 +0000", "2018-12-03 13:27:38 +0000"),
     ]

     longest = (
         1200,
         [
             ("2018-12-03 12:17:38 +0000", "G"),
             ("2018-12-03 12:37:38 +0000", "J"),
         ],
     )
     runner_up = (
         600,
         [
             ("2018-12-03 13:17:38 +0000", "G"),
             ("2018-12-03 13:27:38 +0000", "J"),
         ],
     )
     wanted = [longest, runner_up]

     selected = select(candidates, 2)
     self.assertEqual(selected, wanted)
 def test_sort_grouped_data(self):
     """
     Happy path for sort_grouped_data(): the input is a (USER_ID, data
     points) pair whose points are listed newest-first; the expected
     output is only the data points, ordered oldest-first.
     :return:
     """
     newest = ("2018-12-03 12:17:39 +0000", "G")
     middle = ("2018-12-03 12:15:39 +0000", "J")
     oldest = ("2018-12-03 12:14:39 +0000", "J")

     grouped = (
         USER_ID,
         [DataPoint(*fields) for fields in (newest, middle, oldest)],
     )
     ascending = [DataPoint(*fields) for fields in (oldest, middle, newest)]

     outcome = sort_grouped_data(grouped)
     self.assertEqual(ascending, outcome)
    def test_detect_sequences_case2(self):
        """
        30-minute time period with six records: the first three are close
        together, then there is a gap of roughly an hour before the last
        three. The expectation is two sequences of three records each,
        with each length computed from that sequence's last and first
        timestamps.
        :return:
        """
        first_run = [
            ("2018-12-03 12:17:38 +0000", "G"),
            ("2018-12-03 12:18:13 +0000", "J"),
            ("2018-12-03 12:19:41 +0000", "K"),
        ]
        second_run = [
            ("2018-12-03 13:20:15 +0000", "F"),
            ("2018-12-03 13:31:56 +0000", "G"),
            ("2018-12-03 13:37:38 +0000", "E"),
        ]

        def as_points(run):
            return [DataPoint(timestamp, action) for timestamp, action in run]

        def as_sequence(run):
            # Length is measured from the run's first to its last timestamp.
            span = calculate_sec_difference(run[-1][0], run[0][0], DATE_FMT)
            return Sequence(span, as_points(run))

        thirty_minutes_secs = 30 * 60
        detected = detect_sequences(
            as_points(first_run + second_run), thirty_minutes_secs)

        expected = [as_sequence(first_run), as_sequence(second_run)]
        self.assertEqual(detected, expected)
 def test_detect_sequences_case1(self):
     """
     One-hour time period where every record is within an hour of its
     neighbour. All six records are expected to come back as a single
     sequence whose length is computed from the last and first timestamps.
     """
     records = [
         ("2018-12-03 12:17:38 +0000", "G"),
         ("2018-12-03 12:37:13 +0000", "J"),
         ("2018-12-03 13:06:41 +0000", "K"),
         ("2018-12-03 13:20:15 +0000", "F"),
         ("2018-12-03 13:31:56 +0000", "G"),
         ("2018-12-03 13:37:38 +0000", "E"),
     ]
     earliest = records[0][0]
     latest = records[-1][0]
     one_hour_secs = 60 * 60

     detected = detect_sequences(
         [DataPoint(timestamp, action) for timestamp, action in records],
         one_hour_secs,
     )

     whole_span = calculate_sec_difference(latest, earliest, DATE_FMT)
     expected = [
         Sequence(
             whole_span,
             [DataPoint(timestamp, action) for timestamp, action in records],
         )
     ]
     self.assertEqual(detected, expected)
    def test_detect_sequences_case3(self):
        """
        30-minute time period with six records that split three ways:
        the first three form one sequence, the next two form a second,
        and the final record stands alone, so its sequence length is zero.
        :return:
        """
        opening_run = [
            ("2018-12-03 12:17:38 +0000", "G"),
            ("2018-12-03 12:18:13 +0000", "J"),
            ("2018-12-03 12:19:41 +0000", "K"),
        ]
        middle_run = [
            ("2018-12-03 13:20:15 +0000", "F"),
            ("2018-12-03 13:37:38 +0000", "E"),
        ]
        lone_record = ("2018-12-03 14:37:38 +0000", "E")

        def as_points(run):
            return [DataPoint(timestamp, action) for timestamp, action in run]

        def as_sequence(run):
            # Length is measured from the run's first to its last timestamp.
            span = calculate_sec_difference(run[-1][0], run[0][0], DATE_FMT)
            return Sequence(span, as_points(run))

        thirty_minutes_secs = 30 * 60
        detected = detect_sequences(
            as_points(opening_run + middle_run + [lone_record]),
            thirty_minutes_secs,
        )

        expected = [
            as_sequence(opening_run),
            as_sequence(middle_run),
            # A single-element sequence is expected with a literal length of 0.
            Sequence(0, [DataPoint(*lone_record)]),
        ]
        self.assertEqual(detected, expected)