Beispiel #1
0
    def testLocalDtFillTimesDailyOneTz(self):
        # Single-timezone case: three sections that all start on 2016-05-03
        # in the PST zone are summarised for the (2016, 5, 3) daily key.
        key = (2016, 5, 3)
        test_section_list = [
            self._createTestSection(
                arrow.Arrow(2016, 5, 3, start_hour, tzinfo=tz.gettz(PST)),
                PST)
            for start_hour in (6, 10, 23)
        ]

        section_key = eac.get_section_key_for_analysis_results()
        sections_df = self.ts.to_data_df(section_key, test_section_list)
        first_row = sections_df.iloc[0]
        logging.debug("First row of section_group_df = %s" % first_row)
        self.assertEqual(earmt._get_tz(sections_df), PST)

        summary = ecwms.ModeStatTimeSummary()
        earmt.local_dt_fill_times_daily(key, sections_df, summary)
        logging.debug("before starting checks, ms = %s" % summary)
        # 1462258800 is 2016-05-03T07:00:00Z; presumably local midnight on
        # the 3rd in the PST zone (constant defined elsewhere -- confirm).
        self.assertEqual(summary.ts, 1462258800)
        self.assertEqual(summary.local_dt.day, 3)
        self.assertEqual(summary.local_dt.timezone, PST)
    def testLocalDtFillTimesDailyOneTz(self):
        # Single-timezone case: three sections that all start on 2016-05-03
        # in the PST zone are summarised for the (2016, 5, 3) daily key.
        key = (2016, 5, 3)
        test_section_list = [
            self._createTestSection(
                arrow.Arrow(2016, 5, 3, start_hour, tzinfo=tz.gettz(PST)),
                PST)
            for start_hour in (6, 10, 23)
        ]

        sections_df = self.ts.to_data_df(
            esda.CLEANED_SECTION_KEY, test_section_list)
        first_row = sections_df.iloc[0]
        logging.debug("First row of section_group_df = %s" % first_row)
        self.assertEqual(earmt._get_tz(sections_df), PST)

        summary = ecwms.ModeStatTimeSummary()
        earmt.local_dt_fill_times_daily(key, sections_df, summary)
        logging.debug("before starting checks, ms = %s" % summary)
        # 1462258800 is 2016-05-03T07:00:00Z; presumably local midnight on
        # the 3rd in the PST zone (constant defined elsewhere -- confirm).
        self.assertEqual(summary.ts, 1462258800)
        self.assertEqual(summary.local_dt.day, 3)
        self.assertEqual(summary.local_dt.timezone, PST)
    def testLocalDtFillTimesDailyMultiTzGoingEast(self):
        # Scenario: a single calendar day (2016-05-03) of eastbound air
        # travel across several timezones. Extreme, but fairly common for
        # anyone who flies.
        key = (2016, 5, 3)

        # Leg 1: SFO -> JFK cross-country flight, leaving at 01:00 on the
        # 3rd in the PST zone.
        sfo_to_jfk = self._createTestSection(
            arrow.Arrow(2016, 5, 3, 1, tzinfo=tz.gettz(PST)), PST)
        # The flight takes 8 hours, so the end_ fields are filled from
        # 09:00 on the PST clock, with EST passed as the arrival zone.
        sfo_to_jfk['data'] = self._fillDates(
            sfo_to_jfk.data, "end_",
            arrow.Arrow(2016, 5, 3, 9, tzinfo=tz.gettz(PST)),
            EST)

        # Leg 2: JFK -> LHR, leaving at 13:00 in the EST zone.
        jfk_to_lhr = self._createTestSection(
            arrow.Arrow(2016, 5, 3, 13, tzinfo=tz.gettz(EST)), EST)
        # The end_ fields are filled from 21:00 on the EST clock, with BST
        # passed as the arrival zone; per the original scenario notes this
        # is already the early hours of the 4th in London.
        jfk_to_lhr['data'] = self._fillDates(
            jfk_to_lhr.data, "end_",
            arrow.Arrow(2016, 5, 3, 21, tzinfo=tz.gettz(EST)),
            BST)

        test_section_list = [sfo_to_jfk, jfk_to_lhr]

        # Next, the traveller takes the train from the airport to a London
        # hotel at 03:00 on the 4th, local (BST zone) time. By local time
        # this belongs to a new day.
        #
        # Rationale carried over from the original author: if the range
        # were closed at midnight in the timezone where the trip began, it
        # would end at midnight EST and ought to contain this train trip
        # (which starts in the evening on the EST clock), yet it does not.
        # Closing the range at midnight BST avoids that; midnight BST falls
        # during the flight, which is no different from an ordinary
        # single-timezone trip that spans the date change.
        # NOTE(review): the assertions below pin the zone to PST (where the
        # first trip started), not BST -- confirm which one is intended.
        next_day_first_trip = self._createTestSection(
            arrow.Arrow(2016, 5, 4, 3, tzinfo=tz.gettz(BST)),
            BST)

        sections_df = self.ts.to_data_df(
            esda.CLEANED_SECTION_KEY, test_section_list)
        first_row = sections_df.loc[0]
        logging.debug("first row is %s" % first_row)

        # Start timestamps increase monotonically
        # (2016-05-03T08:00:00Z, then 2016-05-03T17:00:00Z).
        expected_start_ts = [1462262400, 1462294800]
        self.assertEqual(sections_df.start_ts.tolist(), expected_start_ts)

        # The zone reported for the group is PST, the zone the first trip
        # started from.
        self.assertEqual(earmt._get_tz(sections_df), PST)

        summary = ecwms.ModeStatTimeSummary()
        earmt.local_dt_fill_times_daily(key, sections_df, summary)
        logging.debug("before starting checks, ms = %s" % summary)

        # 1462258800 is 2016-05-03T07:00:00Z.
        self.assertEqual(summary.ts, 1462258800)
        self.assertEqual(summary.local_dt.day, 3)
        self.assertEqual(summary.local_dt.timezone, PST)

        # The first trip of the following local day must start after the
        # timestamp chosen for this day's summary. (Original note: "This
        # test fails if it is not BST".)
        self.assertGreater(next_day_first_trip.data.start_ts, summary.ts)
    def testLocalDtFillTimesDailyMultiTzGoingWest(self):
        # Scenario: a single calendar day (2016-05-03) of westbound air
        # travel across several timezones. Extreme, but fairly common for
        # anyone who flies.
        key = (2016, 5, 3)

        # Leg 1: Delhi -> JFK non-stop, leaving at 01:00 on the 3rd in the
        # IST zone. Per the original notes the flight takes 15 hours, i.e.
        # it lands at 16:00 IST = 06:30 EDT. No end_ fields are overridden
        # for this leg.
        delhi_to_jfk = self._createTestSection(
            arrow.Arrow(2016, 5, 3, 1, tzinfo=tz.gettz(IST)), IST)

        # Leg 2: JFK -> SFO non-stop, leaving at 07:00 in the EST zone.
        jfk_to_sfo = self._createTestSection(
            arrow.Arrow(2016, 5, 3, 7, tzinfo=tz.gettz(EST)), EST)
        # The cross-country flight takes 8 hours, so the end_ fields are
        # filled from 15:00 on the EST clock (= 12:00 PDT per the original
        # notes), with PST passed as the arrival zone.
        jfk_to_sfo['data'] = self._fillDates(
            jfk_to_sfo.data, "end_",
            arrow.Arrow(2016, 5, 3, 15, tzinfo=tz.gettz(EST)),
            PST)

        test_section_list = [delhi_to_jfk, jfk_to_sfo]

        # Step 3: the earliest possible trip on the next day, leaving SFO
        # at midnight of the 4th in the PST zone. For the timestamp
        # algorithm to be correct, this must be after the timestamp chosen
        # for the range.
        next_day_first_trip = self._createTestSection(
            arrow.Arrow(2016, 5, 4, 0, tzinfo=tz.gettz(PST)),
            PST)

        sections_df = self.ts.to_data_df(
            esda.CLEANED_SECTION_KEY, test_section_list)

        # Start timestamps increase monotonically
        # (2016-05-02T19:30:00Z, then 2016-05-03T11:00:00Z); the next-day
        # trip starts at 2016-05-04T07:00:00Z.
        expected_start_ts = [1462217400, 1462273200]
        self.assertEqual(sections_df.start_ts.tolist(), expected_start_ts)
        self.assertEqual(next_day_first_trip.data.start_ts, 1462345200)

        # The zone reported for the group is IST, the zone the first trip
        # started from.
        self.assertEqual(earmt._get_tz(sections_df), IST)

        summary = ecwms.ModeStatTimeSummary()
        earmt.local_dt_fill_times_daily(key, sections_df, summary)
        logging.debug("before starting checks, ms = %s" % summary)

        # Design notes carried over from the original author:
        # - One option is to end the period at the end of the day in PST,
        #   so that the trip home from the airport etc. is captured; the
        #   next trip must start from the same timezone.
        # - When a trip straddles two timezones we must decide how its
        #   metrics are split, just as when a trip straddles two days.
        #   Trips are (arbitrarily) bucketed by start time, so the same
        #   logic buckets them by the timezone of the start time. Under
        #   that rule the bucket for this day ends at the end of the day
        #   in EDT; had any trips after noon in SF been included (e.g.
        #   going home from the airport), it would extend to midnight PDT.
        # - The underlying point is that we need some notion of when the
        #   bucket ended. To some extent it can be set arbitrarily between
        #   the end of the last trip on the 3rd and the start of the first
        #   trip on the 4th. Midnight in the timezone of the last trip on
        #   the 3rd is reasonable, since no trips started between that
        #   trip and midnight of the 3rd EST.
        # - Open worry: the first trip of the next day may fall on the
        #   next day in the end timezone of a trip but on the same day in
        #   its start timezone (e.g. the reverse journey), so using the
        #   end of the section may be best after all.
        # NOTE(review): the assertions below pin the zone of the first
        # section (IST) -- confirm this matches the intended rule.

        # 1462213800 is 2016-05-02T18:30:00Z.
        self.assertEqual(summary.ts, 1462213800)
        self.assertEqual(summary.local_dt.day, 3)
        self.assertEqual(summary.local_dt.timezone, IST)
        self.assertGreater(next_day_first_trip.data.start_ts, summary.ts)
Beispiel #5
0
    def testLocalDtFillTimesDailyMultiTzGoingEast(self):
        # Scenario: a single calendar day (2016-05-03) of eastbound air
        # travel across several timezones. Extreme, but fairly common for
        # anyone who flies.
        key = (2016, 5, 3)

        # Leg 1: SFO -> JFK cross-country flight, leaving at 01:00 on the
        # 3rd in the PST zone.
        sfo_to_jfk = self._createTestSection(
            arrow.Arrow(2016, 5, 3, 1, tzinfo=tz.gettz(PST)), PST)
        # The flight takes 8 hours, so the end_ fields are filled from
        # 09:00 on the PST clock, with EST passed as the arrival zone.
        sfo_to_jfk['data'] = self._fillDates(
            sfo_to_jfk.data, "end_",
            arrow.Arrow(2016, 5, 3, 9, tzinfo=tz.gettz(PST)),
            EST)

        # Leg 2: JFK -> LHR, leaving at 13:00 in the EST zone.
        jfk_to_lhr = self._createTestSection(
            arrow.Arrow(2016, 5, 3, 13, tzinfo=tz.gettz(EST)), EST)
        # The end_ fields are filled from 21:00 on the EST clock, with BST
        # passed as the arrival zone; per the original scenario notes this
        # is already the early hours of the 4th in London.
        jfk_to_lhr['data'] = self._fillDates(
            jfk_to_lhr.data, "end_",
            arrow.Arrow(2016, 5, 3, 21, tzinfo=tz.gettz(EST)),
            BST)

        test_section_list = [sfo_to_jfk, jfk_to_lhr]

        # Next, the traveller takes the train from the airport to a London
        # hotel at 03:00 on the 4th, local (BST zone) time. By local time
        # this belongs to a new day.
        #
        # Rationale carried over from the original author: if the range
        # were closed at midnight in the timezone where the trip began, it
        # would end at midnight EST and ought to contain this train trip
        # (which starts in the evening on the EST clock), yet it does not.
        # Closing the range at midnight BST avoids that; midnight BST falls
        # during the flight, which is no different from an ordinary
        # single-timezone trip that spans the date change.
        # NOTE(review): the assertions below pin the zone to PST (where the
        # first trip started), not BST -- confirm which one is intended.
        next_day_first_trip = self._createTestSection(
            arrow.Arrow(2016, 5, 4, 3, tzinfo=tz.gettz(BST)),
            BST)

        # Build the frame directly from the per-section entries.
        df_rows = [self.ts._to_df_entry(section)
                   for section in test_section_list]
        sections_df = pd.DataFrame(df_rows)

        first_row = sections_df.loc[0]
        logging.debug("first row is %s" % first_row)

        # Start timestamps increase monotonically
        # (2016-05-03T08:00:00Z, then 2016-05-03T17:00:00Z).
        expected_start_ts = [1462262400, 1462294800]
        self.assertEqual(sections_df.start_ts.tolist(), expected_start_ts)

        # The zone reported for the group is PST, the zone the first trip
        # started from.
        self.assertEqual(earmt._get_tz(sections_df), PST)

        summary = ecwms.ModeStatTimeSummary()
        earmt.local_dt_fill_times_daily(key, sections_df, summary)
        logging.debug("before starting checks, ms = %s" % summary)

        # 1462258800 is 2016-05-03T07:00:00Z.
        self.assertEqual(summary.ts, 1462258800)
        self.assertEqual(summary.local_dt.day, 3)
        self.assertEqual(summary.local_dt.timezone, PST)

        # The first trip of the following local day must start after the
        # timestamp chosen for this day's summary. (Original note: "This
        # test fails if it is not BST".)
        self.assertGreater(next_day_first_trip.data.start_ts, summary.ts)
Beispiel #6
0
    def testLocalDtFillTimesDailyMultiTzGoingWest(self):
        # Scenario: a single calendar day (2016-05-03) of westbound air
        # travel across several timezones. Extreme, but fairly common for
        # anyone who flies.
        key = (2016, 5, 3)

        # Leg 1: Delhi -> JFK non-stop, leaving at 01:00 on the 3rd in the
        # IST zone. Per the original notes the flight takes 15 hours, i.e.
        # it lands at 16:00 IST = 06:30 EDT. No end_ fields are overridden
        # for this leg.
        delhi_to_jfk = self._createTestSection(
            arrow.Arrow(2016, 5, 3, 1, tzinfo=tz.gettz(IST)), IST)

        # Leg 2: JFK -> SFO non-stop, leaving at 07:00 in the EST zone.
        jfk_to_sfo = self._createTestSection(
            arrow.Arrow(2016, 5, 3, 7, tzinfo=tz.gettz(EST)), EST)
        # The cross-country flight takes 8 hours, so the end_ fields are
        # filled from 15:00 on the EST clock (= 12:00 PDT per the original
        # notes), with PST passed as the arrival zone.
        jfk_to_sfo['data'] = self._fillDates(
            jfk_to_sfo.data, "end_",
            arrow.Arrow(2016, 5, 3, 15, tzinfo=tz.gettz(EST)),
            PST)

        test_section_list = [delhi_to_jfk, jfk_to_sfo]

        # Step 3: the earliest possible trip on the next day, leaving SFO
        # at midnight of the 4th in the PST zone. For the timestamp
        # algorithm to be correct, this must be after the timestamp chosen
        # for the range.
        next_day_first_trip = self._createTestSection(
            arrow.Arrow(2016, 5, 4, 0, tzinfo=tz.gettz(PST)),
            PST)

        # Build the frame directly from the per-section entries.
        df_rows = [self.ts._to_df_entry(section)
                   for section in test_section_list]
        sections_df = pd.DataFrame(df_rows)

        # Start timestamps increase monotonically
        # (2016-05-02T19:30:00Z, then 2016-05-03T11:00:00Z); the next-day
        # trip starts at 2016-05-04T07:00:00Z.
        expected_start_ts = [1462217400, 1462273200]
        self.assertEqual(sections_df.start_ts.tolist(), expected_start_ts)
        self.assertEqual(next_day_first_trip.data.start_ts, 1462345200)

        # The zone reported for the group is IST, the zone the first trip
        # started from.
        self.assertEqual(earmt._get_tz(sections_df), IST)

        summary = ecwms.ModeStatTimeSummary()
        earmt.local_dt_fill_times_daily(key, sections_df, summary)
        logging.debug("before starting checks, ms = %s" % summary)

        # Design notes carried over from the original author:
        # - One option is to end the period at the end of the day in PST,
        #   so that the trip home from the airport etc. is captured; the
        #   next trip must start from the same timezone.
        # - When a trip straddles two timezones we must decide how its
        #   metrics are split, just as when a trip straddles two days.
        #   Trips are (arbitrarily) bucketed by start time, so the same
        #   logic buckets them by the timezone of the start time. Under
        #   that rule the bucket for this day ends at the end of the day
        #   in EDT; had any trips after noon in SF been included (e.g.
        #   going home from the airport), it would extend to midnight PDT.
        # - The underlying point is that we need some notion of when the
        #   bucket ended. To some extent it can be set arbitrarily between
        #   the end of the last trip on the 3rd and the start of the first
        #   trip on the 4th. Midnight in the timezone of the last trip on
        #   the 3rd is reasonable, since no trips started between that
        #   trip and midnight of the 3rd EST.
        # - Open worry: the first trip of the next day may fall on the
        #   next day in the end timezone of a trip but on the same day in
        #   its start timezone (e.g. the reverse journey), so using the
        #   end of the section may be best after all.
        # NOTE(review): the assertions below pin the zone of the first
        # section (IST) -- confirm this matches the intended rule.

        # 1462213800 is 2016-05-02T18:30:00Z.
        self.assertEqual(summary.ts, 1462213800)
        self.assertEqual(summary.local_dt.day, 3)
        self.assertEqual(summary.local_dt.timezone, IST)
        self.assertGreater(next_day_first_trip.data.start_ts, summary.ts)