def testLocalDtFillTimesDailyOneTz(self):
    # Daily fill when every section of the day starts in the same timezone.
    #
    # Builds three sections that start at 06:00, 10:00 and 23:00 on
    # 2016-05-03 in the PST zone, converts them to a dataframe, and checks
    # what local_dt_fill_times_daily() writes into a fresh summary object.
    day_key = (2016, 5, 3)
    sections = [
        self._createTestSection(
            arrow.Arrow(2016, 5, 3, start_hour, tzinfo=tz.gettz(PST)), PST)
        for start_hour in (6, 10, 23)
    ]

    section_df = self.ts.to_data_df(
        eac.get_section_key_for_analysis_results(), sections)
    logging.debug("First row of section_group_df = %s" % section_df.iloc[0])

    # With a single timezone in play there is no ambiguity about the zone.
    self.assertEqual(earmt._get_tz(section_df), PST)

    summary = ecwms.ModeStatTimeSummary()
    earmt.local_dt_fill_times_daily(day_key, section_df, summary)
    logging.debug("before starting checks, ms = %s" % summary)

    # 1462258800 is 2016-05-03T07:00:00Z; presumably local midnight at the
    # start of the 3rd in the PST zone -- confirm against the PST constant.
    self.assertEqual(summary.ts, 1462258800)
    self.assertEqual(summary.local_dt.day, 3)
    self.assertEqual(summary.local_dt.timezone, PST)
def testLocalDtFillTimesDailyOneTz(self):
    # Daily fill when every section of the day starts in the same timezone.
    #
    # Three sections are created on 2016-05-03 in the PST zone (starting at
    # 06:00, 10:00 and 23:00), turned into a dataframe of cleaned sections,
    # and passed to local_dt_fill_times_daily() together with an empty
    # summary; the test then inspects the fields written to the summary.
    day_key = (2016, 5, 3)
    start_hours = (6, 10, 23)
    sections = [
        self._createTestSection(
            arrow.Arrow(2016, 5, 3, hour, tzinfo=tz.gettz(PST)), PST)
        for hour in start_hours
    ]

    section_df = self.ts.to_data_df(esda.CLEANED_SECTION_KEY, sections)
    logging.debug("First row of section_group_df = %s" % section_df.iloc[0])

    # Only one timezone is involved, so that is the one reported.
    self.assertEqual(earmt._get_tz(section_df), PST)

    summary = ecwms.ModeStatTimeSummary()
    earmt.local_dt_fill_times_daily(day_key, section_df, summary)
    logging.debug("before starting checks, ms = %s" % summary)

    # 1462258800 is 2016-05-03T07:00:00Z; presumably local midnight at the
    # start of the 3rd in the PST zone -- confirm against the PST constant.
    self.assertEqual(summary.ts, 1462258800)
    self.assertEqual(summary.local_dt.day, 3)
    self.assertEqual(summary.local_dt.timezone, PST)
def testLocalDtFillTimesDailyMultiTzGoingEast(self):
    # Daily fill for a day of eastbound air travel that crosses timezones.
    # Perhaps an extreme use case, but a fairly common one with air travel.
    #
    # Scenario, as encoded below:
    #   1. Leave SFO at 01:00 on May 3rd (PST zone) on a cross-country
    #      flight. The section end is set 8 hours later, 09:00 in the PST
    #      zone (noon Eastern daylight time), and _fillDates is given the
    #      EST zone for the "end_" fields.
    #   2. Leave JFK for LHR at 13:00 (EST zone). The section end is set to
    #      21:00 in the EST zone and _fillDates is given the BST zone.
    #      NOTE(review): the original notes described a 7 hour flight
    #      landing at 8pm Eastern, but the code uses hour 21 -- confirm.
    #   3. Catch the train from the airport to the hotel at 03:00 on May
    #      4th (BST zone). By local time this is a new day's trip, so it is
    #      not part of the dataframe; it is only compared with the filled
    #      timestamp at the end.
    #
    # Rationale recorded by the original author: if the range is closed at
    # midnight in the timezone where the travel began, the train trip in
    # step 3 starts before that boundary and so ought to be in the range,
    # yet it is not; closing the range at midnight in the timezone where
    # the last section ended (BST) avoids that. Midnight BST falls during
    # the flight, which is no different from a single-timezone trip that
    # spans a date change.
    # NOTE(review): despite that rationale, the assertions below pin the
    # timezone of the first section's start (PST), not BST.
    day_key = (2016, 5, 3)

    sfo_to_jfk = self._createTestSection(
        arrow.Arrow(2016, 5, 3, 1, tzinfo=tz.gettz(PST)), PST)
    sfo_to_jfk['data'] = self._fillDates(
        sfo_to_jfk.data, "end_",
        arrow.Arrow(2016, 5, 3, 9, tzinfo=tz.gettz(PST)), EST)

    jfk_to_lhr = self._createTestSection(
        arrow.Arrow(2016, 5, 3, 13, tzinfo=tz.gettz(EST)), EST)
    jfk_to_lhr['data'] = self._fillDates(
        jfk_to_lhr.data, "end_",
        arrow.Arrow(2016, 5, 3, 21, tzinfo=tz.gettz(EST)), BST)

    sections = [sfo_to_jfk, jfk_to_lhr]

    following_day_trip = self._createTestSection(
        arrow.Arrow(2016, 5, 4, 3, tzinfo=tz.gettz(BST)), BST)

    section_df = self.ts.to_data_df(esda.CLEANED_SECTION_KEY, sections)
    logging.debug("first row is %s" % section_df.loc[0])

    # Start timestamps increase monotonically:
    # 2016-05-03T08:00:00Z, then 2016-05-03T17:00:00Z.
    self.assertEqual(section_df.start_ts.tolist(),
                     [1462262400, 1462294800])

    # The reported timezone is PST, where the first trip started.
    self.assertEqual(earmt._get_tz(section_df), PST)

    summary = ecwms.ModeStatTimeSummary()
    earmt.local_dt_fill_times_daily(day_key, section_df, summary)
    logging.debug("before starting checks, ms = %s" % summary)

    # 1462258800 is 2016-05-03T07:00:00Z, one hour before the first
    # section's start at 01:00 in the PST zone.
    self.assertEqual(summary.ts, 1462258800)
    self.assertEqual(summary.local_dt.day, 3)
    self.assertEqual(summary.local_dt.timezone, PST)

    # The next day's first trip must start after the filled timestamp.
    # NOTE(review): the original comment here read "This test fails if it
    # is not BST", which does not match the PST assertion above.
    self.assertGreater(following_day_trip.data.start_ts, summary.ts)
def testLocalDtFillTimesDailyMultiTzGoingWest(self):
    # Daily fill for a day of westbound air travel that crosses timezones.
    # Perhaps an extreme use case, but a fairly common one with air travel.
    #
    # Scenario, as encoded below:
    #   1. Leave Delhi at 01:00 on May 3rd (IST zone) on the non-stop to
    #      JFK. Per the original notes the flight takes 15 hours, landing
    #      at 16:00 IST = 6:30am Eastern daylight time. No end time is
    #      filled in for this section.
    #   2. Leave JFK for SFO at 07:00 (EST zone) on a non-stop. The section
    #      end is set 8 hours later, 15:00 in the EST zone (noon Pacific
    #      daylight time), and _fillDates is given the PST zone for the
    #      "end_" fields.
    #   3. Start a trip out of SFO at midnight on the 4th (PST zone), the
    #      earliest possible trip of the next day. For the timestamp
    #      algorithm to be correct this must fall after the timestamp
    #      filled in for the range, so it is kept out of the dataframe and
    #      only compared at the end.
    #
    # Design notes recorded by the original author:
    #   - When a trip straddles two timezones we must decide how metrics
    #     are split, just as when a trip straddles two days. Bucketing is
    #     (arbitrarily) by start time, so by the same logic the bucket
    #     follows the timezone of the start time.
    #   - What is needed is some notion of when the bucket ended; to some
    #     extent it can be placed anywhere between the end of the last trip
    #     on the 3rd and the start of the first trip on the 4th.
    #   - Midnight in the timezone of the last trip on the 3rd is a
    #     reasonable pick, since no trips start between that trip and
    #     midnight there. Had any trips after noon in SF been included
    #     (e.g. going home from the airport), it would extend to midnight
    #     Pacific time.
    #   - Open worry: the first trip of the next day may fall on the next
    #     day in the trip's end timezone but on the same day in its start
    #     timezone (e.g. the reverse trip); using the end of the section
    #     may be best after all.
    # NOTE(review): those notes weigh several alternatives; what the
    # assertions below actually pin is the timezone of the first section's
    # start (IST).
    day_key = (2016, 5, 3)

    del_to_jfk = self._createTestSection(
        arrow.Arrow(2016, 5, 3, 1, tzinfo=tz.gettz(IST)), IST)

    jfk_to_sfo = self._createTestSection(
        arrow.Arrow(2016, 5, 3, 7, tzinfo=tz.gettz(EST)), EST)
    jfk_to_sfo['data'] = self._fillDates(
        jfk_to_sfo.data, "end_",
        arrow.Arrow(2016, 5, 3, 15, tzinfo=tz.gettz(EST)), PST)

    sections = [del_to_jfk, jfk_to_sfo]

    following_day_trip = self._createTestSection(
        arrow.Arrow(2016, 5, 4, 0, tzinfo=tz.gettz(PST)), PST)

    section_df = self.ts.to_data_df(esda.CLEANED_SECTION_KEY, sections)

    # Start timestamps increase monotonically:
    # 2016-05-02T19:30:00Z, then 2016-05-03T11:00:00Z.
    self.assertEqual(section_df.start_ts.tolist(),
                     [1462217400, 1462273200])
    # 2016-05-04T07:00:00Z
    self.assertEqual(following_day_trip.data.start_ts, 1462345200)

    # The reported timezone is IST, where the first trip started.
    self.assertEqual(earmt._get_tz(section_df), IST)

    summary = ecwms.ModeStatTimeSummary()
    earmt.local_dt_fill_times_daily(day_key, section_df, summary)
    logging.debug("before starting checks, ms = %s" % summary)

    # 1462213800 is 2016-05-02T18:30:00Z, one hour before the first
    # section's start at 01:00 in the IST zone.
    self.assertEqual(summary.ts, 1462213800)
    self.assertEqual(summary.local_dt.day, 3)
    self.assertEqual(summary.local_dt.timezone, IST)

    # The next day's first trip must start after the filled timestamp.
    self.assertGreater(following_day_trip.data.start_ts, summary.ts)
def testLocalDtFillTimesDailyMultiTzGoingEast(self):
    # Daily fill for a day of eastbound air travel that crosses timezones.
    # Perhaps an extreme use case, but a fairly common one with air travel.
    #
    # Scenario, as encoded below:
    #   1. Leave SFO at 01:00 on May 3rd (PST zone) on a cross-country
    #      flight. The section end is set 8 hours later, 09:00 in the PST
    #      zone (noon Eastern daylight time), and _fillDates is given the
    #      EST zone for the "end_" fields.
    #   2. Leave JFK for LHR at 13:00 (EST zone). The section end is set to
    #      21:00 in the EST zone and _fillDates is given the BST zone.
    #      NOTE(review): the original notes described a 7 hour flight
    #      landing at 8pm Eastern, but the code uses hour 21 -- confirm.
    #   3. Catch the train from the airport to the hotel at 03:00 on May
    #      4th (BST zone). By local time this is a new day's trip, so it is
    #      not part of the dataframe; it is only compared with the filled
    #      timestamp at the end.
    #
    # Rationale recorded by the original author: if the range is closed at
    # midnight in the timezone where the travel began, the train trip in
    # step 3 starts before that boundary and so ought to be in the range,
    # yet it is not; closing the range at midnight in the timezone where
    # the last section ended (BST) avoids that. Midnight BST falls during
    # the flight, which is no different from a single-timezone trip that
    # spans a date change.
    # NOTE(review): despite that rationale, the assertions below pin the
    # timezone of the first section's start (PST), not BST.
    day_key = (2016, 5, 3)

    sfo_to_jfk = self._createTestSection(
        arrow.Arrow(2016, 5, 3, 1, tzinfo=tz.gettz(PST)), PST)
    sfo_to_jfk['data'] = self._fillDates(
        sfo_to_jfk.data, "end_",
        arrow.Arrow(2016, 5, 3, 9, tzinfo=tz.gettz(PST)), EST)

    jfk_to_lhr = self._createTestSection(
        arrow.Arrow(2016, 5, 3, 13, tzinfo=tz.gettz(EST)), EST)
    jfk_to_lhr['data'] = self._fillDates(
        jfk_to_lhr.data, "end_",
        arrow.Arrow(2016, 5, 3, 21, tzinfo=tz.gettz(EST)), BST)

    sections = [sfo_to_jfk, jfk_to_lhr]

    following_day_trip = self._createTestSection(
        arrow.Arrow(2016, 5, 4, 3, tzinfo=tz.gettz(BST)), BST)

    # One dataframe row per section, built from the timeseries' own
    # per-entry conversion.
    df_rows = [self.ts._to_df_entry(section) for section in sections]
    section_df = pd.DataFrame(df_rows)
    logging.debug("first row is %s" % section_df.loc[0])

    # Start timestamps increase monotonically:
    # 2016-05-03T08:00:00Z, then 2016-05-03T17:00:00Z.
    self.assertEqual(section_df.start_ts.tolist(),
                     [1462262400, 1462294800])

    # The reported timezone is PST, where the first trip started.
    self.assertEqual(earmt._get_tz(section_df), PST)

    summary = ecwms.ModeStatTimeSummary()
    earmt.local_dt_fill_times_daily(day_key, section_df, summary)
    logging.debug("before starting checks, ms = %s" % summary)

    # 1462258800 is 2016-05-03T07:00:00Z, one hour before the first
    # section's start at 01:00 in the PST zone.
    self.assertEqual(summary.ts, 1462258800)
    self.assertEqual(summary.local_dt.day, 3)
    self.assertEqual(summary.local_dt.timezone, PST)

    # The next day's first trip must start after the filled timestamp.
    # NOTE(review): the original comment here read "This test fails if it
    # is not BST", which does not match the PST assertion above.
    self.assertGreater(following_day_trip.data.start_ts, summary.ts)
def testLocalDtFillTimesDailyMultiTzGoingWest(self):
    # Daily fill for a day of westbound air travel that crosses timezones.
    # Perhaps an extreme use case, but a fairly common one with air travel.
    #
    # Scenario, as encoded below:
    #   1. Leave Delhi at 01:00 on May 3rd (IST zone) on the non-stop to
    #      JFK. Per the original notes the flight takes 15 hours, landing
    #      at 16:00 IST = 6:30am Eastern daylight time. No end time is
    #      filled in for this section.
    #   2. Leave JFK for SFO at 07:00 (EST zone) on a non-stop. The section
    #      end is set 8 hours later, 15:00 in the EST zone (noon Pacific
    #      daylight time), and _fillDates is given the PST zone for the
    #      "end_" fields.
    #   3. Start a trip out of SFO at midnight on the 4th (PST zone), the
    #      earliest possible trip of the next day. For the timestamp
    #      algorithm to be correct this must fall after the timestamp
    #      filled in for the range, so it is kept out of the dataframe and
    #      only compared at the end.
    #
    # Design notes recorded by the original author:
    #   - When a trip straddles two timezones we must decide how metrics
    #     are split, just as when a trip straddles two days. Bucketing is
    #     (arbitrarily) by start time, so by the same logic the bucket
    #     follows the timezone of the start time.
    #   - What is needed is some notion of when the bucket ended; to some
    #     extent it can be placed anywhere between the end of the last trip
    #     on the 3rd and the start of the first trip on the 4th.
    #   - Midnight in the timezone of the last trip on the 3rd is a
    #     reasonable pick, since no trips start between that trip and
    #     midnight there. Had any trips after noon in SF been included
    #     (e.g. going home from the airport), it would extend to midnight
    #     Pacific time.
    #   - Open worry: the first trip of the next day may fall on the next
    #     day in the trip's end timezone but on the same day in its start
    #     timezone (e.g. the reverse trip); using the end of the section
    #     may be best after all.
    # NOTE(review): those notes weigh several alternatives; what the
    # assertions below actually pin is the timezone of the first section's
    # start (IST).
    day_key = (2016, 5, 3)

    del_to_jfk = self._createTestSection(
        arrow.Arrow(2016, 5, 3, 1, tzinfo=tz.gettz(IST)), IST)

    jfk_to_sfo = self._createTestSection(
        arrow.Arrow(2016, 5, 3, 7, tzinfo=tz.gettz(EST)), EST)
    jfk_to_sfo['data'] = self._fillDates(
        jfk_to_sfo.data, "end_",
        arrow.Arrow(2016, 5, 3, 15, tzinfo=tz.gettz(EST)), PST)

    sections = [del_to_jfk, jfk_to_sfo]

    following_day_trip = self._createTestSection(
        arrow.Arrow(2016, 5, 4, 0, tzinfo=tz.gettz(PST)), PST)

    # One dataframe row per section, built from the timeseries' own
    # per-entry conversion.
    df_rows = [self.ts._to_df_entry(section) for section in sections]
    section_df = pd.DataFrame(df_rows)

    # Start timestamps increase monotonically:
    # 2016-05-02T19:30:00Z, then 2016-05-03T11:00:00Z.
    self.assertEqual(section_df.start_ts.tolist(),
                     [1462217400, 1462273200])
    # 2016-05-04T07:00:00Z
    self.assertEqual(following_day_trip.data.start_ts, 1462345200)

    # The reported timezone is IST, where the first trip started.
    self.assertEqual(earmt._get_tz(section_df), IST)

    summary = ecwms.ModeStatTimeSummary()
    earmt.local_dt_fill_times_daily(day_key, section_df, summary)
    logging.debug("before starting checks, ms = %s" % summary)

    # 1462213800 is 2016-05-02T18:30:00Z, one hour before the first
    # section's start at 01:00 in the IST zone.
    self.assertEqual(summary.ts, 1462213800)
    self.assertEqual(summary.local_dt.day, 3)
    self.assertEqual(summary.local_dt.timezone, IST)

    # The next day's first trip must start after the filled timestamp.
    self.assertGreater(following_day_trip.data.start_ts, summary.ts)