def testZeroDurationPlaceInterpolationMultiSync(self):
    # Test for 545114feb5ac15caac4110d39935612525954b71
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-01-12"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-01-13"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 12})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 13})
    cacheKey_1 = "diary/trips-2016-01-12"
    cacheKey_2 = "diary/trips-2016-01-13"
    ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
    ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    # Although we process the day's data in two batches, we should get the same result
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)

    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    # Although we process the day's data in two batches, we should get the same result
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
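# A minimal sketch (hypothetical, not part of the original suite) of how the
# repeated "load ground truth for a day, fetch that day's geojson, compare"
# block in these tests could be factored into a helper. It only uses calls
# that already appear in the tests; the helper name itself is made up.
def check_day_against_ground_truth(self, ground_truth_file, day_ld):
    # Load the expected diary with the same BSON-aware object hook
    with open(ground_truth_file) as gfp:
        ground_truth = json.load(gfp, object_hook=bju.object_hook)
    # Fetch the processed timeline for the single day and compare it
    api_result = gfc.get_geojson_for_dt(self.testUUID, day_ld, day_ld)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)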
def testAug10(self):
    # This is a more complex day. Tests:
    # PR #302 (trip to optometrist)
    # PR #352 (split optometrist trip)
    dataFile = "emission/tests/data/real_examples/shankari_2016-08-10"
    ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10})
    cacheKey = "diary/trips-2016-08-10"
    with open(dataFile+".ground_truth") as gfp:
        ground_truth = json.load(gfp, object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    # runIntakePipeline does not run the common trips, habitica or store views to cache
    # So let's manually store to the cache
    # tc_query = estt.TimeComponentQuery("data.star_local_dt", ld, ld)
    # enuah.UserCacheHandler.getUserCacheHandler(self.testUUID).storeTimelineToCache(tc_query)

    # cached_result = edb.get_usercache_db().find_one({'user_id': self.testUUID,
    #                                                  "metadata.key": cacheKey})
    api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

    # self.compare_result(cached_result, ground_truth)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def testJun20(self):
    # This is a fairly straightforward day. Tests mainly:
    # - ordering of trips
    # - handling repeated location entries with different write timestamps
    # We have two identical location points with ts = 1466436483.395 and write_ts = 1466436496.4, 1466436497.047
    dataFile = "emission/tests/data/real_examples/shankari_2016-06-20"
    ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
    cacheKey = "diary/trips-2016-06-20"
    with open(dataFile+".ground_truth") as gfp:
        ground_truth = json.load(gfp, object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    # runIntakePipeline does not run the common trips, habitica or store views to cache
    # So let's manually store to the cache
    # tc_query = estt.TimeComponentQuery("data.star_local_dt", ld, ld)
    # enuah.UserCacheHandler.getUserCacheHandler(self.testUUID).storeTimelineToCache(tc_query)

    # cached_result = edb.get_usercache_db().find_one({'user_id': self.testUUID,
    #                                                  "metadata.key": cacheKey})
    api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

    # self.compare_result(cached_result, ground_truth)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def testJul22SplitAroundReboot(self):
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    cacheKey_1 = "diary/trips-2016-07-22"
    cacheKey_2 = "diary/trips-2016-07-25"
    ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
    ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    # Although we process the day's data in two batches, we should get the same result
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)

    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    # Although we process the day's data in two batches, we should get the same result
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
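# A minimal sketch (hypothetical, not from the original suite) of the
# two-batch pattern used in the multi-day tests above: the second file is
# loaded as raw entries for the same test user and the intake pipeline is
# run again. Only calls already present in these tests are used.
def load_second_batch_and_rerun(self, dataFile):
    with open(dataFile) as fp:
        self.entries = json.load(fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)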
def setUp(self): etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27") self.androidUUID = self.testUUID etc.setupRealExample(self, "emission/tests/data/real_examples/iphone_2015-11-06") self.iosUUID = self.testUUID eaicf.filter_accuracy(self.iosUUID) logging.debug("androidUUID = %s, iosUUID = %s" % (self.androidUUID, self.iosUUID))
def testAug11(self):
    # This is a more complex day. Tests:
    # PR #352 (should not split trip to Oakland)
    # PR #348 (trip from station to OAK DOT)
    # PR #357 (trip to Radio Shack is complete and not truncated)
    # PR #345 (no cleaned trips are skipped)
    dataFile = "emission/tests/data/real_examples/shankari_2016-08-11"
    ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 11})
    cacheKey = "diary/trips-2016-08-11"
    ground_truth = json.load(open(dataFile+".ground_truth"), object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    # runIntakePipeline does not run the common trips, habitica or store views to cache
    # So let's manually store to the cache
    # tc_query = estt.TimeComponentQuery("data.star_local_dt", ld, ld)
    # enuah.UserCacheHandler.getUserCacheHandler(self.testUUID).storeTimelineToCache(tc_query)

    # cached_result = edb.get_usercache_db().find_one({'user_id': self.testUUID,
    #                                                  "metadata.key": cacheKey})
    api_result = gfc.get_geojson_for_dt(self.testUUID, ld, ld)

    # self.compare_result(cached_result, ground_truth)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def setUp(self): self.clearRelatedDb() etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27") eaicf.filter_accuracy(self.testUUID) estfm.move_all_filters_to_data() logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count()) self.day_start_ts = 1440658800 self.day_end_ts = 1440745200
def testResetToStart(self):
    """
    - Load data for both days
    - Run pipelines
    - Verify that all is well
    - Reset to start
    - Verify that there is no analysis data
    - Re-run pipelines
    - Verify that all is well
    """
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    cacheKey_1 = "diary/trips-2016-07-22"
    cacheKey_2 = "diary/trips-2016-07-25"
    ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
    ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Check results: so far, so good
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Reset pipeline to start
    epr.reset_user_to_start(self.testUUID, is_dry_run=False)

    # Now there are no results
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.assertEqual(api_result, [])
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.assertEqual(api_result, [])

    # Re-run the pipeline again
    etc.runIntakePipeline(self.testUUID)

    # Should be back to ground truth
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
def testResetToPast(self):
    """
    - Load data for both days
    - Run pipelines
    - Verify that all is well
    - Reset to a date before both
    - Verify that analysis data for both days is removed
    - Re-run pipelines
    - Verify that all is well
    """
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    cacheKey_1 = "diary/trips-2016-07-22"
    cacheKey_2 = "diary/trips-2016-07-25"
    ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
    ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Verify that all is well
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Reset to a date well before the two days
    reset_ts = arrow.get("2015-07-24").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # Data should be completely deleted
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.assertEqual(api_result, [])
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.assertEqual(api_result, [])

    # Re-run the pipeline again
    etc.runIntakePipeline(self.testUUID)

    # Should reconstruct everything
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
def setUp(self): etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27") eaicf.filter_accuracy(self.testUUID) estfm.move_all_filters_to_data() logging.info("After loading, timeseries db size = %s" % edb.get_timeseries_db().count()) self.day_start_ts = 1440658800 self.day_end_ts = 1440745200 self.day_start_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27}) self.day_end_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
def setUp(self): etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27") self.androidUUID = self.testUUID eaicf.filter_accuracy(self.androidUUID) self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6") self.entries = json.load(open("emission/tests/data/real_examples/iphone_2015-11-06"), object_hook = bju.object_hook) etc.setupRealExampleWithEntries(self) self.iosUUID = self.testUUID eaicf.filter_accuracy(self.iosUUID)
def setUp(self):
    self.clearRelatedDb()
    edb.get_trip_db().remove()
    edb.get_section_db().remove()
    edb.get_trip_new_db().remove()
    edb.get_section_new_db().remove()
    etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-21")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
def testSunilShortTrips(self):
    dataFile = "emission/tests/data/real_examples/sunil_2016-07-27"
    start_ld = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})
    cacheKey = "diary/trips-2016-07-27"
    ground_truth = json.load(open(dataFile+".ground_truth"), object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)

    # The trips on this day are all very short, so we expect the processed
    # diary for the day to be empty
    self.assertEqual(api_result, [])
def testAirTripHawaiiEnd(self):
    dataFile = "emission/tests/data/real_examples/shankari_2016-08-04"
    start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 4})
    cacheKey = "diary/trips-2016-08-04"
    ground_truth = json.load(open(dataFile+".ground_truth"), object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)

    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def setUp(self):
    self.analysis_conf_path = \
        etc.set_analysis_config("intake.cleaning.filter_accuracy.enable", True)
    etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    self.androidUUID = self.testUUID
    self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
    self.entries = json.load(open("emission/tests/data/real_examples/iphone_2015-11-06"),
                             object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    self.iosUUID = self.testUUID
    eaicf.filter_accuracy(self.iosUUID)
    logging.debug("androidUUID = %s, iosUUID = %s" % (self.androidUUID, self.iosUUID))
def testJumpSmoothingSectionStart(self):
    dataFile = "emission/tests/data/real_examples/shankari_2016-independence_day_jump_bus_start"
    start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 15})
    cacheKey = "diary/trips-2016-08-15"
    ground_truth = json.load(open("emission/tests/data/real_examples/shankari_2016-independence_day.ground_truth"),
                             object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)

    # Although we process the day's data in two batches, we should get the same result
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def testJan16SpeedAssert(self):
    # Test for https://github.com/e-mission/e-mission-server/issues/457
    dataFile = "emission/tests/data/real_examples/another_speed_assertion_failure.jan-16"
    start_ld = ecwl.LocalDate({'year': 2016, 'month': 1, 'day': 16})
    cacheKey = "diary/trips-2016-01-16"
    ground_truth = json.load(open("emission/tests/data/real_examples/another_speed_assertion_failure.jan-16.ground_truth"),
                             object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def testOverriddenModeHack(self):
    # Test for https://github.com/e-mission/e-mission-server/issues/457
    dataFile = "emission/tests/data/real_examples/test_overriden_mode_hack.jul-31"
    start_ld = ecwl.LocalDate({'year': 2017, 'month': 7, 'day': 31})
    cacheKey = "diary/trips-2017-07-31"
    ground_truth = json.load(open("emission/tests/data/real_examples/test_overriden_mode_hack.jul-31.ground_truth"),
                             object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def testTsMismatch(self):
    # Test for https://github.com/e-mission/e-mission-server/issues/457
    dataFile = "emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12"
    start_ld = ecwl.LocalDate({'year': 2016, 'month': 12, 'day': 12})
    cacheKey = "diary/trips-2016-12-12"
    ground_truth = json.load(open("emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.ground_truth"),
                             object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def testFeb22ShortTripsDistance(self):
    dataFile = "emission/tests/data/real_examples/iphone_3_2016-02-22"
    start_ld = ecwl.LocalDate({'year': 2016, 'month': 2, 'day': 22})
    end_ld = ecwl.LocalDate({'year': 2016, 'month': 2, 'day': 22})
    cacheKey = "diary/trips-2016-02-22"
    ground_truth = json.load(open(dataFile+".ground_truth"), object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

    # Although we process the day's data in two batches, we should get the same result
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def testIndexLengthChange(self):
    # Test for 94f67b4848611fa01c4327a0fa0cab97c2247744
    dataFile = "emission/tests/data/real_examples/shankari_2015-08-23"
    start_ld = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 23})
    cacheKey = "diary/trips-2015-08-23"
    ground_truth = json.load(open("emission/tests/data/real_examples/shankari_2015-08-23.ground_truth"),
                             object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)

    # Although we process the day's data in two batches, we should get the same result
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def setUp(self): etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-21") self.testUUID1 = self.testUUID etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27") etc.runIntakePipeline(self.testUUID1) etc.runIntakePipeline(self.testUUID) logging.info( "After loading, timeseries db size = %s" % edb.get_timeseries_db().count()) self.aug_start_ts = 1438387200 self.aug_end_ts = 1441065600 self.day_start_dt = esdldq.get_local_date(self.aug_start_ts, "America/Los_Angeles") self.day_end_dt = esdldq.get_local_date(self.aug_end_ts, "America/Los_Angeles")
def testIosJumpsAndUntrackedSquishing(self):
    # Test for a2c0ee4e3ceafa822425ceef299dcdb01c9b32c9
    dataFile = "emission/tests/data/real_examples/sunil_2016-07-20"
    start_ld = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 20})
    cacheKey = "diary/trips-2016-07-20"
    ground_truth = json.load(open("emission/tests/data/real_examples/sunil_2016-07-20.ground_truth"),
                             object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, start_ld)

    # Although we process the day's data in two batches, we should get the same result
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def testAug27TooMuchExtrapolation(self):
    dataFile = "emission/tests/data/real_examples/shankari_2015-aug-27"
    start_ld = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
    end_ld = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
    cacheKey = "diary/trips-2015-08-27"
    with open(dataFile+".ground_truth") as gfp:
        ground_truth = json.load(gfp, object_hook=bju.object_hook)

    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)

    # Although we process the day's data in two batches, we should get the same result
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth).data)
def testCreatePlace(self):
    etc.setupRealExample(self,
                         "emission/tests/data/real_examples/shankari_2015-aug-27")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    data = eamtcp.main(self.testUUID)
    esdcpq.create_places(data, self.testUUID)
    places = esdcpq.get_all_common_places_for_user(self.testUUID)
    places_list = []
    for p in places:
        places_list.append(esdcpq.make_common_place(p))
    for place in places_list:
        self.assertIsNotNone(place.location)
        self.assertIsNotNone(place["successors"])
def testAllQuery(self):
    dataFile = "emission/tests/data/real_examples/shankari_2016-06-20"
    ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    self.testUUIDList = [self.testUUID]

    uuid_list = pointcount.query({
        "time_type": None,
        "modes": None,
        "sel_region": None
    })
    logging.debug("uuid_list = %s" % uuid_list)
    self.assertGreater(len(uuid_list), 1)
    self.assertIn(self.testUUID, uuid_list)
def testTimeQueryPos(self):
    dataFile = "emission/tests/data/real_examples/shankari_2016-06-20"
    ld = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
    etc.setupRealExample(self, dataFile)
    etc.runIntakePipeline(self.testUUID)
    self.testUUIDList = [self.testUUID]

    uuid_list = pointcount.query({
        "time_type": "local_date",
        "from_local_date": dict(ld),
        "to_local_date": dict(ld),
        "modes": None,
        "sel_region": None
    })
    logging.debug("uuid_list = %s" % uuid_list)
    self.assertEqual(uuid_list, [self.testUUID])
def testIsMatchedUser(self):
    # Load data for the Bay Area
    dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
    ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
    etc.setupRealExample(self, dataFileba)
    testUUIDba = self.testUUID
    etc.runIntakePipeline(testUUIDba)
    logging.debug("uuid for the bay area = %s " % testUUIDba)

    # Load data for Hawaii
    dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
    ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})
    etc.setupRealExample(self, dataFilehi)
    testUUIDhi = self.testUUID
    etc.runIntakePipeline(testUUIDhi)
    logging.debug("uuid for hawaii = %s " % testUUIDhi)

    self.testUUIDList = [testUUIDba, testUUIDhi]

    air_query_spec = {
        "time_type": "local_date",
        "from_local_date": {"year": 2016, "month": 2},
        "to_local_date": {"year": 2016, "month": 9},
        "freq": 'DAILY',
        "checks": [
            {
                "modes": ['WALKING', 'ON_FOOT'],
                "metric": "count",
                "threshold": {"$gt": 5}
            },
            {
                "modes": ['AIR_OR_HSR'],
                "metric": "count",
                "threshold": {"$gt": 1}
            }
        ]
    }

    # Since this requires at least one air trip, this will only return the
    # hawaii trip
    self.assertTrue(tripmetrics.is_matched_user(testUUIDhi, air_query_spec))
    self.assertFalse(tripmetrics.is_matched_user(testUUIDba, air_query_spec))
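# Illustrative helper (hypothetical, not part of the original test): the
# entries in the "checks" list above all share the same shape, so they could
# be built with a tiny factory. The dict layout simply mirrors the literals
# used in air_query_spec.
def make_count_check(modes, min_count):
    return {
        "modes": modes,
        "metric": "count",
        "threshold": {"$gt": min_count}
    }

# e.g. make_count_check(['AIR_OR_HSR'], 1) reproduces the air check above.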
def setUp(self): # Thanks to M&J for the number! np.random.seed(61297777) self.copied_model_path = etc.copy_dummy_seed_for_inference() dataFile = "emission/tests/data/real_examples/shankari_2016-08-10" start_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 9}) end_ld = ecwl.LocalDate({'year': 2016, 'month': 8, 'day': 10}) cacheKey = "diary/trips-2016-08-10" etc.setupRealExample(self, dataFile) etc.runIntakePipeline(self.testUUID) # Default intake pipeline now includes mode inference # this is correct in general, but causes errors while testing the mode inference # because then that step is effectively run twice. This code # rolls back the results of running the mode inference as part of the # pipeline and allows us to correctly test the mode inference pipeline again. pipeline.del_objects_after(self.testUUID, 0, is_dry_run=False) self.pipeline = pipeline.ModeInferencePipeline() self.pipeline.loadModelStage()
def testUserInputRealData(self):
    np.random.seed(61297777)
    dataFile = "emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12"
    etc.setupRealExample(self, dataFile)
    self.testUserId = self.testUUID
    # At this point, we have only raw data, no trips
    etc.runIntakePipeline(self.testUUID)
    # At this point, we have trips
    # Let's retrieve them
    ts = esta.TimeSeries.get_time_series(self.testUUID)
    ct_df = ts.get_data_df("analysis/cleaned_trip", time_query=None)
    self.assertEqual(len(ct_df), 4)

    # Now, let's load the mode_confirm and purpose_confirm objects
    mode_confirm_list = json.load(open("emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.mode_confirm"),
                                  object_hook=bju.object_hook)
    self.assertEqual(len(mode_confirm_list), 5)

    purpose_confirm_list = json.load(open("emission/tests/data/real_examples/shankari_single_positional_indexer.dec-12.purpose_confirm"),
                                     object_hook=bju.object_hook)
    self.assertEqual(len(purpose_confirm_list), 7)

    for mc in mode_confirm_list:
        mc["user_id"] = self.testUUID
        ts.insert(mc)

    for pc in purpose_confirm_list:
        pc["user_id"] = self.testUUID
        ts.insert(pc)

    mc_label_list = []
    pc_label_list = []
    # Look up the stored user input for each cleaned trip
    for trip_id in ct_df._id:
        mc = esdt.get_user_input_for_trip(esda.CLEANED_TRIP_KEY, self.testUserId,
                                          trip_id, "manual/mode_confirm")
        mc_label_list.append(mc.data.label)
        pc = esdt.get_user_input_for_trip(esda.CLEANED_TRIP_KEY, self.testUserId,
                                          trip_id, "manual/purpose_confirm")
        pc_label_list.append(pc.data.label)

    self.assertEqual(mc_label_list, 4 * ['bike'])
    self.assertEqual(pc_label_list, 4 * ['pick_drop'])
def setUp(self):
    self.clearRelatedDb()
    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
def testResetToTsInMiddleOfPlace(self):
    """
    - Load data for both days
    - Run pipelines
    - Verify that all is well
    - Reset to a date between the two
    - Verify that analysis data for the first day is unchanged
    - Verify that analysis data for the second day does not exist
    - Re-run pipelines
    - Verify that all is well
    """
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    cacheKey_1 = "diary/trips-2016-07-22"
    cacheKey_2 = "diary/trips-2016-07-25"
    ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                               object_hook=bju.object_hook)
    ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                               object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Check results: so far, so good
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Reset pipeline to July 23.
    # Note that this is actually the 22nd 17:00 PDT, so this is partway
    # through the 22nd
    reset_ts = arrow.get("2016-07-23").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # First day is unchanged, except that the last place doesn't have
    # exit data.
    # TODO: Modify ground truth to capture this change
    # Until then, we know that this will fail
    # api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    # self.compare_result(ad.AttrDict({'result': api_result}).result,
    #                     ad.AttrDict(ground_truth_1).data)

    # Second day does not exist
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    logging.debug(json.dumps(api_result, indent=4, default=bju.default))
    self.assertEqual(api_result, [])

    # Re-run the pipeline again
    etc.runIntakePipeline(self.testUUID)

    # Should be back to ground truth
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
def testResetToTsInMiddleOfTrip(self):
    """
    - Load data for both days
    - Run pipelines
    - Verify that all is well
    - Reset to a date between the two
    - Verify that analysis data for the first day is unchanged
    - Verify that analysis data for the second day does not exist
    - Re-run pipelines
    - Verify that all is well
    """
    # Load all data
    dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
    dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
    start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
    start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
    cacheKey_1 = "diary/trips-2016-07-22"
    cacheKey_2 = "diary/trips-2016-07-25"
    ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                               object_hook=bju.object_hook)
    ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                               object_hook=bju.object_hook)

    # Run both pipelines
    etc.setupRealExample(self, dataFile_1)
    etc.runIntakePipeline(self.testUUID)
    self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    etc.runIntakePipeline(self.testUUID)

    # Check results: so far, so good
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)

    # Reset pipeline to July 24.
    # Note that this is actually the 23rd 17:00 PDT.
    # This will reset in the middle of the untracked time, which is
    # technically a trip, and will allow us to test the trip resetting
    # code
    reset_ts = arrow.get("2016-07-24").timestamp
    epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

    # Second day does not exist
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    logging.debug(json.dumps(api_result, indent=4, default=bju.default))
    self.assertEqual(api_result, [])

    # Re-run the pipeline again
    etc.runIntakePipeline(self.testUUID)

    # Should be back to ground truth
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_1).data)
    api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
    self.compare_result(ad.AttrDict({'result': api_result}).result,
                        ad.AttrDict(ground_truth_2).data)
def setUp(self): etc.setupRealExample( self, "emission/tests/data/real_examples/shankari_2015-aug-21")
def testQueryMatching(self):
    # Load data for the Bay Area
    dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
    ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
    etc.setupRealExample(self, dataFileba)
    testUUIDba = self.testUUID
    edb.get_uuid_db().insert({"uuid": testUUIDba,
                              "user_email": "*****@*****.**"})
    etc.runIntakePipeline(testUUIDba)
    logging.debug("uuid for the bay area = %s " % testUUIDba)

    # Load data for Hawaii
    dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
    ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})
    etc.setupRealExample(self, dataFilehi)
    testUUIDhi = self.testUUID
    edb.get_uuid_db().insert({"uuid": testUUIDhi,
                              "user_email": "*****@*****.**"})
    etc.runIntakePipeline(testUUIDhi)
    logging.debug("uuid for hawaii = %s " % testUUIDhi)

    self.testUUIDList = [testUUIDba, testUUIDhi]

    air_query_spec = {
        "time_type": "local_date",
        "from_local_date": {"year": 2016, "month": 2},
        "to_local_date": {"year": 2016, "month": 9},
        "freq": 'DAILY',
        "checks": [
            {
                "modes": ['WALKING', 'ON_FOOT'],
                "metric": "count",
                "threshold": {"$gt": 5}
            },
            {
                "modes": ['AIR_OR_HSR'],
                "metric": "count",
                "threshold": {"$gt": 1}
            }
        ]
    }

    # Since this requires at least one air trip, this will only return the
    # hawaii trip
    self.assertEqual(tripmetrics.query(air_query_spec), [testUUIDhi])

    walk_drive_spec = {
        "time_type": "local_date",
        "from_local_date": {"year": 2016, "month": 2},
        "to_local_date": {"year": 2016, "month": 9},
        "freq": 'DAILY',
        "checks": [
            {
                "modes": ['WALKING', 'ON_FOOT'],
                "metric": "count",
                "threshold": {"$gt": 5}
            },
            {
                "modes": ['IN_VEHICLE'],
                "metric": "count",
                "threshold": {"$gt": 1}
            }
        ]
    }

    # Since this only requires walk and drive, will return both trips
    # We can't just do a simple equals check since the uuids may not always
    # be returned in the same order
    walk_drive_result = tripmetrics.query(walk_drive_spec)
    self.assertEqual(len(walk_drive_result), 2)
    self.assertIn(testUUIDhi, walk_drive_result)
    self.assertIn(testUUIDba, walk_drive_result)
def testGeoQuery(self):
    # Load data for the Bay Area
    dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
    ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
    etc.setupRealExample(self, dataFileba)
    testUUIDba = self.testUUID
    etc.runIntakePipeline(testUUIDba)
    logging.debug("uuid for the bay area = %s " % testUUIDba)

    # Load data for Hawaii
    dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
    ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})
    etc.setupRealExample(self, dataFilehi)
    testUUIDhi = self.testUUID
    etc.runIntakePipeline(testUUIDhi)
    logging.debug("uuid for hawaii = %s " % testUUIDhi)

    self.testUUIDList = [testUUIDba, testUUIDhi]

    uuid_listba = pointcount.query({
        "time_type": "local_date",
        "from_local_date": {'year': 2016, 'month': 5, 'day': 20},
        "to_local_date": {'year': 2016, 'month': 10, 'day': 20},
        "modes": None,
        "sel_region": {
            "geometry": {
                "type": "Polygon",
                "coordinates": [[[-122.0731149, 37.4003834],
                                 [-122.07302, 37.3804759],
                                 [-122.1232527, 37.4105125],
                                 [-122.1101028, 37.4199638],
                                 [-122.0731149, 37.4003834]]]
            }
        }
    })
    logging.debug("uuid_list for the bay area = %s" % uuid_listba)
    # We should only get the uuid from the bay area back
    self.assertEqual(uuid_listba, [testUUIDba])

    uuid_listhi = pointcount.query({
        "time_type": None,
        "modes": None,
        "sel_region": {
            "geometry": {
                "type": "Polygon",
                "coordinates": [[[-157.9614841, 21.3631988],
                                 [-157.9267982, 21.3780131],
                                 [-157.7985052, 21.279961],
                                 [-157.8047025, 21.2561483],
                                 [-157.9614841, 21.3631988]]]
            }
        }
    })
    logging.debug("uuid_list for hawaii = %s" % uuid_listhi)
    # We should only get the uuid from hawaii back
    self.assertEqual(uuid_listhi, [testUUIDhi])
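# Illustrative only (hypothetical, not in the original test): the sel_region
# values above are GeoJSON-style polygons whose ring must end where it starts.
# A small helper that closes a ring from a list of [lon, lat] corners:
def make_sel_region(corners):
    ring = list(corners)
    if ring[0] != ring[-1]:
        ring.append(ring[0])  # close the polygon ring
    return {"geometry": {"type": "Polygon", "coordinates": [ring]}}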
def setUp(self): etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27") estfm.move_all_filters_to_data()
def setUp(self):
    self.copied_model_path = etc.copy_dummy_seed_for_inference()
    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    eaicf.filter_accuracy(self.testUUID)
def setUp(self): self.clearRelatedDb() etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
def setUp(self): etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27") eaicf.filter_accuracy(self.testUUID)