def testStopSectionTimeline(self):
    """Verify that, within each raw trip, the per-trip timeline strictly
    alternates between sections and stops, and that each consecutive pair
    is cross-linked by the correct object ids."""
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    tl = esdt.get_raw_timeline(self.testUUID, self.day_start_ts, self.day_end_ts)
    for i, element in enumerate(tl):
        logging.debug("%s: %s" % (i, type(element)))
        curr_type = self.get_type(element)
        if curr_type == ecwrt.Rawtrip:
            curr_tl = esdtq.get_raw_timeline_for_trip(
                self.testUUID, element.get_id())
            logging.debug("Got timeline %s for trip %s" % (curr_tl, element.start_fmt_time))
            prev_sub_type = None
            prev_element = None
            checked_count = 0
            # Guard so the final assertion works even for an empty sub-timeline
            j = 0
            for j, curr_element in enumerate(curr_tl):
                logging.debug("curr_element = %s" % curr_element)
                curr_sub_type = self.get_type(curr_element)
                if prev_sub_type is not None:
                    checked_count = checked_count + 1
                    # Adjacent elements must be of different types (alternation)
                    self.assertNotEqual(prev_sub_type, curr_sub_type)
                    if prev_sub_type == ecws.Stop:
                        # Stop -> Section: the stop must point at this section
                        self.assertEqual(prev_element.starting_section, curr_element.get_id())
                    else:
                        # Section -> Stop: the section must point at this stop
                        self.assertEqual(prev_sub_type, ecwsc.Section)
                        self.assertEqual(prev_element.end_stop, curr_element.get_id())
                prev_sub_type = curr_sub_type
                prev_element = curr_element
            # Every element after the first must have been checked
            self.assertEqual(checked_count, j)
def testE2E(self):
    """End-to-end smoke test: segment trips and sections, build the tour
    model from the raw data, and verify it is non-trivial (has at least
    one common trip and one common place)."""
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    esdtmq.make_tour_model_from_raw_user_data(self.testUUID)
    tm = esdtmq.get_tour_model(self.testUUID)
    self.assertTrue(len(tm["common_trips"]) > 0)
    self.assertTrue(len(tm["common_places"]) > 0)
def testTripGeojson(self):
    """Run the pipeline through clean/resample and validate the geojson
    conversion for a single trip and for the whole day's timeline.

    Fix: replaced the deprecated ``assertEquals`` alias (removed in
    Python 3.12) with ``assertEqual``; behavior is otherwise unchanged.
    """
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    eaicl.filter_current_sections(self.testUUID)
    # Raw timeline for 2015-08-27 (PST) should have 8 trips
    tl = esdtl.get_raw_timeline(self.testUUID, 1440658800, 1440745200)
    self.assertEqual(len(tl.trips), 8)
    eaicr.clean_and_resample(self.testUUID)
    # After cleaning, one raw trip is squished, leaving 7
    tl = esdtl.get_cleaned_timeline(self.testUUID, 1440658800, 1440745200)
    tl.fill_start_end_places()
    created_trips = tl.trips
    self.assertEqual(len(created_trips), 7)
    trip_geojson = gjfc.trip_to_geojson(created_trips[0], tl)
    logging.debug("first trip_geojson = %s" % bju.dumps(trip_geojson, indent=4))
    self.assertEqual(trip_geojson.type, "FeatureCollection")
    self.assertEqual(trip_geojson.properties["feature_type"], "trip")
    self.assertEqual(len(trip_geojson.features), 5)
    day_geojson = gjfc.get_geojson_for_timeline(self.testUUID, tl)
    self.assertEqual(len(day_geojson), 7)
    self.assertEqual(day_geojson[-1].type, "FeatureCollection")
    self.assertEqual(day_geojson[-1].properties["feature_type"], "trip")
    self.assertEqual(len(day_geojson[-1].features), 5)
def testTripGeojson(self):
    """Run the full pipeline (including mode inference) and validate the
    geojson conversion for a single trip and for the day's timeline.

    Fix: replaced the deprecated ``assertEquals`` alias (removed in
    Python 3.12) with ``assertEqual``; behavior is otherwise unchanged.
    """
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    eaicl.filter_current_sections(self.testUUID)
    # Raw timeline for the test day should have 9 trips
    tl = esdtl.get_raw_timeline(self.testUUID, 1440658800, 1440745200)
    self.assertEqual(len(tl.trips), 9)
    eaicr.clean_and_resample(self.testUUID)
    eacimp.predict_mode(self.testUUID)
    tl = esdtl.get_cleaned_timeline(self.testUUID, 1440658800, 1440745200)
    tl.fill_start_end_places()
    created_trips = tl.trips
    self.assertEqual(len(created_trips), 9)
    trip_geojson = gjfc.trip_to_geojson(created_trips[0], tl)
    logging.debug("first trip_geojson = %s" % bju.dumps(trip_geojson, indent=4))
    self.assertEqual(trip_geojson.type, "FeatureCollection")
    self.assertEqual(trip_geojson.properties["feature_type"], "trip")
    self.assertEqual(len(trip_geojson.features), 5)
    day_geojson = gjfc.get_geojson_for_timeline(self.testUUID, tl)
    self.assertEqual(len(day_geojson), 8)
    self.assertEqual(day_geojson[-1].type, "FeatureCollection")
    self.assertEqual(day_geojson[-1].properties["feature_type"], "trip")
    self.assertEqual(len(day_geojson[-1].features), 5)
def testStopSectionTimeline(self):
    """Verify that, within each raw trip, the per-trip timeline strictly
    alternates between sections and stops, and that each consecutive pair
    is cross-linked by the correct object ids."""
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    tl = esdt.get_raw_timeline(self.testUUID, self.day_start_ts, self.day_end_ts)
    for i, element in enumerate(tl):
        logging.debug("%s: %s" % (i, type(element)))
        curr_type = self.get_type(element)
        if curr_type == ecwrt.Rawtrip:
            curr_tl = esdtq.get_raw_timeline_for_trip(self.testUUID,
                                                      element.get_id())
            logging.debug("Got timeline %s for trip %s" % (curr_tl, element.start_fmt_time))
            prev_sub_type = None
            prev_element = None
            checked_count = 0
            # Guard so the final assertion works even for an empty sub-timeline
            j = 0
            for j, curr_element in enumerate(curr_tl):
                logging.debug("curr_element = %s" % curr_element)
                curr_sub_type = self.get_type(curr_element)
                if prev_sub_type is not None:
                    checked_count = checked_count + 1
                    # Adjacent elements must alternate between types
                    self.assertNotEqual(prev_sub_type, curr_sub_type)
                    if prev_sub_type == ecws.Stop:
                        self.assertEqual(prev_element.starting_section, curr_element.get_id())
                    else:
                        self.assertEqual(prev_sub_type, ecwsc.Section)
                        self.assertEqual(prev_element.end_stop, curr_element.get_id())
                prev_sub_type = curr_sub_type
                prev_element = curr_element
            # Every element after the first must have been checked
            self.assertEqual(checked_count, j)
def setUp(self):
    """Reset the trip/section collections, load a real example day of
    data, filter accuracy, and run trip + section segmentation so each
    test starts from a freshly segmented timeline."""
    self.clearRelatedDb()
    # NOTE(review): Collection.remove() is deprecated in modern pymongo
    # (delete_many is the replacement) -- confirm the pinned driver version.
    edb.get_trip_db().remove()
    edb.get_section_db().remove()
    edb.get_trip_new_db().remove()
    edb.get_section_new_db().remove()
    etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-21")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
def testTripGeojson(self):
    """Segment the test data, fill places, and smoke-test the geojson
    conversion of the first created trip (output is only logged).

    Fix: replaced the deprecated ``assertEquals`` alias (removed in
    Python 3.12) with ``assertEqual``; behavior is otherwise unchanged.
    """
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    tl = esdtl.get_timeline(self.testUUID, 1440658800, 1440745200)
    tl.fill_start_end_places()
    created_trips = tl.trips
    self.assertEqual(len(created_trips), 8)
    trip_geojson = gjfc.trip_to_geojson(created_trips[0], tl)
    logging.debug("trip_geojson = %s" % gj.dumps(trip_geojson, indent=4))
def testRemoveAllOutliers(self):
    """Mark every point of every raw section as a smoothing outlier and
    verify that clean_and_resample still completes, leaving exactly one
    cleaned trip (the one with non-zero stop length)."""
    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2016-06-20")
    self.ts = esta.TimeSeries.get_time_series(self.testUUID)
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    eaicl.filter_current_sections(self.testUUID)
    # get all sections
    sections = [
        ecwe.Entry(s)
        for s in self.ts.find_entries([esda.RAW_SECTION_KEY], time_query=None)
    ]
    for section in sections:
        filtered_points_entry_doc = self.ts.get_entry_at_ts(
            "analysis/smoothing", "data.section", section.get_id())
        if filtered_points_entry_doc is not None:
            logging.debug("Found smoothing result for section %s" %
                          section.get_id())
            # Setting the set of deleted points to everything
            loc_tq = esda.get_time_query_for_trip_like(
                esda.RAW_SECTION_KEY, section.get_id())
            loc_df = self.ts.get_data_df("background/filtered_location", loc_tq)
            filtered_points_entry_doc["data"]["deleted_points"] = loc_df[
                "_id"].tolist()
            self.ts.update(ecwe.Entry(filtered_points_entry_doc))
    # All we care is that this should not crash.
    eaicr.clean_and_resample(self.testUUID)
    # Most of the trips have zero length, but apparently one has non-zero length
    # because the stop length is non zero!!
    # So there is only one cleaned trip left
    cleaned_trips_df = self.ts.get_data_df(esda.CLEANED_TRIP_KEY,
                                           time_query=None)
    self.assertEqual(len(cleaned_trips_df), 1)
    # We don't support squishing sections, but we only store stops and sections
    # for non-squished trips. And this non-squished trip happens to have
    # two sections and one stop
    cleaned_sections_df = self.ts.get_data_df(esda.CLEANED_SECTION_KEY,
                                              time_query=None)
    self.assertEqual(len(cleaned_sections_df), 2)
    self.assertEqual(cleaned_sections_df.distance.tolist(), [0, 0])
    cleaned_stops_df = self.ts.get_data_df(esda.CLEANED_STOP_KEY,
                                           time_query=None)
    self.assertEqual(len(cleaned_stops_df), 1)
    self.assertAlmostEqual(cleaned_stops_df.distance[0], 3252, places=0)
def testCreatePlace(self):
    """Build common places from a real example day and verify that each
    created place has a location and a successors list."""
    etc.setupRealExample(self,
                         "emission/tests/data/real_examples/shankari_2015-aug-27")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    data = eamtcp.main(self.testUUID)
    esdcpq.create_places(data, self.testUUID)
    places = esdcpq.get_all_common_places_for_user(self.testUUID)
    places_list = []
    for p in places:
        places_list.append(esdcpq.make_common_place(p))
    for place in places_list:
        self.assertIsNotNone(place.location)
        self.assertIsNotNone(place["successors"])
def testSegmentationWrapperWithAutoTrip(self):
    """Smoke test: segment trips and sections, then iterate the created
    trips and log their stops and sections (no assertions; this only
    checks that retrieval does not raise)."""
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    tq_trip = enua.UserCache.TimeQuery("start_ts", 1440658800, 1440745200)
    created_trips = esdt.get_trips(self.testUUID, tq_trip)
    for i, trip in enumerate(created_trips):
        created_stops = esdt.get_stops_for_trip(self.testUUID, trip.get_id())
        created_sections = esdt.get_sections_for_trip(self.testUUID, trip.get_id())
        for j, stop in enumerate(created_stops):
            logging.info("Retrieved stops %s: %s -> %s" %
                         (j, stop.enter_fmt_time, stop.exit_fmt_time))
        for j, section in enumerate(created_sections):
            logging.info("Retrieved sections %s: %s -> %s" %
                         (j, section.start_fmt_time, section.end_fmt_time))
def testSegmentationWrapperIOS(self): eaist.segment_current_trips(self.iosUUID) # The previous line should have created places and trips and stored # them into the database. Now, we want to query to ensure that they # were created correctly. tq_place = estt.TimeQuery("data.enter_ts", 1446796800, 1446847600) created_places_entries = esda.get_entries(esda.RAW_PLACE_KEY, self.iosUUID, tq_place) tq_trip = estt.TimeQuery("data.start_ts", 1446796800, 1446847600) created_trips_entries = esda.get_entries(esda.RAW_TRIP_KEY, self.iosUUID, tq_trip) for i, place in enumerate(created_places_entries): logging.debug( "Retrieved places %s: %s -> %s" % (i, place.data.enter_fmt_time, place.data.exit_fmt_time)) for i, trip in enumerate(created_trips_entries): logging.debug( "Retrieved trips %s: %s -> %s" % (i, trip.data.start_fmt_time, trip.data.end_fmt_time)) # We expect there to be 4 places, but the first one is that start of # the chain, so it has a start_time of None and it won't be retrieved # by the query on the start_time that we show here. self.assertEqual(len(created_places_entries), 2) self.assertEqual(len(created_trips_entries), 2) # Pick the first two trips and the first place and ensure that they are all linked correctly # Note that this is the first place, not the second place because the true first place will not # be retrieved by the query, as shown above # The first trip here is a dummy trip, so let's check the second and third trip instead trip0 = created_trips_entries[0] trip1 = created_trips_entries[1] place0 = created_places_entries[0] self.assertEqual(trip0.data.end_place, place0.get_id()) self.assertEqual(trip1.data.start_place, place0.get_id()) self.assertEqual(place0.data.ending_trip, trip0.get_id()) self.assertEqual(place0.data.starting_trip, trip1.get_id()) self.assertEqual(round(trip0.data.duration), 14 * 60 + 41) self.assertEqual(round(trip1.data.duration), 1 * 60 * 60 + 50 * 60 + 56) self.assertIsNotNone(place0.data.location)
def runIntakePipeline(uuid):
    """Run the full intake pipeline (accuracy filter -> trip segmentation
    -> section segmentation -> smoothing -> clean/resample -> mode
    inference) for a single user.

    :param uuid: the user id whose data should be processed
    """
    # Move these imports here so that we don't inadvertently load the modules,
    # and any related config modules, before we want to import
    import emission.analysis.intake.cleaning.filter_accuracy as eaicf
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    import emission.analysis.intake.segmentation.trip_segmentation as eaist
    import emission.analysis.intake.segmentation.section_segmentation as eaiss
    import emission.analysis.intake.cleaning.location_smoothing as eaicl
    import emission.analysis.intake.cleaning.clean_and_resample as eaicr
    import emission.analysis.classification.inference.mode.pipeline as eacimp

    eaicf.filter_accuracy(uuid)
    eaist.segment_current_trips(uuid)
    eaiss.segment_current_sections(uuid)
    eaicl.filter_current_sections(uuid)
    eaicr.clean_and_resample(uuid)
    eacimp.predict_mode(uuid)
def testCreateFromData(self):
    """Build common trips from a real example day and verify each common
    trip has endpoints, constituent trips, and non-zero probabilities."""
    etc.setupRealExample(self,
                         "emission/tests/data/real_examples/shankari_2015-aug-27")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    trips = esdctp.get_all_common_trips_for_user(self.testUUID)
    trips_list = []
    for p in trips:
        trips_list.append(esdctp.make_common_trip_from_json(p))
    for trip in trips_list:
        self.assertIsNotNone(trip.start_loc)
        self.assertIsNotNone(trip.end_loc)
        self.assertTrue(len(trip["trips"]) > 0)
        rand_id = random.choice(trip["trips"])
        self.assertEqual(type(esdtq.get_trip(rand_id)), ecwt.Trip)
        # "probabilites" follows the stored field name (sic) -- do not "fix"
        # the spelling here without migrating the data model
        self.assertTrue(trip.probabilites.sum() > 0)
        self.assertEqual(str(trip.user_id), "test2")
def testIOSSegmentationWrapperWithAutoTrip(self):
    """Segment the iOS example data and verify the per-trip (sections,
    stops) counts against known-good values."""
    eaist.segment_current_trips(self.iosUUID)
    eaiss.segment_current_sections(self.iosUUID)
    tq_trip = estt.TimeQuery("data.start_ts", 1446700000, 1446900000)
    created_trips = esda.get_entries(esda.RAW_TRIP_KEY,
                                     self.iosUUID, tq_trip)
    self.assertEqual(len(created_trips), 2)
    logging.debug("created trips = %s" % created_trips)
    sections_stops = [(len(esdt.get_raw_sections_for_trip(self.iosUUID, trip.get_id())),
                       len(esdt.get_raw_stops_for_trip(self.iosUUID, trip.get_id())))
                      for trip in created_trips]
    logging.debug(sections_stops)
    self.assertEqual(len(sections_stops), len(created_trips))
    # The expected value was copy-pasted from the debug statement above
    self.assertEqual(sections_stops,
                     [(0, 0), (11, 10)])
def testSegmentationWrapperWithAutoTrip(self):
    """Segment the android example data and verify the per-trip
    (sections, stops) counts against known-good values."""
    eaist.segment_current_trips(self.androidUUID)
    eaiss.segment_current_sections(self.androidUUID)
    tq_trip = estt.TimeQuery("data.start_ts", 1440658800, 1440745200)
    created_trips = esda.get_entries(esda.RAW_TRIP_KEY,
                                     self.androidUUID, tq_trip)
    self.assertEqual(len(created_trips), 8)
    sections_stops = [(len(esdt.get_raw_sections_for_trip(self.androidUUID, trip.get_id())),
                       len(esdt.get_raw_stops_for_trip(self.androidUUID, trip.get_id())))
                      for trip in created_trips]
    logging.debug(sections_stops)
    self.assertEqual(len(sections_stops), len(created_trips))
    # The expected value was copy-pasted from the debug statement above
    self.assertEqual(sections_stops,
                     [(2, 1), (1, 0), (2, 1), (2, 1), (1, 0), (2, 1), (4, 3), (2, 1)])
def testSegmentationWrapperWithAutoTrip(self):
    """Smoke test: segment trips and sections, then iterate the created
    trips and log their stops and sections (no assertions; this only
    checks that retrieval does not raise)."""
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    tq_trip = enua.UserCache.TimeQuery("start_ts", 1440658800, 1440745200)
    created_trips = esdt.get_trips(self.testUUID, tq_trip)
    for i, trip in enumerate(created_trips):
        created_stops = esdt.get_stops_for_trip(self.testUUID, trip.get_id())
        created_sections = esdt.get_sections_for_trip(
            self.testUUID, trip.get_id())
        for j, stop in enumerate(created_stops):
            logging.info("Retrieved stops %s: %s -> %s" %
                         (j, stop.enter_fmt_time, stop.exit_fmt_time))
        for j, section in enumerate(created_sections):
            logging.info("Retrieved sections %s: %s -> %s" %
                         (j, section.start_fmt_time, section.end_fmt_time))
def testCreateFromData(self):
    """Build common trips from a real example day and verify each common
    trip has endpoints, constituent trips, and non-zero probabilities."""
    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    trips = esdctp.get_all_common_trips_for_user(self.testUUID)
    trips_list = []
    for p in trips:
        trips_list.append(esdctp.make_common_trip_from_json(p))
    for trip in trips_list:
        self.assertIsNotNone(trip.start_loc)
        self.assertIsNotNone(trip.end_loc)
        self.assertTrue(len(trip["trips"]) > 0)
        rand_id = random.choice(trip["trips"])
        self.assertEqual(type(esdtq.get_trip(rand_id)), ecwt.Trip)
        # "probabilites" follows the stored field name (sic) -- do not "fix"
        # the spelling here without migrating the data model
        self.assertTrue(trip.probabilites.sum() > 0)
        self.assertEqual(str(trip.user_id), "test2")
def testSegmentationWrapperIOS(self): eaist.segment_current_trips(self.iosUUID) # The previous line should have created places and trips and stored # them into the database. Now, we want to query to ensure that they # were created correctly. tq_place = estt.TimeQuery("data.enter_ts", 1446796800, 1446847600) created_places_entries = esda.get_entries(esda.RAW_PLACE_KEY, self.iosUUID, tq_place) tq_trip = estt.TimeQuery("data.start_ts", 1446796800, 1446847600) created_trips_entries = esda.get_entries(esda.RAW_TRIP_KEY, self.iosUUID, tq_trip) for i, place in enumerate(created_places_entries): logging.debug("Retrieved places %s: %s -> %s" % (i, place.data.enter_fmt_time, place.data.exit_fmt_time)) for i, trip in enumerate(created_trips_entries): logging.debug("Retrieved trips %s: %s -> %s" % (i, trip.data.start_fmt_time, trip.data.end_fmt_time)) # We expect there to be 4 places, but the first one is that start of # the chain, so it has a start_time of None and it won't be retrieved # by the query on the start_time that we show here. self.assertEqual(len(created_places_entries), 3) self.assertEqual(len(created_trips_entries), 3) # Pick the first two trips and the first place and ensure that they are all linked correctly # Note that this is the first place, not the second place because the true first place will not # be retrieved by the query, as shown above # The first trip here is a dummy trip, so let's check the second and third trip instead trip0 = created_trips_entries[1] trip1 = created_trips_entries[2] place0 = created_places_entries[1] self.assertEqual(trip0.data.end_place, place0.get_id()) self.assertEqual(trip1.data.start_place, place0.get_id()) self.assertEqual(place0.data.ending_trip, trip0.get_id()) self.assertEqual(place0.data.starting_trip, trip1.get_id()) self.assertEqual(round(trip0.data.duration), 58 * 60 + 51) self.assertEqual(round(trip1.data.duration), 38 * 60 + 57) self.assertIsNotNone(place0.data.location)
def testPlaceTripTimeline(self):
    """Verify that the timeline strictly alternates between places and
    trips, and that each consecutive pair is cross-linked by the correct
    object ids.

    Fix: initialize ``i`` before the loop so the final assertion does not
    raise NameError when the timeline is empty (mirrors the ``j = 0``
    guard used in the sibling testStopSectionTimeline).
    """
    eaist.segment_current_trips(self.testUUID)
    tl = esdt.get_timeline(self.testUUID, self.day_start_ts, self.day_end_ts)
    prev_type = None
    prev_element = None
    checked_count = 0
    i = 0
    for i, curr_element in enumerate(tl):
        # logging.debug("%s: %s" % (i, curr_element))
        curr_type = self.get_type(curr_element)
        if prev_type is not None:
            checked_count = checked_count + 1
            # Adjacent elements must alternate between Place and Trip
            self.assertNotEqual(prev_type, curr_type)
            if prev_type == ecwp.Place:
                # Place -> Trip: the place must point at this trip
                self.assertEqual(prev_element.starting_trip, curr_element.get_id())
            else:
                # Trip -> Place: the trip must point at this place
                self.assertEqual(prev_type, ecwt.Trip)
                self.assertEqual(prev_element.end_place, curr_element.get_id())
        prev_type = curr_type
        prev_element = curr_element
    # Every element after the first must have been checked against its
    # predecessor, so checked_count equals the last index
    self.assertEqual(checked_count, i)
def testSegmentationWrapperWithAutoTrip(self):
    """Smoke test: segment trips and sections, then iterate the created
    trip entries and log their raw stops and sections (no assertions;
    this only checks that retrieval does not raise)."""
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    tq_trip = estt.TimeQuery("data.start_ts", 1440658800, 1440745200)
    created_trips = esda.get_entries(esda.RAW_TRIP_KEY,
                                     self.testUUID, tq_trip)
    for i, trip in enumerate(created_trips):
        logging.debug("current trip is %s" % trip)
        created_stops = esdt.get_raw_stops_for_trip(self.testUUID, trip.get_id())
        created_sections = esdt.get_raw_sections_for_trip(self.testUUID, trip.get_id())
        for j, stop in enumerate(created_stops):
            logging.info("Retrieved stops %s: %s -> %s" %
                         (j, stop.data.enter_fmt_time, stop.data.exit_fmt_time))
        for j, section in enumerate(created_sections):
            logging.info("Retrieved sections %s: %s -> %s" %
                         (j, section.data.start_fmt_time, section.data.end_fmt_time))
def testSegmentationWrapperAndroid(self): eaist.segment_current_trips(self.androidUUID) # The previous line should have created places and trips and stored # them into the database. Now, we want to query to ensure that they # were created correctly. tq_place = enua.UserCache.TimeQuery("enter_ts", 1440658800, 1440745200) created_places = esdp.get_places(self.androidUUID, tq_place) tq_trip = enua.UserCache.TimeQuery("start_ts", 1440658800, 1440745200) created_trips = esdt.get_trips(self.androidUUID, tq_trip) for i, place in enumerate(created_places): logging.debug("Retrieved places %s: %s -> %s" % (i, place.enter_fmt_time, place.exit_fmt_time)) for i, trip in enumerate(created_trips): logging.debug("Retrieved trips %s: %s -> %s" % (i, trip.start_fmt_time, trip.end_fmt_time)) # We expect there to be 9 places, but the first one is that start of # the chain, so it has a start_time of None and it won't be retrieved # by the query on the start_time that we show here. self.assertEqual(len(created_places), 8) self.assertEqual(len(created_trips), 8) # Pick the first two trips and the first place and ensure that they are all linked correctly # Note that this is the first place, not the second place because the true first place will not # be retrieved by the query, as shown above trip0 = created_trips[0] trip1 = created_trips[1] place0 = created_places[0] self.assertEqual(trip0.end_place, place0.get_id()) self.assertEqual(trip1.start_place, place0.get_id()) self.assertEqual(place0.ending_trip, trip0.get_id()) self.assertEqual(place0.starting_trip, trip1.get_id()) self.assertEqual(round(trip0.duration), 11 * 60 + 9) self.assertEqual(round(trip1.duration), 6 * 60 + 54) self.assertIsNotNone(place0.location)
def testRemoveAllOutliers(self):
    """Mark every point of every raw section as a smoothing outlier and
    verify that clean_and_resample still completes, leaving exactly one
    cleaned trip (the one with non-zero stop length)."""
    etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2016-06-20")
    self.ts = esta.TimeSeries.get_time_series(self.testUUID)
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    eaicl.filter_current_sections(self.testUUID)
    # get all sections
    sections = [ecwe.Entry(s) for s in
                self.ts.find_entries([esda.RAW_SECTION_KEY], time_query=None)]
    for section in sections:
        filtered_points_entry_doc = self.ts.get_entry_at_ts("analysis/smoothing",
                                                            "data.section",
                                                            section.get_id())
        if filtered_points_entry_doc is not None:
            logging.debug("Found smoothing result for section %s" % section.get_id())
            # Setting the set of deleted points to everything
            loc_tq = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                       section.get_id())
            loc_df = self.ts.get_data_df("background/filtered_location", loc_tq)
            filtered_points_entry_doc["data"]["deleted_points"] = loc_df["_id"].tolist()
            self.ts.update(ecwe.Entry(filtered_points_entry_doc))
    # All we care is that this should not crash.
    eaicr.clean_and_resample(self.testUUID)
    # Most of the trips have zero length, but apparently one has non-zero length
    # because the stop length is non zero!!
    # So there is only one cleaned trip left
    cleaned_trips_df = self.ts.get_data_df(esda.CLEANED_TRIP_KEY, time_query=None)
    self.assertEqual(len(cleaned_trips_df), 1)
    # We don't support squishing sections, but we only store stops and sections
    # for non-squished trips. And this non-squished trip happens to have
    # two sections and one stop
    cleaned_sections_df = self.ts.get_data_df(esda.CLEANED_SECTION_KEY, time_query=None)
    self.assertEqual(len(cleaned_sections_df), 2)
    self.assertEqual(cleaned_sections_df.distance.tolist(), [0,0])
    cleaned_stops_df = self.ts.get_data_df(esda.CLEANED_STOP_KEY, time_query=None)
    self.assertEqual(len(cleaned_stops_df), 1)
    self.assertAlmostEqual(cleaned_stops_df.distance[0], 3252, places=0)
def testSegmentationWrapperWithAutoTrip(self):
    """Smoke test: segment trips and sections, then iterate the created
    trip entries and log their raw stops and sections (no assertions;
    this only checks that retrieval does not raise)."""
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    tq_trip = estt.TimeQuery("data.start_ts", 1440658800, 1440745200)
    created_trips = esda.get_entries(esda.RAW_TRIP_KEY,
                                     self.testUUID, tq_trip)
    for i, trip in enumerate(created_trips):
        logging.debug("current trip is %s" % trip)
        created_stops = esdt.get_raw_stops_for_trip(
            self.testUUID, trip.get_id())
        created_sections = esdt.get_raw_sections_for_trip(
            self.testUUID, trip.get_id())
        for j, stop in enumerate(created_stops):
            logging.info(
                "Retrieved stops %s: %s -> %s" %
                (j, stop.data.enter_fmt_time, stop.data.exit_fmt_time))
        for j, section in enumerate(created_sections):
            logging.info("Retrieved sections %s: %s -> %s" %
                         (j, section.data.start_fmt_time, section.data.end_fmt_time))
def run_intake_pipeline_for_user(uuid):
    """Run all intake pipeline stages for one user, timing each stage with
    ect.Timer and recording the elapsed time via esds.store_pipeline_time.

    :param uuid: the user id whose data should be processed
    """
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
    with ect.Timer() as uct:
        logging.info("*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10)
        uh.moveToLongTerm()
    # stored outside the with block: uct.elapsed is only valid after Timer exits
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    if edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key") == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        eaicf.filter_accuracy(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
        eaist.segment_current_trips(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
        eaiss.segment_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
        eaicl.filter_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        logging.info("*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
        eaicr.clean_and_resample(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    with ect.Timer() as act:
        logging.info("*" * 10 + "UUID %s: checking active mode trips to autocheck habits" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: checking active mode trips to autocheck habits" % uuid + "*" * 10)
        autocheck.give_points_for_all_tasks(uuid)
    # NOTE: literal stage name used here because there is no matching
    # PipelineStages enum member for the autocheck stage
    esds.store_pipeline_time(uuid, "AUTOCHECK_POINTS",
                             time.time(), act.elapsed)

    with ect.Timer() as ogt:
        logging.info("*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
        uh.storeViewsToCache()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
def runIntakePipeline(uuid):
    """Run the standard intake pipeline stages, in order, for one user.

    :param uuid: the user id whose data should be processed
    """
    # The stage order is significant: each stage consumes the previous
    # stage's output.
    pipeline_stages = [
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
    ]
    for stage in pipeline_stages:
        stage(uuid)
] half = old_div(len(filtered_long_term_uuid_list), 2) long_term_uuid_list = filtered_long_term_uuid_list[half:] logging.info("*" * 10 + "long term UUID list = %s" % long_term_uuid_list) for uuid in long_term_uuid_list: if uuid is None: continue logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10) eaicf.filter_accuracy(uuid) logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10) eaist.segment_current_trips(uuid) logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10) eaiss.segment_current_sections(uuid) logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10) eaicl.filter_current_sections(uuid) logging.info("*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10) eaicr.clean_and_resample(uuid) logging.info(
def testSegmentationWrapperCombined(self):
    """Re-assign the iOS entries to the android UUID, segment the merged
    stream, and verify counts, linkage, and durations for both the
    android (time-filter) and iOS (distance-filter) portions."""
    # Change iOS entries to have the android UUID
    tsdb = edb.get_timeseries_db()
    for entry in esta.TimeSeries.get_time_series(
            self.iosUUID).find_entries():
        entry["user_id"] = self.androidUUID
        tsdb.save(entry)
    # Now, segment the data for the combined UUID, which will include both
    # android and ios
    eaist.segment_current_trips(self.androidUUID)
    tq_place = estt.TimeQuery("data.enter_ts", 1440658800, 1446847600)
    created_places_entries = esda.get_entries(esda.RAW_PLACE_KEY,
                                              self.androidUUID, tq_place)
    tq_trip = estt.TimeQuery("data.start_ts", 1440658800, 1446847600)
    created_trips_entries = esda.get_entries(esda.RAW_TRIP_KEY,
                                             self.androidUUID, tq_trip)
    for i, place in enumerate(created_places_entries):
        logging.debug("Retrieved places %s: %s -> %s" %
                      (i, place.data.enter_fmt_time, place.data.exit_fmt_time))
    for i, trip in enumerate(created_trips_entries):
        logging.debug("Retrieved trips %s: %s -> %s" %
                      (i, trip.data.start_fmt_time, trip.data.end_fmt_time))
    # We expect there to be 12 places, but the first one is that start of
    # the chain, so it has a start_time of None and it won't be retrieved
    # by the query on the start_time that we show here.
    self.assertEqual(len(created_places_entries), 11)
    self.assertEqual(len(created_trips_entries), 11)
    # Pick the first two trips and the first place and ensure that they are all linked correctly
    # Note that this is the first place, not the second place because the true first place will not
    # be retrieved by the query, as shown above
    # The first trip here is a dummy trip, so let's check the second and third trip instead
    trip0time = created_trips_entries[0]
    trip1time = created_trips_entries[1]
    place0time = created_places_entries[0]
    self.assertEqual(trip0time.data.end_place, place0time.get_id())
    self.assertEqual(trip1time.data.start_place, place0time.get_id())
    self.assertEqual(place0time.data.ending_trip, trip0time.get_id())
    self.assertEqual(place0time.data.starting_trip, trip1time.get_id())
    self.assertEqual(round(trip0time.data.duration), 11 * 60 + 9)
    self.assertEqual(round(trip1time.data.duration), 6 * 60 + 54)
    self.assertIsNotNone(place0time.data.location)
    # There are 8 android trips first (index: 0-7).
    # index 8 is the short, bogus trip
    # So we want to check trips 9 and 10
    trip0dist = created_trips_entries[9]
    trip1dist = created_trips_entries[10]
    place0dist = created_places_entries[9]
    self.assertEqual(trip0dist.data.end_place, place0dist.get_id())
    self.assertEqual(trip1dist.data.start_place, place0dist.get_id())
    self.assertEqual(place0dist.data.ending_trip, trip0dist.get_id())
    self.assertEqual(place0dist.data.starting_trip, trip1dist.get_id())
    self.assertEqual(round(trip0dist.data.duration), 58 * 60 + 51)
    self.assertEqual(round(trip1dist.data.duration), 39 * 60 + 49)
    self.assertIsNotNone(place0dist.data.location)
def run_intake_pipeline_for_user(uuid):
    """Run all intake pipeline stages for one user (untimed variant),
    logging and printing a banner before each stage.

    :param uuid: the user id whose data should be processed
    """
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
    logging.info("*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10)
    print(
        str(arrow.now()) + "*" * 10 +
        "UUID %s: moving to long term" % uuid + "*" * 10)
    uh.moveToLongTerm()
    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    if edb.get_timeseries_db().find({"user_id": uuid}).count() == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return
    logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
    print(
        str(arrow.now()) + "*" * 10 +
        "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
    eaicf.filter_accuracy(uuid)
    logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
    print(
        str(arrow.now()) + "*" * 10 +
        "UUID %s: segmenting into trips" % uuid + "*" * 10)
    eaist.segment_current_trips(uuid)
    logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
    print(
        str(arrow.now()) + "*" * 10 +
        "UUID %s: segmenting into sections" % uuid + "*" * 10)
    eaiss.segment_current_sections(uuid)
    logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
    print(
        str(arrow.now()) + "*" * 10 +
        "UUID %s: smoothing sections" % uuid + "*" * 10)
    eaicl.filter_current_sections(uuid)
    logging.info("*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
    print(
        str(arrow.now()) + "*" * 10 +
        "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
    eaicr.clean_and_resample(uuid)
    logging.info("*" * 10 + "UUID %s: checking active mode trips to autocheck habits" % uuid + "*" * 10)
    print(
        str(arrow.now()) + "*" * 10 +
        "UUID %s: checking active mode trips to autocheck habits" % uuid + "*" * 10)
    autocheck.give_points_for_all_tasks(uuid)
    logging.info("*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
    print(
        str(arrow.now()) + "*" * 10 +
        "UUID %s: storing views to cache" % uuid + "*" * 10)
    uh.storeViewsToCache()
def testSegmentationWrapperCombined(self):
    """Segment android + iOS data as one combined user and verify the
    resulting place/trip chain and trip durations."""
    # Re-tag every iOS entry with the android UUID so both datasets are
    # segmented as a single user.
    for entry in esta.TimeSeries.get_time_series(self.iosUUID).find_entries():
        entry["user_id"] = self.androidUUID
        edb.get_timeseries_db().save(entry)

    # Now, segment the data for the combined UUID, which will include both
    # android and ios
    eaist.segment_current_trips(self.androidUUID)

    place_query = enua.UserCache.TimeQuery("enter_ts", 1440658800, 1446847600)
    places = esdp.get_places(self.androidUUID, place_query)
    trip_query = enua.UserCache.TimeQuery("start_ts", 1440658800, 1446847600)
    trips = esdt.get_trips(self.androidUUID, trip_query)

    for idx, place in enumerate(places):
        logging.debug("Retrieved places %s: %s -> %s" %
                      (idx, place.enter_fmt_time, place.exit_fmt_time))
    for idx, trip in enumerate(trips):
        logging.debug("Retrieved trips %s: %s -> %s" %
                      (idx, trip.start_fmt_time, trip.end_fmt_time))

    # We expect there to be 12 places, but the first one is the start of
    # the chain, so it has a start_time of None and it won't be retrieved
    # by the query on the start_time shown above.
    self.assertEqual(len(places), 11)
    self.assertEqual(len(trips), 11)

    def check_linkage(prev_trip, place, next_trip):
        # The place between two consecutive trips must point at both of
        # them, and both trips must point back at the place.
        self.assertEqual(prev_trip.end_place, place.get_id())
        self.assertEqual(next_trip.start_place, place.get_id())
        self.assertEqual(place.ending_trip, prev_trip.get_id())
        self.assertEqual(place.starting_trip, next_trip.get_id())

    check_linkage(trips[0], places[0], trips[1])
    self.assertEqual(round(trips[0].duration), 11 * 60 + 9)
    self.assertEqual(round(trips[1].duration), 6 * 60 + 54)
    self.assertIsNotNone(places[0].location)

    # There are 8 android trips first (index: 0-7).
    # index 8 is the short, bogus trip
    # So we want to check trips 9 and 10
    check_linkage(trips[9], places[9], trips[10])
    self.assertEqual(round(trips[9].duration), 58 * 60 + 51)
    self.assertEqual(round(trips[10].duration), 38 * 60 + 57)
    self.assertIsNotNone(places[9].location)
def testEmptyCall(self):
    """Trip segmentation for a user with no data must be a no-op,
    not an exception."""
    import uuid
    unknown_user_id = uuid.uuid4()
    # Should return cleanly even though no entries exist for this user
    eaist.segment_current_trips(unknown_user_id)
def run_pipeline():
    """Run the intake pipeline for the user identified in the incoming request.

    Reads ``pm_address`` and ``uuid`` from the request JSON, then runs each
    pipeline stage in order, recording per-stage elapsed times through
    esds.store_pipeline_time. Users whose timeseries contains only
    ``stats/pipeline_time`` entries are skipped as spurious.
    """
    edb.pm_address = request.json['pm_address']
    print(edb.pm_address)
    # uuid is a filler and just needs to be consistent for each user.
    # These can be removed but require refactoring all code locations
    # that use the uuid.
    uuid = request.json['uuid']
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    def timed_stage(banner_msg, stage_name, stage_fn):
        # Same observable behavior as the original inline pattern: banner to
        # the log and (timestamped) to stdout, stage executed inside the
        # timer, then the elapsed time recorded against the stage name.
        banner = "*" * 10 + banner_msg + "*" * 10
        with ect.Timer() as stage_timer:
            logging.info(banner)
            print(str(arrow.now()) + banner)
            stage_fn()
        esds.store_pipeline_time(uuid, stage_name, time.time(),
                                 stage_timer.elapsed)

    timed_stage("moving to long term",
                ecwp.PipelineStages.USERCACHE.name,
                uh.moveToLongTerm)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    print(edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key"))
    if edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key") == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    timed_stage("UUID %s: filter accuracy if needed" % uuid,
                ecwp.PipelineStages.ACCURACY_FILTERING.name,
                lambda: eaicf.filter_accuracy(uuid))

    timed_stage("UUID %s: segmenting into trips" % uuid,
                ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                lambda: eaist.segment_current_trips(uuid))

    timed_stage("UUID %s: segmenting into sections" % uuid,
                ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                lambda: eaiss.segment_current_sections(uuid))

    timed_stage("UUID %s: smoothing sections" % uuid,
                ecwp.PipelineStages.JUMP_SMOOTHING.name,
                lambda: eaicl.filter_current_sections(uuid))

    timed_stage("UUID %s: cleaning and resampling timeline" % uuid,
                ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                lambda: eaicr.clean_and_resample(uuid))

    timed_stage("UUID %s: inferring transportation mode" % uuid,
                ecwp.PipelineStages.MODE_INFERENCE.name,
                lambda: eacimp.predict_mode(uuid))

    # use store data
    timed_stage("UUID %s: storing views to cache" % uuid,
                ecwp.PipelineStages.OUTPUT_GEN.name,
                uh.storeViewsToCache)
def run_intake_pipeline_for_user(uuid):
    """Run the timed intake pipeline stages, in order, for a single user.

    Each stage's elapsed time is recorded through esds.store_pipeline_time.
    Users with no timeseries entries are skipped after the usercache move.
    """
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    def timed_stage(banner_msg, stage_name, stage_fn):
        # Same observable behavior as the original inline pattern: banner to
        # the log and (timestamped) to stdout, stage executed inside the
        # timer, then the elapsed time recorded against the stage name.
        banner = "*" * 10 + banner_msg + "*" * 10
        with ect.Timer() as stage_timer:
            logging.info(banner)
            print(str(arrow.now()) + banner)
            stage_fn()
        esds.store_pipeline_time(uuid, stage_name, time.time(),
                                 stage_timer.elapsed)

    timed_stage("UUID %s: moving to long term" % uuid,
                ecwp.PipelineStages.USERCACHE.name,
                uh.moveToLongTerm)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    if edb.get_timeseries_db().find({"user_id": uuid}).count() == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    timed_stage("UUID %s: filter accuracy if needed" % uuid,
                ecwp.PipelineStages.ACCURACY_FILTERING.name,
                lambda: eaicf.filter_accuracy(uuid))

    timed_stage("UUID %s: segmenting into trips" % uuid,
                ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                lambda: eaist.segment_current_trips(uuid))

    timed_stage("UUID %s: segmenting into sections" % uuid,
                ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                lambda: eaiss.segment_current_sections(uuid))

    timed_stage("UUID %s: smoothing sections" % uuid,
                ecwp.PipelineStages.JUMP_SMOOTHING.name,
                lambda: eaicl.filter_current_sections(uuid))

    timed_stage("UUID %s: cleaning and resampling timeline" % uuid,
                ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                lambda: eaicr.clean_and_resample(uuid))

    # NOTE: this stage name is a bare string in the original, not a
    # PipelineStages member.
    timed_stage("UUID %s: checking active mode trips to autocheck habits" % uuid,
                "AUTOCHECK_POINTS",
                lambda: autocheck.give_points_for_all_tasks(uuid))

    timed_stage("UUID %s: storing views to cache" % uuid,
                ecwp.PipelineStages.OUTPUT_GEN.name,
                uh.storeViewsToCache)
def testPlaceTripTimeline(self):
    """Segment trips, then check that the raw timeline for the test day
    has a consistent place/trip chain (via checkPlaceTripConsistency)."""
    eaist.segment_current_trips(self.testUUID)
    timeline = esdt.get_raw_timeline(self.testUUID,
                                     self.day_start_ts, self.day_end_ts)
    self.checkPlaceTripConsistency(timeline)
def testDatetimeTimeline(self):
    """Same consistency check as testPlaceTripTimeline, but retrieving the
    raw timeline by local datetime instead of by timestamp."""
    eaist.segment_current_trips(self.testUUID)
    timeline = esdt.get_raw_timeline_from_dt(self.testUUID,
                                             self.day_start_dt,
                                             self.day_end_dt)
    self.checkPlaceTripConsistency(timeline)
def testSegmentationWrapperCombined(self):
    """Segment android + iOS data as one combined user and verify the
    resulting place/trip entry chain and trip durations."""
    # Re-tag every iOS entry with the android UUID so both datasets are
    # segmented as a single combined user.
    tsdb = edb.get_timeseries_db()
    for entry in esta.TimeSeries.get_time_series(self.iosUUID).find_entries():
        entry["user_id"] = self.androidUUID
        edb.save(tsdb, entry)

    # Now, segment the data for the combined UUID, which will include both
    # android and ios
    eaist.segment_current_trips(self.androidUUID)

    place_query = estt.TimeQuery("data.enter_ts", 1440658800, 1446847600)
    place_entries = esda.get_entries(esda.RAW_PLACE_KEY, self.androidUUID,
                                     place_query)
    trip_query = estt.TimeQuery("data.start_ts", 1440658800, 1446847600)
    trip_entries = esda.get_entries(esda.RAW_TRIP_KEY, self.androidUUID,
                                    trip_query,
                                    untracked_key=esda.RAW_UNTRACKED_KEY)

    for idx, place in enumerate(place_entries):
        logging.debug("Retrieved places %s: %s -> %s" %
                      (idx, place.data.enter_fmt_time, place.data.exit_fmt_time))
    for idx, trip in enumerate(trip_entries):
        logging.debug("Retrieved trips %s: %s -> %s" %
                      (idx, trip.data.start_fmt_time, trip.data.end_fmt_time))

    # We expect there to be 12 places, but the first one is the start of
    # the chain with no enter time, so the enter_ts query only returns 11.
    self.assertEqual(len(place_entries), 11)
    self.assertEqual(len(trip_entries), 11)

    def check_linkage(prev_trip, place, next_trip):
        # The place between two consecutive trips must point at both of
        # them, and both trips must point back at the place.
        self.assertEqual(prev_trip.data.end_place, place.get_id())
        self.assertEqual(next_trip.data.start_place, place.get_id())
        self.assertEqual(place.data.ending_trip, prev_trip.get_id())
        self.assertEqual(place.data.starting_trip, next_trip.get_id())

    check_linkage(trip_entries[0], place_entries[0], trip_entries[1])
    self.assertEqual(round(trip_entries[0].data.duration), 11 * 60 + 9)
    self.assertEqual(round(trip_entries[1].data.duration), 6 * 60 + 54)
    self.assertIsNotNone(place_entries[0].data.location)

    # There are 9 android "trips" first (index: 0-8), including the untracked time
    # NOTE(review): the original comment said "check trips 10 and 11", but the
    # code has always checked indices 9 and 10 — confirm which was intended.
    check_linkage(trip_entries[9], place_entries[9], trip_entries[10])
    self.assertEqual(round(trip_entries[9].data.duration), 14 * 60 + 41)
    self.assertEqual(round(trip_entries[10].data.duration),
                     1 * 60 * 60 + 50 * 60 + 56)
    self.assertIsNotNone(place_entries[9].data.location)
def run_intake_pipeline_for_user(uuid):
    """Run the full intake pipeline for a single user, timing every stage.

    Stage order: usercache -> long-term move, incoming user input matching,
    accuracy filtering, trip segmentation, section segmentation, jump
    smoothing, clean/resample, mode inference, confirmed object creation,
    and output generation. Each stage's elapsed time is recorded via
    esds.store_pipeline_time. Users whose timeseries contains only
    ``stats/pipeline_time`` entries are skipped as spurious.
    """
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        logging.info("*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: moving to long term" % uuid + "*" * 10)
        uh.moveToLongTerm()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    with ect.Timer() as uit:
        logging.info("*" * 10 +
                     "UUID %s: updating incoming user inputs" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: updating incoming user inputs" % uuid + "*" * 10)
        eaum.match_incoming_user_inputs(uuid)
    # BUGFIX: this previously stored uct.elapsed (the usercache timer), so
    # the incoming-input-match stage was always recorded with the wrong
    # duration. It must use this stage's own timer, uit.
    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.USER_INPUT_MATCH_INCOMING.name,
        time.time(), uit.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    if edb.get_timeseries_db().find({
        "user_id": uuid
    }).distinct("metadata.key") == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        logging.info("*" * 10 +
                     "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        eaicf.filter_accuracy(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        logging.info("*" * 10 +
                     "UUID %s: segmenting into trips" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: segmenting into trips" % uuid + "*" * 10)
        eaist.segment_current_trips(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        logging.info("*" * 10 +
                     "UUID %s: segmenting into sections" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: segmenting into sections" % uuid + "*" * 10)
        eaiss.segment_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        logging.info("*" * 10 +
                     "UUID %s: smoothing sections" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: smoothing sections" % uuid + "*" * 10)
        eaicl.filter_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        logging.info("*" * 10 +
                     "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
        eaicr.clean_and_resample(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    # Timer renamed from the original's reused `crt` to avoid shadowing the
    # clean/resample timer above.
    with ect.Timer() as mit:
        logging.info("*" * 10 +
                     "UUID %s: inferring transportation mode" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: inferring transportation mode" % uuid + "*" * 10)
        eacimr.predict_mode(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as cct:
        logging.info("*" * 10 +
                     "UUID %s: creating confirmed objects " % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: creating confirmed objects " % uuid + "*" * 10)
        eaum.create_confirmed_objects(uuid)
    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.CREATE_CONFIRMED_OBJECTS.name,
        time.time(), cct.elapsed)

    with ect.Timer() as ogt:
        logging.info("*" * 10 +
                     "UUID %s: storing views to cache" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 +
              "UUID %s: storing views to cache" % uuid + "*" * 10)
        uh.storeViewsToCache()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
# Stage 1: drain the usercache into long-term storage for every cached user
for curr_uuid in cache_uuid_list:
    logging.info("*" * 10 + "UUID %s: moving to long term" % curr_uuid + "*" * 10)
    euah.UserCacheHandler.getUserCacheHandler(curr_uuid).moveToLongTerm()

# TODO: For now, move filters from metadata to data. Once we get the
# updated data collection clients to people, we don't need to do this any
# more
import emission.storage.timeseries.format_hacks.move_filter_field as estfm
estfm.move_all_filters_to_data()

long_term_uuid_list = esta.TimeSeries.get_uuid_list()
logging.info("*" * 10 + "long term UUID list = %s" % long_term_uuid_list)

# Stage 2: run the analysis stages over every user in long-term storage
for curr_uuid in long_term_uuid_list:
    logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % curr_uuid + "*" * 10)
    eaicf.filter_accuracy(curr_uuid)

    logging.info("*" * 10 + "UUID %s: segmenting into trips" % curr_uuid + "*" * 10)
    eaist.segment_current_trips(curr_uuid)

    logging.info("*" * 10 + "UUID %s: segmenting into sections" % curr_uuid + "*" * 10)
    eaiss.segment_current_sections(curr_uuid)

    logging.info("*" * 10 + "UUID %s: smoothing sections" % curr_uuid + "*" * 10)
    eaicl.filter_current_sections(curr_uuid)

    logging.info("*" * 10 + "UUID %s: storing views to cache" % curr_uuid + "*" * 10)
    euah.UserCacheHandler.getUserCacheHandler(curr_uuid).storeViewsToCache()
def runIntakePipeline(uuid):
    """Run a reduced intake pipeline for the given user.

    Unlike run_intake_pipeline_for_user, this skips the usercache move,
    mode inference, and output-cache generation, and records no stage
    timings. Stage order matches the full pipeline: accuracy filtering,
    filter-field format hack, trip segmentation, section segmentation,
    and clean/resample.
    """
    eaicf.filter_accuracy(uuid)
    # Move filters from metadata to data (format hack; see estfm module)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(uuid)
    eaiss.segment_current_sections(uuid)
    eaicr.clean_and_resample(uuid)