def testTripGeojson(self):
    """Check the geojson produced for the cleaned timeline of the test user.

    Runs trip segmentation, section segmentation, smoothing and
    clean-and-resample for ``self.testUUID``, then checks the raw and cleaned
    trip counts and the structure of the per-trip and per-day geojson.
    """
    start_ts, end_ts = 1440658800, 1440745200

    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    eaicl.filter_current_sections(self.testUUID)

    tl = esdtl.get_raw_timeline(self.testUUID, start_ts, end_ts)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(len(tl.trips), 8)

    eaicr.clean_and_resample(self.testUUID)

    tl = esdtl.get_cleaned_timeline(self.testUUID, start_ts, end_ts)
    tl.fill_start_end_places()

    created_trips = tl.trips
    self.assertEqual(len(created_trips), 7)

    # Geojson for the first cleaned trip
    trip_geojson = gjfc.trip_to_geojson(created_trips[0], tl)
    logging.debug("first trip_geojson = %s" % bju.dumps(trip_geojson, indent=4))

    self.assertEqual(trip_geojson.type, "FeatureCollection")
    self.assertEqual(trip_geojson.properties["feature_type"], "trip")
    self.assertEqual(len(trip_geojson.features), 5)

    # Geojson for the whole timeline; only the last entry is inspected
    day_geojson = gjfc.get_geojson_for_timeline(self.testUUID, tl)
    self.assertEqual(len(day_geojson), 7)
    self.assertEqual(day_geojson[-1].type, "FeatureCollection")
    self.assertEqual(day_geojson[-1].properties["feature_type"], "trip")
    self.assertEqual(len(day_geojson[-1].features), 5)
def testTripGeojson(self):
    """Check the geojson produced after cleaning and mode prediction.

    Runs trip segmentation, section segmentation, smoothing,
    clean-and-resample and mode prediction for ``self.testUUID``, then checks
    the raw and cleaned trip counts and the structure of the per-trip and
    per-day geojson.
    """
    start_ts, end_ts = 1440658800, 1440745200

    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    eaicl.filter_current_sections(self.testUUID)

    tl = esdtl.get_raw_timeline(self.testUUID, start_ts, end_ts)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(len(tl.trips), 9)

    eaicr.clean_and_resample(self.testUUID)
    eacimp.predict_mode(self.testUUID)

    tl = esdtl.get_cleaned_timeline(self.testUUID, start_ts, end_ts)
    tl.fill_start_end_places()

    created_trips = tl.trips
    self.assertEqual(len(created_trips), 9)

    # Geojson for the first cleaned trip
    trip_geojson = gjfc.trip_to_geojson(created_trips[0], tl)
    logging.debug("first trip_geojson = %s" % bju.dumps(trip_geojson, indent=4))

    self.assertEqual(trip_geojson.type, "FeatureCollection")
    self.assertEqual(trip_geojson.properties["feature_type"], "trip")
    self.assertEqual(len(trip_geojson.features), 5)

    # Geojson for the whole timeline; only the last entry is inspected
    day_geojson = gjfc.get_geojson_for_timeline(self.testUUID, tl)
    self.assertEqual(len(day_geojson), 8)
    self.assertEqual(day_geojson[-1].type, "FeatureCollection")
    self.assertEqual(day_geojson[-1].properties["feature_type"], "trip")
    self.assertEqual(len(day_geojson[-1].features), 5)
def testRemoveAllOutliers(self):
    """Flag every location of every smoothed section as deleted and verify
    that clean_and_resample still completes, then check what it stored."""
    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2016-06-20")
    self.ts = esta.TimeSeries.get_time_series(self.testUUID)

    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    eaicl.filter_current_sections(self.testUUID)

    # Materialize all raw sections up front, before any entry is updated
    raw_section_docs = self.ts.find_entries([esda.RAW_SECTION_KEY],
                                            time_query=None)
    raw_sections = [ecwe.Entry(doc) for doc in raw_section_docs]

    for raw_section in raw_sections:
        section_id = raw_section.get_id()
        smoothing_doc = self.ts.get_entry_at_ts(
            "analysis/smoothing", "data.section", section_id)
        if smoothing_doc is None:
            # No smoothing result stored for this section; nothing to change
            continue

        logging.debug("Found smoothing result for section %s" % section_id)
        # Replace the deleted points with every filtered location of the section
        section_tq = esda.get_time_query_for_trip_like(
            esda.RAW_SECTION_KEY, section_id)
        section_loc_df = self.ts.get_data_df("background/filtered_location",
                                             section_tq)
        smoothing_doc["data"]["deleted_points"] = section_loc_df["_id"].tolist()
        self.ts.update(ecwe.Entry(smoothing_doc))

    # The main requirement is simply that this call does not crash.
    eaicr.clean_and_resample(self.testUUID)

    # Most trips end up with zero length, but one is apparently non-zero
    # because its stop length is non-zero, so exactly one cleaned trip remains.
    trips_df = self.ts.get_data_df(esda.CLEANED_TRIP_KEY, time_query=None)
    self.assertEqual(len(trips_df), 1)

    # Squishing sections is not supported, but stops and sections are only
    # stored for non-squished trips. The one non-squished trip happens to
    # have two sections and one stop.
    sections_df = self.ts.get_data_df(esda.CLEANED_SECTION_KEY, time_query=None)
    self.assertEqual(len(sections_df), 2)
    self.assertEqual(sections_df.distance.tolist(), [0, 0])

    stops_df = self.ts.get_data_df(esda.CLEANED_STOP_KEY, time_query=None)
    self.assertEqual(len(stops_df), 1)
    self.assertAlmostEqual(stops_df.distance[0], 3252, places=0)
def runIntakePipeline(uuid):
    """Run the intake stages for ``uuid``, from accuracy filtering through
    mode prediction, in a fixed order."""
    # These imports are intentionally local so that the modules, and any
    # config modules they pull in, are not loaded before the caller wants.
    import emission.analysis.intake.cleaning.filter_accuracy as eaicf
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    import emission.analysis.intake.segmentation.trip_segmentation as eaist
    import emission.analysis.intake.segmentation.section_segmentation as eaiss
    import emission.analysis.intake.cleaning.location_smoothing as eaicl
    import emission.analysis.intake.cleaning.clean_and_resample as eaicr
    import emission.analysis.classification.inference.mode.pipeline as eacimp

    ordered_stages = (
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
        eacimp.predict_mode,
    )
    for run_stage in ordered_stages:
        run_stage(uuid)
def runIntakePipeline(uuid):
    """Apply each intake stage to ``uuid`` in sequence, ending with mode
    prediction."""
    # Imported inside the function on purpose: loading these modules (and the
    # config modules they bring along) must not happen before it is wanted.
    from emission.analysis.intake.cleaning import filter_accuracy as eaicf
    from emission.storage.timeseries.format_hacks import move_filter_field as estfm
    from emission.analysis.intake.segmentation import trip_segmentation as eaist
    from emission.analysis.intake.segmentation import section_segmentation as eaiss
    from emission.analysis.intake.cleaning import location_smoothing as eaicl
    from emission.analysis.intake.cleaning import clean_and_resample as eaicr
    from emission.analysis.classification.inference.mode import pipeline as eacimp

    # Accuracy filtering
    eaicf.filter_accuracy(uuid)

    # Segmentation: trips first, then sections
    eaist.segment_current_trips(uuid)
    eaiss.segment_current_sections(uuid)

    # Smoothing, then cleaning and resampling
    eaicl.filter_current_sections(uuid)
    eaicr.clean_and_resample(uuid)

    # Mode prediction
    eacimp.predict_mode(uuid)
def testRemoveAllOutliers(self):
    """Mark all points of each smoothed section as outliers, then make sure
    the clean-and-resample stage survives and check its stored output."""

    def fetch_all(key):
        # Every stored entry for the key, with no time restriction
        return self.ts.get_data_df(key, time_query=None)

    etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2016-06-20")
    self.ts = esta.TimeSeries.get_time_series(self.testUUID)
    for prepare in (eaist.segment_current_trips,
                    eaiss.segment_current_sections,
                    eaicl.filter_current_sections):
        prepare(self.testUUID)

    # get all sections
    all_sections = list(map(
        ecwe.Entry,
        self.ts.find_entries([esda.RAW_SECTION_KEY], time_query=None)))

    for curr_section in all_sections:
        smoothing_entry = self.ts.get_entry_at_ts("analysis/smoothing",
                                                  "data.section",
                                                  curr_section.get_id())
        if smoothing_entry is not None:
            logging.debug("Found smoothing result for section %s" % curr_section.get_id())
            # Overwrite the deleted points with all locations of the section
            tq = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                   curr_section.get_id())
            all_points_df = self.ts.get_data_df("background/filtered_location", tq)
            smoothing_entry["data"]["deleted_points"] = all_points_df["_id"].tolist()
            self.ts.update(ecwe.Entry(smoothing_entry))

    # What matters most here is that the next call does not crash.
    eaicr.clean_and_resample(self.testUUID)

    # Nearly all trips have zero length; one apparently does not because its
    # stop length is non-zero, so a single cleaned trip is left.
    self.assertEqual(len(fetch_all(esda.CLEANED_TRIP_KEY)), 1)

    # Sections are not squished, but stops and sections are stored only for
    # non-squished trips; this trip happens to have two sections and one stop.
    kept_sections = fetch_all(esda.CLEANED_SECTION_KEY)
    self.assertEqual(len(kept_sections), 2)
    self.assertEqual(kept_sections["distance"].tolist(), [0, 0])

    kept_stops = fetch_all(esda.CLEANED_STOP_KEY)
    self.assertEqual(len(kept_stops), 1)
    self.assertAlmostEqual(kept_stops["distance"][0], 3252, places=0)
def run_intake_pipeline_for_user(uuid):
    """Run every intake pipeline stage, in order, for a single user.

    Each stage is announced through both ``logging.info`` and ``print`` and
    is wrapped in its own ``ect.Timer``; the timer's ``elapsed`` value is then
    stored with ``esds.store_pipeline_time`` under the stage name.

    Returns early (with ``None``) after the user-input matching stage when the
    only ``metadata.key`` stored for the user is ``"stats/pipeline_time"``.

    NOTE(review): ``store_pipeline_time`` is called after each ``with`` block
    on the assumption that ``ect.Timer`` fills in ``elapsed`` on exit - confirm.

    :param uuid: identifier of the user whose data is processed
    """
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        banner = "*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        uh.moveToLongTerm()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    with ect.Timer() as uit:
        banner = "*" * 10 + "UUID %s: updating incoming user inputs" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaum.match_incoming_user_inputs(uuid)

    # Record this stage's own timer; the original stored uct.elapsed (the
    # usercache timer) here, so the wrong duration was saved for this stage.
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USER_INPUT_MATCH_INCOMING.name,
                             time.time(), uit.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    if edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key") == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        banner = "*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        banner = "*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        banner = "*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        banner = "*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        banner = "*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    # The remaining stages get their own timer names instead of reusing crt.
    with ect.Timer() as mit:
        banner = "*" * 10 + "UUID %s: inferring transportation mode" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eacimr.predict_mode(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as cot:
        banner = "*" * 10 + "UUID %s: creating confirmed objects " % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaum.create_confirmed_objects(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CREATE_CONFIRMED_OBJECTS.name,
                             time.time(), cot.elapsed)

    with ect.Timer() as ogt:
        banner = "*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        uh.storeViewsToCache()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
def runIntakePipeline(uuid):
    """Run accuracy filtering, trip and section segmentation, smoothing and
    clean-and-resample for ``uuid``, in that order."""
    for stage in (
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
    ):
        stage(uuid)
# Two-phase pipeline driver.
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# source; the filter-move step takes no uuid, so it is placed between the two
# loops rather than inside the first one - confirm against the original file.

# Phase 1: move each cached user's data to long term storage.
for uuid in cache_uuid_list:
    logging.info("*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10)
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
    uh.moveToLongTerm()

# TODO: For now, move filters from metadata to data. Once we get the
# updated data collection clients to people, we don't need to do this any
# more
import emission.storage.timeseries.format_hacks.move_filter_field as estfm
estfm.move_all_filters_to_data()

# Phase 2: run the analysis stages for every user known to the timeseries.
long_term_uuid_list = esta.TimeSeries.get_uuid_list()
logging.info("*" * 10 + "long term UUID list = %s" % long_term_uuid_list)
for uuid in long_term_uuid_list:
    logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
    eaicf.filter_accuracy(uuid)

    logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
    eaist.segment_current_trips(uuid)

    logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
    eaiss.segment_current_sections(uuid)

    logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
    eaicl.filter_current_sections(uuid)

    logging.info("*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
    # A fresh handler is fetched because uh from phase 1 belongs to another uuid
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
    uh.storeViewsToCache()
def run_intake_pipeline_for_user(uuid):
    """Run the timed intake stages, including the autocheck step, for one user.

    Every stage is announced via ``logging.info`` and ``print``, timed with an
    ``ect.Timer`` and its elapsed time stored through
    ``esds.store_pipeline_time``. Returns early when the timeseries holds no
    entries for ``uuid``.
    """
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        banner = "*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        uh.moveToLongTerm()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    entry_count = edb.get_timeseries_db().find({"user_id": uuid}).count()
    if entry_count == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        banner = "*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        banner = "*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        banner = "*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        banner = "*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        banner = "*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    with ect.Timer() as act:
        banner = "*" * 10 + "UUID %s: checking active mode trips to autocheck habits" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        autocheck.give_points_for_all_tasks(uuid)
    # This stage is stored under a literal name rather than a PipelineStages member
    esds.store_pipeline_time(uuid, "AUTOCHECK_POINTS", time.time(), act.elapsed)

    with ect.Timer() as ogt:
        banner = "*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        uh.storeViewsToCache()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
def run_pipeline():
    """Run the intake pipeline for the user described by the current request.

    Reads ``pm_address`` and ``uuid`` from ``request.json``, stores the address
    on ``edb.pm_address``, and then runs each stage in order. Every stage is
    announced via ``logging.info`` and ``print``, timed with an ``ect.Timer``
    and recorded with ``esds.store_pipeline_time``.

    Returns early (with ``None``) after the first stage when the only
    ``metadata.key`` stored for the user is ``"stats/pipeline_time"``.

    NOTE(review): ``store_pipeline_time`` is called after each ``with`` block
    on the assumption that ``ect.Timer`` fills in ``elapsed`` on exit - confirm.
    """
    edb.pm_address = request.json['pm_address']
    print(edb.pm_address)
    # uuid is a filler and just needs to be consistent for each user.
    # These can be removed but require refactoring all code locations
    # that use the uuid.
    uuid = request.json['uuid']
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        logging.info("*" * 10 + "moving to long term" + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "moving to long term" + "*" * 10)
        uh.moveToLongTerm()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    #
    # The distinct keys are fetched once and reused for both the debug print
    # and the skip check; the original issued the same query twice in a row.
    distinct_keys = edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key")
    print(distinct_keys)
    if distinct_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        logging.info("*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    # Mode inference gets its own timer name instead of reusing crt.
    with ect.Timer() as mit:
        logging.info("*" * 10 + "UUID %s: inferring transportation mode" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: inferring transportation mode" % uuid + "*" * 10)
        eacimp.predict_mode(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as ogt:
        logging.info("*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
        # use store data
        uh.storeViewsToCache()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
# Per-user analysis stages; each one is announced with logging.info before it runs.
# NOTE(review): `uuid` is bound outside this fragment, presumably by an
# enclosing loop or function that is not visible here - confirm.
logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
eaicf.filter_accuracy(uuid)

logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
eaist.segment_current_trips(uuid)

logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
eaiss.segment_current_sections(uuid)

logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
eaicl.filter_current_sections(uuid)

logging.info("*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
eaicr.clean_and_resample(uuid)

logging.info(
    "*" * 10 + "UUID %s: checking active mode trips to autocheck habits" % uuid + "*" * 10)
autocheck.give_points_for_all_tasks(uuid)

logging.info("*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
# Only the handler lookup is visible here; whatever stores the views is
# expected to follow outside this fragment.
uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
def runIntakePipeline(uuid):
    """Push ``uuid`` through the intake stages up to and including
    clean-and-resample."""
    pipeline_steps = [
        eaicf.filter_accuracy,            # accuracy filtering
        eaist.segment_current_trips,      # trip segmentation
        eaiss.segment_current_sections,   # section segmentation
        eaicl.filter_current_sections,    # section smoothing
        eaicr.clean_and_resample,         # cleaning and resampling
    ]
    for step in pipeline_steps:
        step(uuid)
def run_intake_pipeline_for_user(uuid):
    """Run the timed intake stages, including the autocheck step, for one user.

    Each stage is announced via ``logging.info`` and ``print``, timed with an
    ``ect.Timer`` and recorded with ``esds.store_pipeline_time``. Returns
    early when the only ``metadata.key`` stored for the user is
    ``"stats/pipeline_time"``.
    """

    def announce(stage_text):
        # Log the starred banner, and print it prefixed with the current time
        banner = "*" * 10 + stage_text + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)

    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        announce("UUID %s: moving to long term" % uuid)
        uh.moveToLongTerm()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    stored_keys = edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key")
    if stored_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        announce("UUID %s: filter accuracy if needed" % uuid)
        eaicf.filter_accuracy(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        announce("UUID %s: segmenting into trips" % uuid)
        eaist.segment_current_trips(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        announce("UUID %s: segmenting into sections" % uuid)
        eaiss.segment_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        announce("UUID %s: smoothing sections" % uuid)
        eaicl.filter_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        announce("UUID %s: cleaning and resampling timeline" % uuid)
        eaicr.clean_and_resample(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    with ect.Timer() as act:
        announce("UUID %s: checking active mode trips to autocheck habits" % uuid)
        autocheck.give_points_for_all_tasks(uuid)
    # This stage is stored under a literal name rather than a PipelineStages member
    esds.store_pipeline_time(uuid, "AUTOCHECK_POINTS", time.time(), act.elapsed)

    with ect.Timer() as ogt:
        announce("UUID %s: storing views to cache" % uuid)
        uh.storeViewsToCache()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
def run_intake_pipeline_for_user(uuid):
    """Run the untimed intake stages, including the autocheck step, for one user.

    Each stage is announced through ``logging.info`` and ``print`` before it
    runs. Returns early when the timeseries holds no entries for ``uuid``.
    """

    def announce(stage_text):
        # Log the starred banner, and print it prefixed with the current time
        banner = "*" * 10 + stage_text + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)

    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    announce("UUID %s: moving to long term" % uuid)
    uh.moveToLongTerm()

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    user_entry_count = edb.get_timeseries_db().find({"user_id": uuid}).count()
    if user_entry_count == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    announce("UUID %s: filter accuracy if needed" % uuid)
    eaicf.filter_accuracy(uuid)

    announce("UUID %s: segmenting into trips" % uuid)
    eaist.segment_current_trips(uuid)

    announce("UUID %s: segmenting into sections" % uuid)
    eaiss.segment_current_sections(uuid)

    announce("UUID %s: smoothing sections" % uuid)
    eaicl.filter_current_sections(uuid)

    announce("UUID %s: cleaning and resampling timeline" % uuid)
    eaicr.clean_and_resample(uuid)

    announce("UUID %s: checking active mode trips to autocheck habits" % uuid)
    autocheck.give_points_for_all_tasks(uuid)

    announce("UUID %s: storing views to cache" % uuid)
    uh.storeViewsToCache()