def setUp(self):
    """Load the android and iOS real examples, then filter the iOS data.

    After each ``etc.setupRealExample`` call the current ``self.testUUID``
    is copied into a platform-specific attribute. Only the iOS UUID is
    passed to ``eaicf.filter_accuracy``.
    """
    android_example = "emission/tests/data/real_examples/shankari_2015-aug-27"
    ios_example = "emission/tests/data/real_examples/iphone_2015-11-06"

    etc.setupRealExample(self, android_example)
    self.androidUUID = self.testUUID

    etc.setupRealExample(self, ios_example)
    self.iosUUID = self.testUUID
    eaicf.filter_accuracy(self.iosUUID)

    uuid_pair = (self.androidUUID, self.iosUUID)
    logging.debug("androidUUID = %s, iosUUID = %s" % uuid_pair)
def testEmptyCall(self):
    """Run the whole accuracy filter against a zero-length timeseries."""
    import emission.core.get_database as edb

    # Drop every timeseries entry that belongs to the test user.
    user_query = {"user_id": self.testUUID}
    timeseries_db = edb.get_timeseries_db()
    timeseries_db.remove(user_query)

    # The filter is expected to cope with the empty input without raising.
    eaicf.filter_accuracy(self.testUUID)

    location_df = self.ts.get_data_df("background/location")
    self.assertEqual(len(location_df), 0)
def testEmptyCall(self):
    """Check that filtering a user with no timeseries entries does not throw."""
    import emission.core.get_database as edb

    # Start from a zero-length timeseries for this user.
    only_this_user = {"user_id": self.testUUID}
    edb.get_timeseries_db().remove(only_this_user)

    # We expect that this should not throw.
    eaicf.filter_accuracy(self.testUUID)

    remaining_locations = self.ts.get_data_df("background/location")
    self.assertEqual(len(remaining_locations), 0)
def setUp(self):
    """Set up one android and one iOS example; filter only the iOS one.

    ``self.testUUID`` is read after each ``etc.setupRealExample`` call and
    stored as ``androidUUID`` / ``iosUUID`` respectively.
    """
    examples_dir = "emission/tests/data/real_examples/"

    etc.setupRealExample(self, examples_dir + "shankari_2015-aug-27")
    self.androidUUID = self.testUUID

    etc.setupRealExample(self, examples_dir + "iphone_2015-11-06")
    self.iosUUID = self.testUUID
    eaicf.filter_accuracy(self.iosUUID)

    logging.debug("androidUUID = %s, iosUUID = %s"
                  % (self.androidUUID, self.iosUUID))
def setUp(self):
    """Reset related databases, load the aug-27 example and filter it.

    Also records the start/end timestamps used by the tests as
    ``day_start_ts`` and ``day_end_ts``.
    """
    self.clearRelatedDb()

    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()

    db_size = edb.get_timeseries_db().count()
    logging.info("After loading, timeseries db size = %s" % db_size)

    self.day_start_ts, self.day_end_ts = 1440658800, 1440745200
def testFilterAccuracy(self):
    """Check entry counts before and after running the accuracy filter.

    Expects 205 raw locations, no filtered locations before the filter
    runs, and 124 filtered locations afterwards.
    """
    def entry_count(key):
        # Number of rows currently stored under the given timeseries key.
        return len(self.ts.get_data_df(key, None))

    self.assertEqual(entry_count("background/location"), 205)
    self.assertEqual(entry_count("background/filtered_location"), 0)

    eaicf.filter_accuracy(self.testUUID)

    self.assertEqual(entry_count("background/filtered_location"), 124)
def setUp(self):
    """Load and filter the aug-27 example and record the day boundaries.

    The boundaries are stored both as timestamps (``day_*_ts``) and as
    ``ecwl.LocalDate`` objects (``day_*_dt``); both local dates are built
    from the same year/month/day values.
    """
    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()

    db_size = edb.get_timeseries_db().count()
    logging.info("After loading, timeseries db size = %s" % db_size)

    self.day_start_ts = 1440658800
    self.day_end_ts = 1440745200

    # Each LocalDate gets its own dict so the two objects share no state.
    local_day = {'year': 2015, 'month': 8, 'day': 27}
    self.day_start_dt = ecwl.LocalDate(dict(local_day))
    self.day_end_dt = ecwl.LocalDate(dict(local_day))
def setUp(self):
    """Prepare the aug-27 example plus timestamp and local-date bounds.

    Runs the accuracy filter, moves filters to data, logs the timeseries
    size and then stores the day boundaries on the test instance.
    """
    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()

    timeseries_size = edb.get_timeseries_db().count()
    logging.info("After loading, timeseries db size = %s" % timeseries_size)

    self.day_start_ts, self.day_end_ts = 1440658800, 1440745200

    # Start and end are the same calendar day, built as separate objects.
    self.day_start_dt = ecwl.LocalDate(
        {'year': 2015, 'month': 8, 'day': 27})
    self.day_end_dt = ecwl.LocalDate(
        {'year': 2015, 'month': 8, 'day': 27})
def setUp(self):
    """Load and filter an android example and an iOS example.

    The android data is loaded with ``etc.setupRealExample``. The iOS data
    is read from its JSON file into ``self.entries`` and loaded with
    ``etc.setupRealExampleWithEntries`` under a fixed UUID. Both users are
    run through ``eaicf.filter_accuracy``.
    """
    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    self.androidUUID = self.testUUID
    eaicf.filter_accuracy(self.androidUUID)

    self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
    # Use a context manager so the example file is closed once parsed
    # (the previous ``json.load(open(...))`` left the handle open).
    with open("emission/tests/data/real_examples/iphone_2015-11-06") as ios_fp:
        self.entries = json.load(ios_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    self.iosUUID = self.testUUID
    eaicf.filter_accuracy(self.iosUUID)
def setUp(self):
    """Load an android and an iOS example; filter only the iOS data.

    The iOS entries are read from their JSON file into ``self.entries``
    and loaded with ``etc.setupRealExampleWithEntries`` under a fixed UUID.
    """
    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    self.androidUUID = self.testUUID

    self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
    # Use a context manager so the example file is closed once parsed
    # (the previous ``json.load(open(...))`` left the handle open).
    with open("emission/tests/data/real_examples/iphone_2015-11-06") as ios_fp:
        self.entries = json.load(ios_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    self.iosUUID = self.testUUID
    eaicf.filter_accuracy(self.iosUUID)

    logging.debug("androidUUID = %s, iosUUID = %s"
                  % (self.androidUUID, self.iosUUID))
def setUp(self):
    """Reset related dbs, load the aug-27 example and set day boundaries.

    Boundaries are stored as timestamps (``day_*_ts``) and as
    ``pydt.datetime`` values (``day_*_dt``) for 2015-08-27 and 2015-08-28.
    """
    self.clearRelatedDb()

    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()

    loaded_count = edb.get_timeseries_db().count()
    logging.info("After loading, timeseries db size = %s" % loaded_count)

    self.day_start_ts, self.day_end_ts = 1440658800, 1440745200
    self.day_start_dt = pydt.datetime(2015, 8, 27)
    self.day_end_dt = pydt.datetime(2015, 8, 28)
def setUp(self):
    """Wipe trip/section collections, then load and segment the aug-21 example.

    After clearing, the example is loaded, accuracy-filtered, its filters
    are moved to data, and it is segmented into trips and then sections.
    """
    self.clearRelatedDb()

    # Empty the old and new trip/section collections, in this order.
    collection_getters = (
        edb.get_trip_db,
        edb.get_section_db,
        edb.get_trip_new_db,
        edb.get_section_new_db,
    )
    for get_collection in collection_getters:
        get_collection().remove()

    example_path = "emission/tests/data/real_examples/shankari_2015-aug-21"
    etc.setupRealExample(self, example_path)

    test_user = self.testUUID
    eaicf.filter_accuracy(test_user)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(test_user)
    eaiss.segment_current_sections(test_user)
def testFilterAccuracy(self):
    """Verify that the accuracy filter turns 205 raw points into 124 filtered.

    The filtered-location key must be empty before the filter runs.
    """
    raw_key = "background/location"
    filtered_key = "background/filtered_location"

    raw_df = self.ts.get_data_df(raw_key, None)
    self.assertEqual(len(raw_df), 205)

    before_df = self.ts.get_data_df(filtered_key, None)
    self.assertEqual(len(before_df), 0)

    eaicf.filter_accuracy(self.testUUID)

    after_df = self.ts.get_data_df(filtered_key, None)
    self.assertEqual(len(after_df), 124)
def setUp(self):
    """Load and filter the aug-27 example and derive local-date bounds.

    ``day_*_dt`` are produced by ``esdldq.get_local_date`` from the
    matching ``day_*_ts`` timestamp in the "America/Los_Angeles" zone.
    """
    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()

    db_size = edb.get_timeseries_db().count()
    logging.info("After loading, timeseries db size = %s" % db_size)

    self.day_start_ts, self.day_end_ts = 1440658800, 1440745200

    local_tz = "America/Los_Angeles"
    self.day_start_dt = esdldq.get_local_date(self.day_start_ts, local_tz)
    self.day_end_dt = esdldq.get_local_date(self.day_end_ts, local_tz)
def setUp(self):
    """Enable the accuracy filter in config, then load and filter two users.

    The value returned by ``etc.set_analysis_config`` is kept in
    ``self.analysis_conf_path``. The android example is loaded with
    ``etc.setupRealExample``; the iOS entries are read from their JSON
    file and loaded with ``etc.setupRealExampleWithEntries`` under a fixed
    UUID. Both users are run through ``eaicf.filter_accuracy``.
    """
    self.analysis_conf_path = etc.set_analysis_config(
        "intake.cleaning.filter_accuracy.enable", True)

    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    self.androidUUID = self.testUUID
    eaicf.filter_accuracy(self.androidUUID)

    self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
    # Use a context manager so the example file is closed once parsed
    # (the previous ``json.load(open(...))`` left the handle open).
    with open("emission/tests/data/real_examples/iphone_2015-11-06") as ios_fp:
        self.entries = json.load(ios_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    self.iosUUID = self.testUUID
    eaicf.filter_accuracy(self.iosUUID)
def setUp(self):
    """Enable the accuracy filter in config, load two users, filter iOS only.

    The value returned by ``etc.set_analysis_config`` is kept in
    ``self.analysis_conf_path``. The iOS entries are read from their JSON
    file into ``self.entries`` and loaded under a fixed UUID.
    """
    self.analysis_conf_path = etc.set_analysis_config(
        "intake.cleaning.filter_accuracy.enable", True)

    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    self.androidUUID = self.testUUID

    self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
    # Use a context manager so the example file is closed once parsed
    # (the previous ``json.load(open(...))`` left the handle open).
    with open("emission/tests/data/real_examples/iphone_2015-11-06") as ios_fp:
        self.entries = json.load(ios_fp, object_hook=bju.object_hook)
    etc.setupRealExampleWithEntries(self)
    self.iosUUID = self.testUUID
    eaicf.filter_accuracy(self.iosUUID)

    logging.debug("androidUUID = %s, iosUUID = %s"
                  % (self.androidUUID, self.iosUUID))
def testCreatePlace(self):
    """Create common places from the aug-27 example and check their fields.

    Every common place read back for the user must have a non-None
    ``location`` attribute and a non-None ``"successors"`` item.
    """
    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)

    test_user = self.testUUID
    eaicf.filter_accuracy(test_user)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(test_user)
    eaiss.segment_current_sections(test_user)

    place_data = eamtcp.main(test_user)
    esdcpq.create_places(place_data, test_user)

    # Wrap all stored places first, then run the assertions over them.
    stored_places = esdcpq.get_all_common_places_for_user(test_user)
    common_places = [esdcpq.make_common_place(raw) for raw in stored_places]

    for common_place in common_places:
        self.assertIsNotNone(common_place.location)
        self.assertIsNotNone(common_place["successors"])
def runIntakePipeline(uuid):
    """Run the intake stages, ending with mode prediction, for ``uuid``.

    Stages run in order: accuracy filter, trip segmentation, section
    segmentation, section smoothing, clean-and-resample, mode prediction.
    """
    # The imports live inside the function on purpose: this keeps the stage
    # modules, and any config modules they pull in, from being loaded
    # before the caller is ready for them.
    import emission.analysis.intake.cleaning.filter_accuracy as eaicf
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    import emission.analysis.intake.segmentation.trip_segmentation as eaist
    import emission.analysis.intake.segmentation.section_segmentation as eaiss
    import emission.analysis.intake.cleaning.location_smoothing as eaicl
    import emission.analysis.intake.cleaning.clean_and_resample as eaicr
    import emission.analysis.classification.inference.mode.pipeline as eacimp

    pipeline_stages = (
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
        eacimp.predict_mode,
    )
    for run_stage in pipeline_stages:
        run_stage(uuid)
def runIntakePipeline(uuid):
    """Execute each intake pipeline stage once, in order, for ``uuid``.

    The final stage is ``eacimp.predict_mode``.
    """
    # Deferred imports: loading these modules (and their related config
    # modules) is postponed until the pipeline is actually run.
    import emission.analysis.intake.cleaning.filter_accuracy as eaicf
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    import emission.analysis.intake.segmentation.trip_segmentation as eaist
    import emission.analysis.intake.segmentation.section_segmentation as eaiss
    import emission.analysis.intake.cleaning.location_smoothing as eaicl
    import emission.analysis.intake.cleaning.clean_and_resample as eaicr
    import emission.analysis.classification.inference.mode.pipeline as eacimp

    ordered_stages = [
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
        eacimp.predict_mode,
    ]
    for stage in ordered_stages:
        stage(uuid)
def testCreateFromData(self):
    """Segment the aug-27 example and validate every stored common trip.

    For each common trip: start/end locations are set, it references at
    least one trip, a randomly chosen referenced trip loads as
    ``ecwt.Trip``, the probabilities sum is positive and the user id is
    "test2".
    """
    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)

    test_user = self.testUUID
    eaicf.filter_accuracy(test_user)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(test_user)
    eaiss.segment_current_sections(test_user)

    # Wrap all stored trips first, then run the assertions over them.
    stored_trips = esdctp.get_all_common_trips_for_user(test_user)
    common_trips = [esdctp.make_common_trip_from_json(raw)
                    for raw in stored_trips]

    for common_trip in common_trips:
        self.assertIsNotNone(common_trip.start_loc)
        self.assertIsNotNone(common_trip.end_loc)
        self.assertTrue(len(common_trip["trips"]) > 0)

        sampled_id = random.choice(common_trip["trips"])
        sampled_trip = esdtq.get_trip(sampled_id)
        self.assertEqual(type(sampled_trip), ecwt.Trip)

        self.assertTrue(common_trip.probabilites.sum() > 0)
        self.assertEqual(str(common_trip.user_id), "test2")
def testCreateFromData(self):
    """Check the common trips stored for the test user after segmentation.

    Each common trip must have start/end locations, a non-empty "trips"
    list whose random member loads as ``ecwt.Trip``, a positive
    probabilities sum, and the user id "test2".
    """
    etc.setupRealExample(
        self, "emission/tests/data/real_examples/shankari_2015-aug-27")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)

    raw_trips = esdctp.get_all_common_trips_for_user(self.testUUID)
    wrapped_trips = list(map(esdctp.make_common_trip_from_json, raw_trips))

    for wrapped in wrapped_trips:
        self.assertIsNotNone(wrapped.start_loc)
        self.assertIsNotNone(wrapped.end_loc)

        member_ids = wrapped["trips"]
        self.assertTrue(len(member_ids) > 0)

        # Spot-check one referenced trip; the list is read again for the pick.
        picked_id = random.choice(wrapped["trips"])
        self.assertEqual(type(esdtq.get_trip(picked_id)), ecwt.Trip)

        self.assertTrue(wrapped.probabilites.sum() > 0)
        self.assertEqual(str(wrapped.user_id), "test2")
def setUp(self):
    """Load the aug-27 real example and run the accuracy filter on it."""
    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)
    eaicf.filter_accuracy(self.testUUID)
def setUp(self):
    """Reset related databases, then load, filter and convert the example.

    After ``eaicf.filter_accuracy`` runs for the test user,
    ``estfm.move_all_filters_to_data`` is invoked.
    """
    self.clearRelatedDb()

    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)

    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
def run_intake_pipeline_for_user(uuid):
    """Run every intake stage for one user, announcing each stage.

    Each stage is announced through ``logging.info`` and through a
    timestamped ``print``. After the usercache move, the function returns
    early when the user has no timeseries entries at all.
    """
    banner = "*" * 10
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    stage_msg = banner + "UUID %s: moving to long term" % uuid + banner
    logging.info(stage_msg)
    print(str(arrow.now()) + stage_msg)
    cache_handler.moveToLongTerm()

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    entry_count = edb.get_timeseries_db().find({"user_id": uuid}).count()
    if entry_count == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    stage_msg = banner + "UUID %s: filter accuracy if needed" % uuid + banner
    logging.info(stage_msg)
    print(str(arrow.now()) + stage_msg)
    eaicf.filter_accuracy(uuid)

    stage_msg = banner + "UUID %s: segmenting into trips" % uuid + banner
    logging.info(stage_msg)
    print(str(arrow.now()) + stage_msg)
    eaist.segment_current_trips(uuid)

    stage_msg = banner + "UUID %s: segmenting into sections" % uuid + banner
    logging.info(stage_msg)
    print(str(arrow.now()) + stage_msg)
    eaiss.segment_current_sections(uuid)

    stage_msg = banner + "UUID %s: smoothing sections" % uuid + banner
    logging.info(stage_msg)
    print(str(arrow.now()) + stage_msg)
    eaicl.filter_current_sections(uuid)

    stage_msg = (banner + "UUID %s: cleaning and resampling timeline" % uuid
                 + banner)
    logging.info(stage_msg)
    print(str(arrow.now()) + stage_msg)
    eaicr.clean_and_resample(uuid)

    stage_msg = (banner
                 + "UUID %s: checking active mode trips to autocheck habits" % uuid
                 + banner)
    logging.info(stage_msg)
    print(str(arrow.now()) + stage_msg)
    autocheck.give_points_for_all_tasks(uuid)

    stage_msg = banner + "UUID %s: storing views to cache" % uuid + banner
    logging.info(stage_msg)
    print(str(arrow.now()) + stage_msg)
    cache_handler.storeViewsToCache()
def run_intake_pipeline_for_user(uuid):
    """Run and time every intake stage for one user.

    Each stage runs inside an ``ect.Timer`` block and is announced via
    ``logging.info`` and a timestamped ``print``. Once a block exits, the
    timer's ``elapsed`` value is stored through
    ``esds.store_pipeline_time``. The function returns early, after the
    usercache stage, when the user's only timeseries key is
    "stats/pipeline_time".
    """
    banner = "*" * 10
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as usercache_timer:
        stage_msg = banner + "UUID %s: moving to long term" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        cache_handler.moveToLongTerm()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), usercache_timer.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # A plain "no entries" check stopped working once stats were stored in
    # the timeseries: the pipeline runs for every user, even really old
    # ones, and inserts pipeline_time stats for each. So we skip users who
    # have nothing but pipeline_time entries.
    # Possibly this (distinct versus count) is why the pipeline has become
    # so much slower recently. Actually deleting the spurious entries, or
    # at least marking them obsolete, might help.
    user_keys = edb.get_timeseries_db().find(
        {"user_id": uuid}).distinct("metadata.key")
    if user_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as filter_timer:
        stage_msg = (banner + "UUID %s: filter accuracy if needed" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicf.filter_accuracy(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), filter_timer.elapsed)

    with ect.Timer() as trip_timer:
        stage_msg = banner + "UUID %s: segmenting into trips" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaist.segment_current_trips(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), trip_timer.elapsed)

    with ect.Timer() as section_timer:
        stage_msg = (banner + "UUID %s: segmenting into sections" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaiss.segment_current_sections(uuid)
    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), section_timer.elapsed)

    with ect.Timer() as smoothing_timer:
        stage_msg = banner + "UUID %s: smoothing sections" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicl.filter_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), smoothing_timer.elapsed)

    with ect.Timer() as resample_timer:
        stage_msg = (banner
                     + "UUID %s: cleaning and resampling timeline" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicr.clean_and_resample(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), resample_timer.elapsed)

    with ect.Timer() as autocheck_timer:
        stage_msg = (banner
                     + "UUID %s: checking active mode trips to autocheck habits" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        autocheck.give_points_for_all_tasks(uuid)
    esds.store_pipeline_time(uuid, "AUTOCHECK_POINTS",
                             time.time(), autocheck_timer.elapsed)

    with ect.Timer() as output_timer:
        stage_msg = banner + "UUID %s: storing views to cache" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        cache_handler.storeViewsToCache()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), output_timer.elapsed)
def setUp(self):
    """Copy the dummy inference seed, then load and filter the example.

    The value returned by ``etc.copy_dummy_seed_for_inference`` is kept in
    ``self.copied_model_path``.
    """
    self.copied_model_path = etc.copy_dummy_seed_for_inference()

    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)
    eaicf.filter_accuracy(self.testUUID)
# TEST_PHONE_IDS are not critical - we can run a pipeline for them once a day filtered_long_term_uuid_list = [ u for u in all_long_term_uuid_list if u not in estag.TEST_PHONE_IDS ] half = old_div(len(filtered_long_term_uuid_list), 2) long_term_uuid_list = filtered_long_term_uuid_list[half:] logging.info("*" * 10 + "long term UUID list = %s" % long_term_uuid_list) for uuid in long_term_uuid_list: if uuid is None: continue logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10) eaicf.filter_accuracy(uuid) logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10) eaist.segment_current_trips(uuid) logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10) eaiss.segment_current_sections(uuid) logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10) eaicl.filter_current_sections(uuid) logging.info("*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid +
def setUp(self):
    """Clear related databases and prepare the filtered aug-27 example.

    Runs ``eaicf.filter_accuracy`` for the test user and then
    ``estfm.move_all_filters_to_data``.
    """
    self.clearRelatedDb()
    etc.setupRealExample(
        self,
        "emission/tests/data/real_examples/shankari_2015-aug-27",
    )
    test_user = self.testUUID
    eaicf.filter_accuracy(test_user)
    estfm.move_all_filters_to_data()
def runIntakePipeline(uuid):
    """Run the intake stages, through clean-and-resample, for ``uuid``.

    Stages run in order: accuracy filter, trip segmentation, section
    segmentation, section smoothing, clean-and-resample.
    """
    pipeline_stages = (
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
    )
    for run_stage in pipeline_stages:
        run_stage(uuid)
# NOTE(review): layout reconstructed from whitespace-collapsed source;
# confirm the loop nesting against the original file.

# First pass: call moveToLongTerm() on the usercache handler of every
# uuid in cache_uuid_list.
for uuid in cache_uuid_list:
    logging.info("*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10)
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
    uh.moveToLongTerm()

# TODO: For now, move filters from metadata to data. Once we get the
# updated data collection clients to people, we don't need to do this any
# more
import emission.storage.timeseries.format_hacks.move_filter_field as estfm
estfm.move_all_filters_to_data()

# Second pass: run the analysis stages for every uuid reported by the
# timeseries, which is a separate list from cache_uuid_list above.
long_term_uuid_list = esta.TimeSeries.get_uuid_list()
logging.info("*" * 10 + "long term UUID list = %s" % long_term_uuid_list)
for uuid in long_term_uuid_list:
    logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
    eaicf.filter_accuracy(uuid)

    logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
    eaist.segment_current_trips(uuid)

    logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
    eaiss.segment_current_sections(uuid)

    logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
    eaicl.filter_current_sections(uuid)

    # A fresh handler is fetched here; `uh` from the first loop may belong
    # to a different uuid.
    logging.info("*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
    uh.storeViewsToCache()
def runIntakePipeline(uuid):
    """Apply each intake stage once, in order, to the given ``uuid``.

    The last stage is ``eaicr.clean_and_resample``.
    """
    ordered_stages = [
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
    ]
    for stage in ordered_stages:
        stage(uuid)
def import_test_info(self):
    """Load the aug-27 example, filter it and run the intake pipeline."""
    example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
    etc.setupRealExample(self, example_path)

    test_user = self.testUUID
    eaicf.filter_accuracy(test_user)
    etc.runIntakePipeline(test_user)
def runIntakePipeline(uuid):
    """Filter, convert, segment and resample the data for ``uuid``.

    ``estfm.move_all_filters_to_data`` takes no user argument and runs
    right after the accuracy filter.
    """
    eaicf.filter_accuracy(uuid)
    estfm.move_all_filters_to_data()

    # The remaining stages all take the user id and run in this order.
    per_user_stages = (
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicr.clean_and_resample,
    )
    for run_stage in per_user_stages:
        run_stage(uuid)
def run_intake_pipeline_for_user(uuid):
    """Run and time every intake stage for one user.

    Each stage runs inside an ``ect.Timer`` block and is announced via
    ``logging.info`` and a timestamped ``print``. Once a block exits, the
    timer's ``elapsed`` value is stored through
    ``esds.store_pipeline_time`` under the matching pipeline stage name.

    Stages, in order: usercache move, incoming user input match, accuracy
    filter, trip segmentation, section segmentation, section smoothing,
    clean-and-resample, mode inference, confirmed object creation and
    output generation.

    Returns ``None``. It returns early, after the first two stages, when
    the user's only timeseries key is "stats/pipeline_time".
    """
    banner = "*" * 10
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        stage_msg = banner + "UUID %s: moving to long term" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        uh.moveToLongTerm()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    with ect.Timer() as uit:
        stage_msg = (banner + "UUID %s: updating incoming user inputs" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaum.match_incoming_user_inputs(uuid)
    # Bug fix: record this stage's own timer (uit). The previous code
    # passed uct.elapsed, so the usercache duration was stored twice and
    # the user-input matching duration was never recorded.
    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.USER_INPUT_MATCH_INCOMING.name,
        time.time(), uit.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the
    # timeseries.
    # I wonder if this (distinct versus count) is the reason that the
    # pipeline has become so much slower recently. Let's try to actually
    # delete the spurious entries or at least mark them as obsolete and see
    # if that helps.
    user_keys = edb.get_timeseries_db().find(
        {"user_id": uuid}).distinct("metadata.key")
    if user_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        stage_msg = (banner + "UUID %s: filter accuracy if needed" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicf.filter_accuracy(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        stage_msg = banner + "UUID %s: segmenting into trips" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaist.segment_current_trips(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        stage_msg = (banner + "UUID %s: segmenting into sections" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaiss.segment_current_sections(uuid)
    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        stage_msg = banner + "UUID %s: smoothing sections" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicl.filter_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        stage_msg = (banner
                     + "UUID %s: cleaning and resampling timeline" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicr.clean_and_resample(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    # The next two stages get their own timer names; the original reused
    # `crt` for all three, which made mix-ups like the one above easy.
    with ect.Timer() as mit:
        stage_msg = (banner + "UUID %s: inferring transportation mode" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eacimr.predict_mode(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as cot:
        stage_msg = (banner + "UUID %s: creating confirmed objects " % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaum.create_confirmed_objects(uuid)
    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.CREATE_CONFIRMED_OBJECTS.name,
        time.time(), cot.elapsed)

    with ect.Timer() as ogt:
        stage_msg = banner + "UUID %s: storing views to cache" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        uh.storeViewsToCache()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
def import_test_info(self):
    """Set up the aug-27 example, then filter and run the intake pipeline.

    ``eaicf.filter_accuracy`` is called directly before
    ``etc.runIntakePipeline``, both with ``self.testUUID``.
    """
    etc.setupRealExample(
        self,
        "emission/tests/data/real_examples/shankari_2015-aug-27",
    )
    eaicf.filter_accuracy(self.testUUID)
    etc.runIntakePipeline(self.testUUID)
def setUp(self):
    """Stage the dummy seed model and a filtered copy of the example.

    ``self.copied_model_path`` holds whatever
    ``etc.copy_dummy_seed_for_inference`` returns.
    """
    self.copied_model_path = etc.copy_dummy_seed_for_inference()
    etc.setupRealExample(
        self,
        "emission/tests/data/real_examples/shankari_2015-aug-27",
    )
    eaicf.filter_accuracy(self.testUUID)
def run_pipeline():
    """Run and time the intake pipeline for the user named in the request.

    Reads ``pm_address`` and ``uuid`` from ``request.json``, stores the
    address on ``edb.pm_address``, and then runs each stage inside an
    ``ect.Timer`` block. Each timer's ``elapsed`` value is stored through
    ``esds.store_pipeline_time`` once its block exits.

    Returns ``None``. It returns early, after the usercache stage, when
    the user's only timeseries key is "stats/pipeline_time".
    """
    banner = "*" * 10

    edb.pm_address = request.json['pm_address']
    print(edb.pm_address)
    # uuid is a filler and just needs to be consistent for each user.
    # These can be removed but require refactoring all code locations
    # that use the uuid.
    uuid = request.json['uuid']
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        # This first announcement does not include the UUID.
        stage_msg = banner + "moving to long term" + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        uh.moveToLongTerm()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the
    # timeseries.
    # I wonder if this (distinct versus count) is the reason that the
    # pipeline has become so much slower recently. Let's try to actually
    # delete the spurious entries or at least mark them as obsolete and see
    # if that helps.
    #
    # Run the distinct query once and reuse the result for both the debug
    # print and the skip check; it used to be issued twice back to back.
    user_keys = edb.get_timeseries_db().find(
        {"user_id": uuid}).distinct("metadata.key")
    print(user_keys)
    if user_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        stage_msg = (banner + "UUID %s: filter accuracy if needed" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicf.filter_accuracy(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        stage_msg = banner + "UUID %s: segmenting into trips" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaist.segment_current_trips(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        stage_msg = (banner + "UUID %s: segmenting into sections" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaiss.segment_current_sections(uuid)
    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        stage_msg = banner + "UUID %s: smoothing sections" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicl.filter_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        stage_msg = (banner
                     + "UUID %s: cleaning and resampling timeline" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicr.clean_and_resample(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    with ect.Timer() as mit:
        stage_msg = (banner + "UUID %s: inferring transportation mode" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eacimp.predict_mode(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as ogt:
        stage_msg = banner + "UUID %s: storing views to cache" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        # use store data
        uh.storeViewsToCache()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
def setUp(self):
    """Prepare the aug-27 example and accuracy-filter the test user's data."""
    examples_dir = "emission/tests/data/real_examples/"
    etc.setupRealExample(self, examples_dir + "shankari_2015-aug-27")

    test_user = self.testUUID
    eaicf.filter_accuracy(test_user)
def run_intake_pipeline_for_user(uuid):
    """Run and time every intake stage for one user.

    Each stage runs inside an ``ect.Timer`` block and is announced via
    ``logging.info`` and a timestamped ``print``. Once a block exits, the
    timer's ``elapsed`` value is stored through
    ``esds.store_pipeline_time``. The function returns early, after the
    usercache stage, when the user has no timeseries entries at all.
    """
    banner = "*" * 10
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as usercache_timer:
        stage_msg = banner + "UUID %s: moving to long term" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        cache_handler.moveToLongTerm()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), usercache_timer.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    entry_count = edb.get_timeseries_db().find({"user_id": uuid}).count()
    if entry_count == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as filter_timer:
        stage_msg = (banner + "UUID %s: filter accuracy if needed" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicf.filter_accuracy(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), filter_timer.elapsed)

    with ect.Timer() as trip_timer:
        stage_msg = banner + "UUID %s: segmenting into trips" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaist.segment_current_trips(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), trip_timer.elapsed)

    with ect.Timer() as section_timer:
        stage_msg = (banner + "UUID %s: segmenting into sections" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaiss.segment_current_sections(uuid)
    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), section_timer.elapsed)

    with ect.Timer() as smoothing_timer:
        stage_msg = banner + "UUID %s: smoothing sections" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicl.filter_current_sections(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), smoothing_timer.elapsed)

    with ect.Timer() as resample_timer:
        stage_msg = (banner
                     + "UUID %s: cleaning and resampling timeline" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        eaicr.clean_and_resample(uuid)
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), resample_timer.elapsed)

    with ect.Timer() as autocheck_timer:
        stage_msg = (banner
                     + "UUID %s: checking active mode trips to autocheck habits" % uuid
                     + banner)
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        autocheck.give_points_for_all_tasks(uuid)
    esds.store_pipeline_time(uuid, "AUTOCHECK_POINTS",
                             time.time(), autocheck_timer.elapsed)

    with ect.Timer() as output_timer:
        stage_msg = banner + "UUID %s: storing views to cache" % uuid + banner
        logging.info(stage_msg)
        print(str(arrow.now()) + stage_msg)
        cache_handler.storeViewsToCache()
    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), output_timer.elapsed)