def run_intake_pipeline_for_user(uuid):
    """Run every intake pipeline stage for a single user.

    Moves usercache data to long-term storage, matches incoming user inputs,
    then (unless the user's timeseries contains nothing but pipeline_time
    stats) runs accuracy filtering, trip/section segmentation, jump
    smoothing, clean/resample, mode inference, confirmed-object creation,
    and finally regenerates the cached views. Each stage's elapsed time is
    recorded via esds.store_pipeline_time under the corresponding
    PipelineStages name.

    :param uuid: the user id to run the pipeline for
    """
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    def _run_stage(msg, stage_name, stage_fn):
        # Banner to both the log and stdout, run the stage under a timer,
        # then record this stage's own elapsed time.
        with ect.Timer() as stage_timer:
            logging.info("*" * 10 + msg + "*" * 10)
            print(str(arrow.now()) + "*" * 10 + msg + "*" * 10)
            stage_fn()
        esds.store_pipeline_time(uuid, stage_name, time.time(),
                                 stage_timer.elapsed)

    _run_stage("UUID %s: moving to long term" % uuid,
               ecwp.PipelineStages.USERCACHE.name,
               uh.moveToLongTerm)
    # BUGFIX: this stage previously stored uct.elapsed (the USERCACHE
    # stage's timer) instead of its own timer's elapsed time; the shared
    # helper now guarantees each stage records its own timing.
    _run_stage("UUID %s: updating incoming user inputs" % uuid,
               ecwp.PipelineStages.USER_INPUT_MATCH_INCOMING.name,
               lambda: eaum.match_incoming_user_inputs(uuid))

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    if edb.get_timeseries_db().find({
        "user_id": uuid
    }).distinct("metadata.key") == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    _run_stage("UUID %s: filter accuracy if needed" % uuid,
               ecwp.PipelineStages.ACCURACY_FILTERING.name,
               lambda: eaicf.filter_accuracy(uuid))
    _run_stage("UUID %s: segmenting into trips" % uuid,
               ecwp.PipelineStages.TRIP_SEGMENTATION.name,
               lambda: eaist.segment_current_trips(uuid))
    _run_stage("UUID %s: segmenting into sections" % uuid,
               ecwp.PipelineStages.SECTION_SEGMENTATION.name,
               lambda: eaiss.segment_current_sections(uuid))
    _run_stage("UUID %s: smoothing sections" % uuid,
               ecwp.PipelineStages.JUMP_SMOOTHING.name,
               lambda: eaicl.filter_current_sections(uuid))
    _run_stage("UUID %s: cleaning and resampling timeline" % uuid,
               ecwp.PipelineStages.CLEAN_RESAMPLING.name,
               lambda: eaicr.clean_and_resample(uuid))
    _run_stage("UUID %s: inferring transportation mode" % uuid,
               ecwp.PipelineStages.MODE_INFERENCE.name,
               lambda: eacimr.predict_mode(uuid))
    # NOTE: the trailing space in this message is in the original; kept
    # byte-for-byte so downstream log parsing is unaffected.
    _run_stage("UUID %s: creating confirmed objects " % uuid,
               ecwp.PipelineStages.CREATE_CONFIRMED_OBJECTS.name,
               lambda: eaum.create_confirmed_objects(uuid))
    _run_stage("UUID %s: storing views to cache" % uuid,
               ecwp.PipelineStages.OUTPUT_GEN.name,
               uh.storeViewsToCache)
def run_intake_pipeline_for_user(uuid):
    """Run the intake pipeline stages for one user (habit-autocheck variant).

    Moves the usercache to long-term storage, skips users with an empty
    timeseries, then runs accuracy filtering, trip/section segmentation,
    jump smoothing, clean/resample, the habit autocheck point award, and
    finally regenerates the cached views. Every stage's elapsed time is
    stored via esds.store_pipeline_time.

    :param uuid: the user id to run the pipeline for
    """
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    def _timed(banner, stage_name, action):
        # Announce the stage on the log and stdout, execute it under a
        # timer, then persist the timing for this stage.
        with ect.Timer() as t:
            logging.info("*" * 10 + banner + "*" * 10)
            print(str(arrow.now()) + "*" * 10 + banner + "*" * 10)
            action()
        esds.store_pipeline_time(uuid, stage_name, time.time(), t.elapsed)

    _timed("UUID %s: moving to long term" % uuid,
           ecwp.PipelineStages.USERCACHE.name,
           uh.moveToLongTerm)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    if edb.get_timeseries_db().find({"user_id": uuid}).count() == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    _timed("UUID %s: filter accuracy if needed" % uuid,
           ecwp.PipelineStages.ACCURACY_FILTERING.name,
           lambda: eaicf.filter_accuracy(uuid))
    _timed("UUID %s: segmenting into trips" % uuid,
           ecwp.PipelineStages.TRIP_SEGMENTATION.name,
           lambda: eaist.segment_current_trips(uuid))
    _timed("UUID %s: segmenting into sections" % uuid,
           ecwp.PipelineStages.SECTION_SEGMENTATION.name,
           lambda: eaiss.segment_current_sections(uuid))
    _timed("UUID %s: smoothing sections" % uuid,
           ecwp.PipelineStages.JUMP_SMOOTHING.name,
           lambda: eaicl.filter_current_sections(uuid))
    _timed("UUID %s: cleaning and resampling timeline" % uuid,
           ecwp.PipelineStages.CLEAN_RESAMPLING.name,
           lambda: eaicr.clean_and_resample(uuid))
    # This stage is not in ecwp.PipelineStages; the original records it
    # under the literal name "AUTOCHECK_POINTS".
    _timed("UUID %s: checking active mode trips to autocheck habits" % uuid,
           "AUTOCHECK_POINTS",
           lambda: autocheck.give_points_for_all_tasks(uuid))
    _timed("UUID %s: storing views to cache" % uuid,
           ecwp.PipelineStages.OUTPUT_GEN.name,
           uh.storeViewsToCache)
def before_request():
    """Request hook: print/log a START banner and attach timing state.

    Stores the request start timestamp and an entered ect.Timer on
    request.params so a corresponding after-request hook can report the
    elapsed time.
    """
    now = datetime.now()
    print("START %s %s %s" % (now, request.method, request.path))
    request.params.start_ts = time.time()
    # Enter the timer manually (no `with`) because it must stay open
    # across the whole request and be closed by the matching teardown hook.
    timer = ect.Timer()
    request.params.timer = timer
    timer.__enter__()
    logging.debug("START %s %s" % (request.method, request.path))
def run_pipeline():
    """Web endpoint: run the intake pipeline for the user in the request body.

    Reads 'pm_address' and 'uuid' from the JSON payload, then executes the
    usercache move, the spurious-entry skip check, accuracy filtering,
    trip/section segmentation, jump smoothing, clean/resample, mode
    inference, and cached-view generation, timing each stage via
    esds.store_pipeline_time.
    """
    edb.pm_address = request.json['pm_address']
    print(edb.pm_address)
    # uuid is a filler and just needs to be consistent for each user.
    # These can be removed but require refactoring all code locations
    # that use the uuid.
    uuid = request.json['uuid']
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    def _stage(banner, stage_name, action):
        # Emit the stage banner, run the stage under a timer, then record
        # the elapsed time for this stage.
        with ect.Timer() as t:
            logging.info("*" * 10 + banner + "*" * 10)
            print(str(arrow.now()) + "*" * 10 + banner + "*" * 10)
            action()
        esds.store_pipeline_time(uuid, stage_name, time.time(), t.elapsed)

    # NOTE: this first banner intentionally has no UUID in it, matching
    # the original output.
    _stage("moving to long term",
           ecwp.PipelineStages.USERCACHE.name,
           uh.moveToLongTerm)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    print(edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key"))
    if edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key") == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    _stage("UUID %s: filter accuracy if needed" % uuid,
           ecwp.PipelineStages.ACCURACY_FILTERING.name,
           lambda: eaicf.filter_accuracy(uuid))
    _stage("UUID %s: segmenting into trips" % uuid,
           ecwp.PipelineStages.TRIP_SEGMENTATION.name,
           lambda: eaist.segment_current_trips(uuid))
    _stage("UUID %s: segmenting into sections" % uuid,
           ecwp.PipelineStages.SECTION_SEGMENTATION.name,
           lambda: eaiss.segment_current_sections(uuid))
    _stage("UUID %s: smoothing sections" % uuid,
           ecwp.PipelineStages.JUMP_SMOOTHING.name,
           lambda: eaicl.filter_current_sections(uuid))
    _stage("UUID %s: cleaning and resampling timeline" % uuid,
           ecwp.PipelineStages.CLEAN_RESAMPLING.name,
           lambda: eaicr.clean_and_resample(uuid))
    _stage("UUID %s: inferring transportation mode" % uuid,
           ecwp.PipelineStages.MODE_INFERENCE.name,
           lambda: eacimp.predict_mode(uuid))
    # use store data
    _stage("UUID %s: storing views to cache" % uuid,
           ecwp.PipelineStages.OUTPUT_GEN.name,
           uh.storeViewsToCache)