Example #1
0
def run_intake_pipeline_for_user(uuid):
    """Run the intake pipeline stages, in order, for a single user.

    Every stage is wrapped in an ``ect.Timer`` and its elapsed time is
    recorded through ``esds.store_pipeline_time`` under the matching
    ``ecwp.PipelineStages`` name. After the usercache and user-input
    stages, the run stops early when the only ``metadata.key`` found in
    the user's timeseries entries is ``stats/pipeline_time``.

    :param uuid: identifier of the user; handed to every stage and used as
        the ``user_id`` filter of the timeseries query.
    :return: None
    """

    def announce(template):
        # Emit the same starred banner to the log and, prefixed with the
        # current time, to stdout. ``template`` must contain one ``%s``
        # placeholder, which receives the uuid.
        banner = "*" * 10 + template % uuid + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)

    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        announce("UUID %s: moving to long term")
        uh.moveToLongTerm()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    with ect.Timer() as uit:
        announce("UUID %s: updating incoming user inputs")
        eaum.match_incoming_user_inputs(uuid)

    # Record this stage's own timer (uit); uct belongs to the usercache
    # stage above and would misreport the duration of this one.
    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.USER_INPUT_MATCH_INCOMING.name, time.time(),
        uit.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    if edb.get_timeseries_db().find({
            "user_id": uuid
    }).distinct("metadata.key") == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        announce("UUID %s: filter accuracy if needed")
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        announce("UUID %s: segmenting into trips")
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        announce("UUID %s: segmenting into sections")
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        announce("UUID %s: smoothing sections")
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        announce("UUID %s: cleaning and resampling timeline")
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    # Distinct timer names per stage so a stage can never silently report
    # another stage's elapsed time.
    with ect.Timer() as mit:
        announce("UUID %s: inferring transportation mode")
        eacimr.predict_mode(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as cot:
        # The trailing space in the message is kept as in the original output.
        announce("UUID %s: creating confirmed objects ")
        eaum.create_confirmed_objects(uuid)

    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.CREATE_CONFIRMED_OBJECTS.name,
                             time.time(), cot.elapsed)

    with ect.Timer() as ogt:
        announce("UUID %s: storing views to cache")
        uh.storeViewsToCache()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
def run_intake_pipeline_for_user(uuid):
    """Run the intake pipeline, including the autocheck stage, for one user.

    Each stage logs and prints a starred banner, runs inside an
    ``ect.Timer``, and then has its elapsed time stored with
    ``esds.store_pipeline_time``. Once the usercache has been moved to long
    term, the function returns early if the timeseries query for this user
    reports a count of zero.

    :param uuid: identifier of the user; passed to every stage and used as
        the ``user_id`` filter of the timeseries query.
    :return: None
    """
    stars = "*" * 10
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    # Stage: usercache -> long term storage
    with ect.Timer() as usercache_timer:
        banner = stars + "UUID %s: moving to long term" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        cache_handler.moveToLongTerm()
    esds.store_pipeline_time(
        uuid,
        ecwp.PipelineStages.USERCACHE.name,
        time.time(),
        usercache_timer.elapsed,
    )

    # Hack until the spurious entries are deleted, see
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    entry_count = edb.get_timeseries_db().find({"user_id": uuid}).count()
    if entry_count == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    # Stage: accuracy filtering
    with ect.Timer() as accuracy_timer:
        banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)
    esds.store_pipeline_time(
        uuid,
        ecwp.PipelineStages.ACCURACY_FILTERING.name,
        time.time(),
        accuracy_timer.elapsed,
    )

    # Stage: trip segmentation
    with ect.Timer() as trip_timer:
        banner = stars + "UUID %s: segmenting into trips" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)
    esds.store_pipeline_time(
        uuid,
        ecwp.PipelineStages.TRIP_SEGMENTATION.name,
        time.time(),
        trip_timer.elapsed,
    )

    # Stage: section segmentation
    with ect.Timer() as section_timer:
        banner = stars + "UUID %s: segmenting into sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)
    esds.store_pipeline_time(
        uuid,
        ecwp.PipelineStages.SECTION_SEGMENTATION.name,
        time.time(),
        section_timer.elapsed,
    )

    # Stage: jump smoothing
    with ect.Timer() as smoothing_timer:
        banner = stars + "UUID %s: smoothing sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)
    esds.store_pipeline_time(
        uuid,
        ecwp.PipelineStages.JUMP_SMOOTHING.name,
        time.time(),
        smoothing_timer.elapsed,
    )

    # Stage: clean and resample
    with ect.Timer() as resample_timer:
        banner = (stars +
                  "UUID %s: cleaning and resampling timeline" % uuid +
                  stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)
    esds.store_pipeline_time(
        uuid,
        ecwp.PipelineStages.CLEAN_RESAMPLING.name,
        time.time(),
        resample_timer.elapsed,
    )

    # Stage: autocheck points (recorded under a literal stage name rather
    # than a PipelineStages member)
    with ect.Timer() as autocheck_timer:
        banner = (
            stars +
            "UUID %s: checking active mode trips to autocheck habits" % uuid +
            stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        autocheck.give_points_for_all_tasks(uuid)
    esds.store_pipeline_time(
        uuid,
        "AUTOCHECK_POINTS",
        time.time(),
        autocheck_timer.elapsed,
    )

    # Stage: output generation
    with ect.Timer() as output_timer:
        banner = stars + "UUID %s: storing views to cache" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        cache_handler.storeViewsToCache()
    esds.store_pipeline_time(
        uuid,
        ecwp.PipelineStages.OUTPUT_GEN.name,
        time.time(),
        output_timer.elapsed,
    )
Example #3
0
def before_request():
    """Announce the start of a request and begin timing it.

    Prints a timestamped ``START`` line, stores the wall-clock start time in
    ``request.params.start_ts``, stores an already-entered ``ect.Timer`` in
    ``request.params.timer``, and writes a ``START`` line to the debug log.

    NOTE(review): presumably registered as a framework before-request hook,
    with a counterpart that exits the timer -- neither is visible here.
    """
    method = request.method
    path = request.path

    print("START %s %s %s" % (datetime.now(), method, path))

    request.params.start_ts = time.time()
    # The timer is entered by hand because it has to outlive this function.
    request.params.timer = ect.Timer()
    request.params.timer.__enter__()

    logging.debug("START %s %s" % (method, path))
def run_pipeline():
    """Run the intake pipeline for the user named in the request's JSON body.

    Reads ``pm_address`` and ``uuid`` from ``request.json``, stores the
    former on ``edb.pm_address``, and then runs the pipeline stages in
    order. Each stage is timed with ``ect.Timer`` and its elapsed time is
    recorded with ``esds.store_pipeline_time``. After the usercache stage,
    the run stops early when the only ``metadata.key`` in the user's
    timeseries entries is ``stats/pipeline_time``.

    NOTE(review): ``edb.pm_address`` is module-level state -- confirm this
    is safe if requests can be handled concurrently.

    :return: None
    """
    edb.pm_address = request.json['pm_address']
    print(edb.pm_address)
    # uuid is a filler and just needs to be consistent for each user.
    # These can be removed but require refactoring all code locations
    # that use the uuid.
    uuid = request.json['uuid']
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        logging.info("*" * 10 + "moving to long term" + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "moving to long term" + "*" * 10)
        uh.moveToLongTerm()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    #
    # Run the distinct query once and reuse the result for both the debug
    # print and the check, instead of querying the database twice.
    metadata_keys = edb.get_timeseries_db().find(
        {"user_id": uuid}).distinct("metadata.key")
    print(metadata_keys)

    if metadata_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        logging.info("*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: cleaning and resampling timeline" % uuid + "*" * 10)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    # Own timer name for mode inference, so it cannot be confused with the
    # clean/resample timer above.
    with ect.Timer() as mit:
        logging.info("*" * 10 + "UUID %s: inferring transportation mode" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: inferring transportation mode" % uuid + "*" * 10)
        eacimp.predict_mode(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as ogt:
        logging.info("*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
        print(str(arrow.now()) + "*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
        # use store data
        uh.storeViewsToCache()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)