def setUp(self):
    """Load the android and iOS real examples and filter the iOS data.

    ``self.testUUID`` is read right after each ``setupRealExample`` call
    (presumably set by that helper -- confirm) and saved under a
    platform-specific attribute.  Accuracy filtering is only run for the
    iOS UUID here.
    """
    android_example = "emission/tests/data/real_examples/shankari_2015-aug-27"
    ios_example = "emission/tests/data/real_examples/iphone_2015-11-06"

    etc.setupRealExample(self, android_example)
    self.androidUUID = self.testUUID

    etc.setupRealExample(self, ios_example)
    self.iosUUID = self.testUUID
    eaicf.filter_accuracy(self.iosUUID)

    uuid_pair = (self.androidUUID, self.iosUUID)
    logging.debug("androidUUID = %s, iosUUID = %s" % uuid_pair)
예제 #2
0
 def testEmptyCall(self):
     """Run filter_accuracy for a user whose timeseries entries were removed."""
     import emission.core.get_database as edb

     # Remove every timeseries entry stored for the test user.
     user_query = {"user_id": self.testUUID}
     edb.get_timeseries_db().remove(user_query)

     # The call itself is the check: it must not raise on empty input.
     eaicf.filter_accuracy(self.testUUID)

     location_df = self.ts.get_data_df("background/location")
     self.assertEqual(len(location_df), 0)
 def testEmptyCall(self):
     """Check that filter_accuracy tolerates a zero-length timeseries."""
     import emission.core.get_database as edb

     timeseries_db = edb.get_timeseries_db()
     timeseries_db.remove({"user_id": self.testUUID})

     # Expected not to throw even though the user's entries were just removed.
     eaicf.filter_accuracy(self.testUUID)

     remaining_locations = self.ts.get_data_df("background/location")
     self.assertEqual(len(remaining_locations), 0)
 def setUp(self):
     """Load the android and iOS real examples; filter only the iOS data.

     The value of ``self.testUUID`` after each load is kept as
     ``self.androidUUID`` / ``self.iosUUID`` respectively.
     """
     android_example = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, android_example)
     self.androidUUID = self.testUUID

     ios_example = "emission/tests/data/real_examples/iphone_2015-11-06"
     etc.setupRealExample(self, ios_example)
     self.iosUUID = self.testUUID
     eaicf.filter_accuracy(self.iosUUID)

     loaded_uuids = (self.androidUUID, self.iosUUID)
     logging.debug("androidUUID = %s, iosUUID = %s" % loaded_uuids)
예제 #5
0
 def setUp(self):
     """Clear related state, load the 2015-08-27 example and set day bounds."""
     self.clearRelatedDb()

     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
     estfm.move_all_filters_to_data()

     ts_entry_count = edb.get_timeseries_db().count()
     logging.info("After loading, timeseries db size = %s" % ts_entry_count)

     # 1440658800 is 2015-08-27 07:00:00 UTC; the end bound is exactly one
     # day (86400 s) later.
     self.day_start_ts, self.day_end_ts = 1440658800, 1440745200
    def testFilterAccuracy(self):
        """Check location counts before and after running filter_accuracy."""
        raw_key = "background/location"
        filtered_key = "background/filtered_location"

        # Before filtering: 205 raw points and no filtered points.
        raw_df = self.ts.get_data_df(raw_key, None)
        self.assertEqual(len(raw_df), 205)
        before_df = self.ts.get_data_df(filtered_key, None)
        self.assertEqual(len(before_df), 0)

        eaicf.filter_accuracy(self.testUUID)

        # After filtering: 124 filtered points are expected.
        after_df = self.ts.get_data_df(filtered_key, None)
        self.assertEqual(len(after_df), 124)
예제 #7
0
 def setUp(self):
     """Load the 2015-08-27 example and set timestamp and local-date bounds."""
     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
     estfm.move_all_filters_to_data()

     ts_entry_count = edb.get_timeseries_db().count()
     logging.info("After loading, timeseries db size = %s" % ts_entry_count)

     # Timestamp bounds are exactly one day (86400 s) apart.
     self.day_start_ts, self.day_end_ts = 1440658800, 1440745200

     # Both local-date bounds are built from the same calendar day.
     self.day_start_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
     self.day_end_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
예제 #8
0
 def setUp(self):
     """Prepare the 2015-08-27 example plus the day bounds used by the tests."""
     etc.setupRealExample(
         self, "emission/tests/data/real_examples/shankari_2015-aug-27")
     eaicf.filter_accuracy(self.testUUID)
     estfm.move_all_filters_to_data()

     size_after_load = edb.get_timeseries_db().count()
     logging.info("After loading, timeseries db size = %s" % size_after_load)

     # Epoch-second bounds, one day (86400 s) apart.
     self.day_start_ts = 1440658800
     self.day_end_ts = 1440745200

     # Start and end local dates are the same day: 2015-08-27.
     self.day_start_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
     self.day_end_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'day': 27})
    def setUp(self):
        """Load and accuracy-filter the android example and the iOS example.

        The android data is loaded through ``setupRealExample``.  The iOS data
        is loaded under a fixed UUID by parsing the example file into
        ``self.entries`` and calling ``setupRealExampleWithEntries``.
        """
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        self.androidUUID = self.testUUID
        eaicf.filter_accuracy(self.androidUUID)

        self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
        # Use a context manager so the example file is closed as soon as it
        # has been parsed instead of leaking the handle.
        with open("emission/tests/data/real_examples/iphone_2015-11-06") as ios_fp:
            self.entries = json.load(ios_fp, object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        self.iosUUID = self.testUUID
        eaicf.filter_accuracy(self.iosUUID)
    def setUp(self):
        """Load the android and iOS examples; accuracy-filter only the iOS one.

        The iOS data is loaded under a fixed UUID by parsing the example file
        into ``self.entries`` and calling ``setupRealExampleWithEntries``.
        """
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        self.androidUUID = self.testUUID

        self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
        # Use a context manager so the example file is closed as soon as it
        # has been parsed instead of leaking the handle.
        with open("emission/tests/data/real_examples/iphone_2015-11-06") as ios_fp:
            self.entries = json.load(ios_fp, object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        self.iosUUID = self.testUUID
        eaicf.filter_accuracy(self.iosUUID)
        logging.debug("androidUUID = %s, iosUUID = %s" % (self.androidUUID, self.iosUUID))
예제 #11
0
 def setUp(self):
     """Clear related state, load the 2015-08-27 example and set day bounds."""
     self.clearRelatedDb()

     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
     estfm.move_all_filters_to_data()

     ts_entry_count = edb.get_timeseries_db().count()
     logging.info("After loading, timeseries db size = %s" % ts_entry_count)

     # Epoch-second bounds, one day (86400 s) apart.
     self.day_start_ts, self.day_end_ts = 1440658800, 1440745200

     # Datetime bounds: 2015-08-27 up to 2015-08-28.
     self.day_start_dt = pydt.datetime(2015, 8, 27)
     self.day_end_dt = pydt.datetime(2015, 8, 28)
 def setUp(self):
     """Empty the trip/section collections and segment the 2015-08-21 example."""
     self.clearRelatedDb()

     # Clear the old and new trip/section collections, in the original order.
     collection_getters = (
         edb.get_trip_db,
         edb.get_section_db,
         edb.get_trip_new_db,
         edb.get_section_new_db,
     )
     for get_collection in collection_getters:
         get_collection().remove()

     example_path = "emission/tests/data/real_examples/shankari_2015-aug-21"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
     estfm.move_all_filters_to_data()
     eaist.segment_current_trips(self.testUUID)
     eaiss.segment_current_sections(self.testUUID)
예제 #13
0
    def testFilterAccuracy(self):
        """Verify point counts around a filter_accuracy run."""
        location_key = "background/location"
        filtered_key = "background/filtered_location"

        # 205 raw points are expected, and nothing filtered yet.
        self.assertEqual(len(self.ts.get_data_df(location_key, None)), 205)
        self.assertEqual(len(self.ts.get_data_df(filtered_key, None)), 0)

        eaicf.filter_accuracy(self.testUUID)

        # 124 points are expected under the filtered key afterwards.
        self.assertEqual(len(self.ts.get_data_df(filtered_key, None)), 124)
예제 #14
0
 def setUp(self):
     """Load the 2015-08-27 example and derive local-date bounds from timestamps."""
     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
     estfm.move_all_filters_to_data()

     ts_entry_count = edb.get_timeseries_db().count()
     logging.info("After loading, timeseries db size = %s" % ts_entry_count)

     # Epoch-second bounds, one day (86400 s) apart.
     self.day_start_ts = 1440658800
     self.day_end_ts = 1440745200

     # Local dates are computed from those timestamps in this timezone.
     local_tz = "America/Los_Angeles"
     self.day_start_dt = esdldq.get_local_date(self.day_start_ts, local_tz)
     self.day_end_dt = esdldq.get_local_date(self.day_end_ts, local_tz)
예제 #15
0
    def setUp(self):
        """Enable the accuracy-filter config, then load and filter both examples.

        The path returned by ``set_analysis_config`` is kept in
        ``self.analysis_conf_path``.  The iOS data is loaded under a fixed UUID
        by parsing the example file into ``self.entries`` and calling
        ``setupRealExampleWithEntries``.
        """
        self.analysis_conf_path = \
            etc.set_analysis_config("intake.cleaning.filter_accuracy.enable", True)

        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        self.androidUUID = self.testUUID
        eaicf.filter_accuracy(self.androidUUID)

        self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
        # Use a context manager so the example file is closed as soon as it
        # has been parsed instead of leaking the handle.
        with open("emission/tests/data/real_examples/iphone_2015-11-06") as ios_fp:
            self.entries = json.load(ios_fp, object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        self.iosUUID = self.testUUID
        eaicf.filter_accuracy(self.iosUUID)
    def setUp(self):
        """Enable the accuracy-filter config, load both examples, filter iOS only.

        The path returned by ``set_analysis_config`` is kept in
        ``self.analysis_conf_path``.  The iOS data is loaded under a fixed UUID
        by parsing the example file into ``self.entries`` and calling
        ``setupRealExampleWithEntries``.
        """
        self.analysis_conf_path = \
            etc.set_analysis_config("intake.cleaning.filter_accuracy.enable", True)

        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        self.androidUUID = self.testUUID

        self.testUUID = uuid.UUID("c76a0487-7e5a-3b17-a449-47be666b36f6")
        # Use a context manager so the example file is closed as soon as it
        # has been parsed instead of leaking the handle.
        with open("emission/tests/data/real_examples/iphone_2015-11-06") as ios_fp:
            self.entries = json.load(ios_fp, object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        self.iosUUID = self.testUUID
        eaicf.filter_accuracy(self.iosUUID)
        logging.debug("androidUUID = %s, iosUUID = %s" % (self.androidUUID, self.iosUUID))
예제 #17
0
 def testCreatePlace(self):
     """Create common places from the 2015-08-27 example and check each one."""
     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
     estfm.move_all_filters_to_data()
     eaist.segment_current_trips(self.testUUID)
     eaiss.segment_current_sections(self.testUUID)

     tour_data = eamtcp.main(self.testUUID)
     esdcpq.create_places(tour_data, self.testUUID)

     # Wrap every stored place before running any assertion.
     stored_places = esdcpq.get_all_common_places_for_user(self.testUUID)
     common_places = [esdcpq.make_common_place(raw) for raw in stored_places]
     for common_place in common_places:
         self.assertIsNotNone(common_place.location)
         self.assertIsNotNone(common_place["successors"])
예제 #18
0
def runIntakePipeline(uuid):
    """Run the intake stages, in order, for the user identified by ``uuid``."""
    # The imports are deliberately function-local so that these modules, and
    # any config modules they pull in, are not loaded before the caller wants.
    import emission.analysis.intake.cleaning.filter_accuracy as eaicf
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    import emission.analysis.intake.segmentation.trip_segmentation as eaist
    import emission.analysis.intake.segmentation.section_segmentation as eaiss
    import emission.analysis.intake.cleaning.location_smoothing as eaicl
    import emission.analysis.intake.cleaning.clean_and_resample as eaicr
    import emission.analysis.classification.inference.mode.pipeline as eacimp

    # NOTE(review): estfm is imported but never called here -- confirm whether
    # the import is needed only for its load-time effects.
    ordered_stages = (
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
        eacimp.predict_mode,
    )
    for run_stage in ordered_stages:
        run_stage(uuid)
예제 #19
0
def runIntakePipeline(uuid):
    """Execute each intake pipeline stage once for ``uuid``, in fixed order."""
    # Imported here rather than at module level so the modules (and any
    # related config modules) are not loaded inadvertently before needed.
    import emission.analysis.intake.cleaning.filter_accuracy as eaicf
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    import emission.analysis.intake.segmentation.trip_segmentation as eaist
    import emission.analysis.intake.segmentation.section_segmentation as eaiss
    import emission.analysis.intake.cleaning.location_smoothing as eaicl
    import emission.analysis.intake.cleaning.clean_and_resample as eaicr
    import emission.analysis.classification.inference.mode.pipeline as eacimp

    pipeline = [
        eaicf.filter_accuracy,          # accuracy filtering
        eaist.segment_current_trips,    # trip segmentation
        eaiss.segment_current_sections, # section segmentation
        eaicl.filter_current_sections,  # section smoothing
        eaicr.clean_and_resample,       # clean and resample
        eacimp.predict_mode,            # mode prediction
    ]
    for stage in pipeline:
        stage(uuid)
    def testCreateFromData(self):
        """Segment the 2015-08-27 example and sanity-check every common trip."""
        example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
        etc.setupRealExample(self, example_path)
        eaicf.filter_accuracy(self.testUUID)
        estfm.move_all_filters_to_data()
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)

        # NOTE(review): nothing in this method visibly creates common trips,
        # so the loop below may not execute -- confirm.
        stored_trips = esdctp.get_all_common_trips_for_user(self.testUUID)
        common_trips = [esdctp.make_common_trip_from_json(raw) for raw in stored_trips]
        for common_trip in common_trips:
            self.assertIsNotNone(common_trip.start_loc)
            self.assertIsNotNone(common_trip.end_loc)
            self.assertTrue(len(common_trip["trips"]) > 0)
            # Spot-check one randomly chosen member trip id.
            sampled_id = random.choice(common_trip["trips"])
            self.assertEqual(type(esdtq.get_trip(sampled_id)), ecwt.Trip)
            self.assertTrue(common_trip.probabilites.sum() > 0)
            self.assertEqual(str(common_trip.user_id), "test2")
    def testCreateFromData(self):
        """Run segmentation on the 2015-08-27 example and validate common trips."""
        etc.setupRealExample(
            self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        test_user = self.testUUID
        eaicf.filter_accuracy(test_user)
        estfm.move_all_filters_to_data()
        eaist.segment_current_trips(test_user)
        eaiss.segment_current_sections(test_user)

        # NOTE(review): no call here visibly creates common trips, so the
        # assertions may run zero times -- confirm.
        raw_trips = esdctp.get_all_common_trips_for_user(self.testUUID)
        parsed_trips = [esdctp.make_common_trip_from_json(item) for item in raw_trips]
        for parsed in parsed_trips:
            self.assertIsNotNone(parsed.start_loc)
            self.assertIsNotNone(parsed.end_loc)
            self.assertTrue(len(parsed["trips"]) > 0)
            # Pick one member trip id at random and check its stored type.
            picked_id = random.choice(parsed["trips"])
            self.assertEqual(type(esdtq.get_trip(picked_id)), ecwt.Trip)
            self.assertTrue(parsed.probabilites.sum() > 0)
            self.assertEqual(str(parsed.user_id), "test2")
 def setUp(self):
     """Load the 2015-08-27 real example and run accuracy filtering on it."""
     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
예제 #23
0
 def setUp(self):
     """Clear related state, then load, filter and convert the example data."""
     self.clearRelatedDb()
     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
     estfm.move_all_filters_to_data()
예제 #24
0
def run_intake_pipeline_for_user(uuid):
    """Run the intake pipeline stages, in order, for a single user.

    Every stage is announced twice: through ``logging.info`` and through a
    ``print`` prefixed with the current ``arrow`` time.  Returns ``None``
    early, after the user cache has been moved to long term, when the user
    has no timeseries entries.

    :param uuid: identifier of the user; passed unchanged to every stage.
    """
    stars = "*" * 10
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    banner = stars + "UUID %s: moving to long term" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    cache_handler.moveToLongTerm()

    # Hack until the spurious entries are deleted, see
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    user_entries = edb.get_timeseries_db().find({"user_id": uuid})
    if user_entries.count() == 0:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaicf.filter_accuracy(uuid)

    banner = stars + "UUID %s: segmenting into trips" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaist.segment_current_trips(uuid)

    banner = stars + "UUID %s: segmenting into sections" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaiss.segment_current_sections(uuid)

    banner = stars + "UUID %s: smoothing sections" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaicl.filter_current_sections(uuid)

    banner = stars + "UUID %s: cleaning and resampling timeline" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    eaicr.clean_and_resample(uuid)

    banner = (stars +
              "UUID %s: checking active mode trips to autocheck habits" % uuid +
              stars)
    logging.info(banner)
    print(str(arrow.now()) + banner)
    autocheck.give_points_for_all_tasks(uuid)

    banner = stars + "UUID %s: storing views to cache" % uuid + stars
    logging.info(banner)
    print(str(arrow.now()) + banner)
    cache_handler.storeViewsToCache()
예제 #25
0
def run_intake_pipeline_for_user(uuid):
    """Run the timed intake pipeline stages, in order, for a single user.

    Each stage is announced via ``logging.info`` and ``print``, executed
    inside an ``ect.Timer`` context, and its elapsed time is then stored
    through ``esds.store_pipeline_time``.  Returns ``None`` early, after the
    user-cache stage, when the only distinct ``metadata.key`` found for the
    user is ``stats/pipeline_time``.

    :param uuid: identifier of the user; passed unchanged to every stage.
    """
    stars = "*" * 10
    stages = ecwp.PipelineStages
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as usercache_timer:
        banner = stars + "UUID %s: moving to long term" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        cache_handler.moveToLongTerm()

    esds.store_pipeline_time(uuid, stages.USERCACHE.name,
                             time.time(), usercache_timer.elapsed)

    # Hack until the spurious entries are deleted, see
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # A plain count no longer works now that the stats live in the
    # timeseries: the pipeline runs for every user, even really old ones,
    # and inserts pipeline_time stats.  So users who only have pipeline_time
    # entries are skipped instead.  The original author wondered whether
    # distinct (versus count) is why the pipeline became slower, and
    # suggested deleting the spurious entries or marking them obsolete.
    user_keys = edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key")
    if user_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as filter_timer:
        banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(uuid, stages.ACCURACY_FILTERING.name,
                             time.time(), filter_timer.elapsed)

    with ect.Timer() as trip_timer:
        banner = stars + "UUID %s: segmenting into trips" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(uuid, stages.TRIP_SEGMENTATION.name,
                             time.time(), trip_timer.elapsed)

    with ect.Timer() as section_timer:
        banner = stars + "UUID %s: segmenting into sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(uuid, stages.SECTION_SEGMENTATION.name,
                             time.time(), section_timer.elapsed)

    with ect.Timer() as smoothing_timer:
        banner = stars + "UUID %s: smoothing sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(uuid, stages.JUMP_SMOOTHING.name,
                             time.time(), smoothing_timer.elapsed)

    with ect.Timer() as resample_timer:
        banner = stars + "UUID %s: cleaning and resampling timeline" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(uuid, stages.CLEAN_RESAMPLING.name,
                             time.time(), resample_timer.elapsed)

    with ect.Timer() as autocheck_timer:
        banner = (stars +
                  "UUID %s: checking active mode trips to autocheck habits" % uuid +
                  stars)
        logging.info(banner)
        print(str(arrow.now()) + banner)
        autocheck.give_points_for_all_tasks(uuid)

    # This stage is recorded under a literal name, not a PipelineStages member.
    esds.store_pipeline_time(uuid, "AUTOCHECK_POINTS",
                             time.time(), autocheck_timer.elapsed)

    with ect.Timer() as output_timer:
        banner = stars + "UUID %s: storing views to cache" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        cache_handler.storeViewsToCache()

    esds.store_pipeline_time(uuid, stages.OUTPUT_GEN.name,
                             time.time(), output_timer.elapsed)
 def setUp(self):
     """Copy the dummy inference seed, then load and filter the example data.

     The value returned by ``copy_dummy_seed_for_inference`` is kept in
     ``self.copied_model_path``.
     """
     self.copied_model_path = etc.copy_dummy_seed_for_inference()
     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
예제 #27
0
    # TEST_PHONE_IDS are not critical - we can run a pipeline for them once a day
    filtered_long_term_uuid_list = [
        u for u in all_long_term_uuid_list if u not in estag.TEST_PHONE_IDS
    ]
    half = old_div(len(filtered_long_term_uuid_list), 2)
    long_term_uuid_list = filtered_long_term_uuid_list[half:]

    logging.info("*" * 10 + "long term UUID list = %s" % long_term_uuid_list)
    for uuid in long_term_uuid_list:
        if uuid is None:
            continue

        logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid +
                     "*" * 10)
        eaicf.filter_accuracy(uuid)

        logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid +
                     "*" * 10)
        eaist.segment_current_trips(uuid)

        logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid +
                     "*" * 10)
        eaiss.segment_current_sections(uuid)

        logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid +
                     "*" * 10)
        eaicl.filter_current_sections(uuid)

        logging.info("*" * 10 +
                     "UUID %s: cleaning and resampling timeline" % uuid +
 def setUp(self):
     """Clear related state, load the 2015-08-27 example, filter and convert it."""
     self.clearRelatedDb()
     etc.setupRealExample(
         self, "emission/tests/data/real_examples/shankari_2015-aug-27")
     test_user = self.testUUID
     eaicf.filter_accuracy(test_user)
     estfm.move_all_filters_to_data()
예제 #29
0
def runIntakePipeline(uuid):
    """Run the five intake stages, in order, for the user ``uuid``."""
    ordered_stages = (
        eaicf.filter_accuracy,
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicl.filter_current_sections,
        eaicr.clean_and_resample,
    )
    for run_stage in ordered_stages:
        run_stage(uuid)
    for uuid in cache_uuid_list:
        logging.info("*" * 10 + "UUID %s: moving to long term" % uuid + "*" * 10)
        uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
        uh.moveToLongTerm()

    # TODO: For now, move filters from metadata to data. Once we get the
    # updated data collection clients to people, we don't need to do this any
    # more
    import emission.storage.timeseries.format_hacks.move_filter_field as estfm
    estfm.move_all_filters_to_data()

    long_term_uuid_list = esta.TimeSeries.get_uuid_list()
    logging.info("*" * 10 + "long term UUID list = %s" % long_term_uuid_list)
    for uuid in long_term_uuid_list:
        logging.info("*" * 10 + "UUID %s: filter accuracy if needed" % uuid + "*" * 10)
        eaicf.filter_accuracy(uuid)
        
        logging.info("*" * 10 + "UUID %s: segmenting into trips" % uuid + "*" * 10)
        eaist.segment_current_trips(uuid)

        logging.info("*" * 10 + "UUID %s: segmenting into sections" % uuid + "*" * 10)
        eaiss.segment_current_sections(uuid)

        logging.info("*" * 10 + "UUID %s: smoothing sections" % uuid + "*" * 10)
        eaicl.filter_current_sections(uuid)

        logging.info("*" * 10 + "UUID %s: storing views to cache" % uuid + "*" * 10)
        uh = euah.UserCacheHandler.getUserCacheHandler(uuid)
        uh.storeViewsToCache()
예제 #31
0
def runIntakePipeline(uuid):
    """Call each intake stage once with ``uuid``, keeping the fixed order."""
    pipeline = [
        eaicf.filter_accuracy,          # accuracy filtering
        eaist.segment_current_trips,    # trip segmentation
        eaiss.segment_current_sections, # section segmentation
        eaicl.filter_current_sections,  # section smoothing
        eaicr.clean_and_resample,       # clean and resample
    ]
    for stage in pipeline:
        stage(uuid)
    def import_test_info(self):
        """Load the 2015-08-27 example, filter it and run the intake pipeline.

        The body is indented with spaces only; the earlier mix of tabs and
        spaces is rejected by Python 3 as inconsistent indentation.
        """
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-27")
        eaicf.filter_accuracy(self.testUUID)
        etc.runIntakePipeline(self.testUUID)
예제 #33
0
def runIntakePipeline(uuid):
    """Filter, convert filter fields, segment and resample data for ``uuid``."""
    eaicf.filter_accuracy(uuid)
    # This step takes no user argument, so it stays outside the loop below.
    estfm.move_all_filters_to_data()

    per_user_stages = (
        eaist.segment_current_trips,
        eaiss.segment_current_sections,
        eaicr.clean_and_resample,
    )
    for run_stage in per_user_stages:
        run_stage(uuid)
예제 #34
0
def run_intake_pipeline_for_user(uuid):
    """Run every intake pipeline stage for one user and record stage timings.

    Each stage is announced through ``logging.info`` and ``print``, executed
    inside an ``ect.Timer`` context, and its elapsed time is then stored with
    ``esds.store_pipeline_time``.  Returns ``None`` early, right after the
    incoming-user-input stage, when the only distinct ``metadata.key`` found
    for the user is ``stats/pipeline_time``.

    :param uuid: identifier of the user; passed unchanged to every stage.
    """
    stars = "*" * 10
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as uct:
        banner = stars + "UUID %s: moving to long term" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        uh.moveToLongTerm()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.USERCACHE.name,
                             time.time(), uct.elapsed)

    with ect.Timer() as uit:
        banner = stars + "UUID %s: updating incoming user inputs" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaum.match_incoming_user_inputs(uuid)

    # Record this stage's own timer; the user-cache timer (uct) was being
    # stored here, so the recorded duration belonged to the wrong stage.
    esds.store_pipeline_time(
        uuid, ecwp.PipelineStages.USER_INPUT_MATCH_INCOMING.name, time.time(),
        uit.elapsed)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # Hack no longer works after the stats are in the timeseries because
    # every user, even really old ones, have the pipeline run for them,
    # which inserts pipeline_time stats.
    # Let's strip out users who only have pipeline_time entries in the timeseries
    # I wonder if this (distinct versus count) is the reason that the pipeline has
    # become so much slower recently. Let's try to actually delete the
    # spurious entries or at least mark them as obsolete and see if that helps.
    if edb.get_timeseries_db().find({
            "user_id": uuid
    }).distinct("metadata.key") == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as aft:
        banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                             time.time(), aft.elapsed)

    with ect.Timer() as tst:
        banner = stars + "UUID %s: segmenting into trips" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                             time.time(), tst.elapsed)

    with ect.Timer() as sst:
        banner = stars + "UUID %s: segmenting into sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                             time.time(), sst.elapsed)

    with ect.Timer() as jst:
        banner = stars + "UUID %s: smoothing sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                             time.time(), jst.elapsed)

    with ect.Timer() as crt:
        banner = stars + "UUID %s: cleaning and resampling timeline" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                             time.time(), crt.elapsed)

    # The remaining stages use their own timer names rather than reusing
    # ``crt``, so a timing can no longer be attributed to the wrong stage.
    with ect.Timer() as mit:
        banner = stars + "UUID %s: inferring transportation mode" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eacimr.predict_mode(uuid)

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.MODE_INFERENCE.name,
                             time.time(), mit.elapsed)

    with ect.Timer() as cot:
        # The trailing space inside this message is part of the emitted text.
        banner = stars + "UUID %s: creating confirmed objects " % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaum.create_confirmed_objects(uuid)

    esds.store_pipeline_time(uuid,
                             ecwp.PipelineStages.CREATE_CONFIRMED_OBJECTS.name,
                             time.time(), cot.elapsed)

    with ect.Timer() as ogt:
        banner = stars + "UUID %s: storing views to cache" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        uh.storeViewsToCache()

    esds.store_pipeline_time(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                             time.time(), ogt.elapsed)
예제 #35
0
 def import_test_info(self):
     """Load the 2015-08-27 example, filter it, then run the intake pipeline."""
     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     eaicf.filter_accuracy(self.testUUID)
     etc.runIntakePipeline(self.testUUID)
 def setUp(self):
     """Copy the dummy inference seed, then load and filter the example data."""
     # Keep the returned path on the instance.
     self.copied_model_path = etc.copy_dummy_seed_for_inference()
     etc.setupRealExample(
         self, "emission/tests/data/real_examples/shankari_2015-aug-27")
     eaicf.filter_accuracy(self.testUUID)
def run_pipeline():
    """Run the timed intake pipeline for the user named in the request JSON.

    Reads ``pm_address`` and ``uuid`` from ``request.json``; ``pm_address`` is
    stored on the ``edb`` module and printed.  Each stage is announced via
    ``logging.info`` and ``print``, run inside an ``ect.Timer`` context, and
    its elapsed time is stored through ``esds.store_pipeline_time``.  Returns
    ``None`` early, after the user-cache stage, when the only distinct
    ``metadata.key`` found for the user is ``stats/pipeline_time``.
    """
    stars = "*" * 10
    stages = ecwp.PipelineStages

    edb.pm_address = request.json['pm_address']
    print(edb.pm_address)
    # uuid is a filler and just needs to be consistent for each user.
    # It could be removed, but that requires refactoring every code
    # location that uses the uuid.
    uuid = request.json['uuid']
    cache_handler = euah.UserCacheHandler.getUserCacheHandler(uuid)

    with ect.Timer() as usercache_timer:
        # This first banner carries no UUID.
        banner = stars + "moving to long term" + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        cache_handler.moveToLongTerm()

    esds.store_pipeline_time(uuid, stages.USERCACHE.name,
                             time.time(), usercache_timer.elapsed)

    # Hack until the spurious entries are deleted, see
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868
    # A plain count no longer works now that the stats live in the
    # timeseries: the pipeline runs for every user, even really old ones,
    # and inserts pipeline_time stats.  So users who only have pipeline_time
    # entries are skipped instead.  The original author wondered whether
    # distinct (versus count) is why the pipeline became slower, and
    # suggested deleting the spurious entries or marking them obsolete.
    # The query is issued twice on purpose: once to print, once to test.
    print(edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key"))

    user_keys = edb.get_timeseries_db().find({"user_id": uuid}).distinct("metadata.key")
    if user_keys == ["stats/pipeline_time"]:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    with ect.Timer() as filter_timer:
        banner = stars + "UUID %s: filter accuracy if needed" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicf.filter_accuracy(uuid)

    esds.store_pipeline_time(uuid, stages.ACCURACY_FILTERING.name,
                             time.time(), filter_timer.elapsed)

    with ect.Timer() as trip_timer:
        banner = stars + "UUID %s: segmenting into trips" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaist.segment_current_trips(uuid)

    esds.store_pipeline_time(uuid, stages.TRIP_SEGMENTATION.name,
                             time.time(), trip_timer.elapsed)

    with ect.Timer() as section_timer:
        banner = stars + "UUID %s: segmenting into sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaiss.segment_current_sections(uuid)

    esds.store_pipeline_time(uuid, stages.SECTION_SEGMENTATION.name,
                             time.time(), section_timer.elapsed)

    with ect.Timer() as smoothing_timer:
        banner = stars + "UUID %s: smoothing sections" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicl.filter_current_sections(uuid)

    esds.store_pipeline_time(uuid, stages.JUMP_SMOOTHING.name,
                             time.time(), smoothing_timer.elapsed)

    with ect.Timer() as resample_timer:
        banner = stars + "UUID %s: cleaning and resampling timeline" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eaicr.clean_and_resample(uuid)

    esds.store_pipeline_time(uuid, stages.CLEAN_RESAMPLING.name,
                             time.time(), resample_timer.elapsed)

    with ect.Timer() as inference_timer:
        banner = stars + "UUID %s: inferring transportation mode" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        eacimp.predict_mode(uuid)

    esds.store_pipeline_time(uuid, stages.MODE_INFERENCE.name,
                             time.time(), inference_timer.elapsed)

    with ect.Timer() as output_timer:
        banner = stars + "UUID %s: storing views to cache" % uuid + stars
        logging.info(banner)
        print(str(arrow.now()) + banner)
        # use store data
        cache_handler.storeViewsToCache()

    esds.store_pipeline_time(uuid, stages.OUTPUT_GEN.name,
                             time.time(), output_timer.elapsed)
예제 #38
0
 def setUp(self):
     """Load the 2015-08-27 real example and accuracy-filter its data."""
     example_path = "emission/tests/data/real_examples/shankari_2015-aug-27"
     etc.setupRealExample(self, example_path)
     test_user = self.testUUID
     eaicf.filter_accuracy(test_user)
def _run_timed_stage(uuid, stage_name, description, stage_fn, *stage_args):
    """Announce, run and time a single pipeline stage for one user.

    The banner is sent to both ``logging.info`` and stdout (the stdout copy
    is prefixed with the current ``arrow`` timestamp), then
    ``stage_fn(*stage_args)`` is invoked inside an ``ect.Timer``. Once the
    timed block has exited, the elapsed time is recorded with
    ``esds.store_pipeline_time``.

    :param uuid: user the stage is run for
    :param stage_name: name under which the elapsed time is stored
    :param description: human-readable text placed in the banner
    :param stage_fn: callable that performs the stage
    :param stage_args: positional arguments forwarded to ``stage_fn``
    """
    with ect.Timer() as stage_timer:
        banner = "*" * 10 + "UUID %s: %s" % (uuid, description) + "*" * 10
        logging.info(banner)
        print(str(arrow.now()) + banner)
        stage_fn(*stage_args)

    esds.store_pipeline_time(uuid, stage_name, time.time(),
                             stage_timer.elapsed)


def run_intake_pipeline_for_user(uuid):
    """Run every intake pipeline stage, in order, for the given user.

    Stages: usercache -> accuracy filtering -> trip segmentation ->
    section segmentation -> jump smoothing -> clean/resample ->
    autocheck points -> output generation. Each stage is timed and its
    elapsed time is stored through ``esds.store_pipeline_time``.

    If the timeseries holds no entries for the user once the usercache
    stage has run, the remaining stages are skipped.

    :param uuid: user whose data should be processed
    :return: None
    """
    uh = euah.UserCacheHandler.getUserCacheHandler(uuid)

    _run_timed_stage(uuid, ecwp.PipelineStages.USERCACHE.name,
                     "moving to long term", uh.moveToLongTerm)

    # Hack until we delete these spurious entries
    # https://github.com/e-mission/e-mission-server/issues/407#issuecomment-2484868

    # find_one() stops at the first match and works on every PyMongo
    # version, unlike Cursor.count() (deprecated in 3.7, removed in 4.0).
    if edb.get_timeseries_db().find_one({"user_id": uuid}) is None:
        logging.debug("Found no entries for %s, skipping" % uuid)
        return

    _run_timed_stage(uuid, ecwp.PipelineStages.ACCURACY_FILTERING.name,
                     "filter accuracy if needed",
                     eaicf.filter_accuracy, uuid)

    _run_timed_stage(uuid, ecwp.PipelineStages.TRIP_SEGMENTATION.name,
                     "segmenting into trips",
                     eaist.segment_current_trips, uuid)

    _run_timed_stage(uuid, ecwp.PipelineStages.SECTION_SEGMENTATION.name,
                     "segmenting into sections",
                     eaiss.segment_current_sections, uuid)

    _run_timed_stage(uuid, ecwp.PipelineStages.JUMP_SMOOTHING.name,
                     "smoothing sections",
                     eaicl.filter_current_sections, uuid)

    _run_timed_stage(uuid, ecwp.PipelineStages.CLEAN_RESAMPLING.name,
                     "cleaning and resampling timeline",
                     eaicr.clean_and_resample, uuid)

    # This stage is stored under a plain string rather than a
    # PipelineStages member.
    _run_timed_stage(uuid, "AUTOCHECK_POINTS",
                     "checking active mode trips to autocheck habits",
                     autocheck.give_points_for_all_tasks, uuid)

    _run_timed_stage(uuid, ecwp.PipelineStages.OUTPUT_GEN.name,
                     "storing views to cache", uh.storeViewsToCache)