def tearDown(self):
   edb.get_analysis_timeseries_db().remove({'user_id': self.testUUID})
   del_result = proxy.habiticaProxy(self.testUUID, "DELETE",
                                    "/api/v3/user",
                                    {'password': "******"})
   edb.get_habitica_db().remove({'user_id': self.testUUID})
   logging.debug("in tearDown, result = %s" % del_result)
Example #2
def del_objects_after(user_id, reset_ts, is_dry_run):
    del_query = {}
    # handle the user
    del_query.update({"user_id": user_id})

    del_query.update({"metadata.key": {"$in": ["inference/prediction", "analysis/inferred_section"]}})
    # all objects inserted here have start_ts and end_ts and are trip-like
    del_query.update({"data.start_ts": {"$gt": reset_ts}})
    logging.debug("After all updates, del_query = %s" % del_query)

    reset_pipeline_query = {"user_id": user_id, "pipeline_stage": ecwp.PipelineStages.MODE_INFERENCE.value}
    # Fuzz the TRIP_SEGMENTATION stage 5 mins because of
    # https://github.com/e-mission/e-mission-server/issues/333#issuecomment-312730217
    FUZZ_FACTOR = 5 * 60
    reset_pipeline_update = {'$set': {'last_processed_ts': reset_ts + FUZZ_FACTOR}}
    logging.info("About to reset stage %s to %s" 
        % (ecwp.PipelineStages.MODE_INFERENCE, reset_ts))
    

    logging.info("About to delete %d entries" 
        % edb.get_analysis_timeseries_db().find(del_query).count())
    logging.info("About to delete entries with keys %s" 
        % edb.get_analysis_timeseries_db().find(del_query).distinct("metadata.key"))
    
    if is_dry_run:
        logging.info("this is a dry-run, returning from del_objects_after without modifying anything")
    else:
        result = edb.get_analysis_timeseries_db().remove(del_query)
        logging.info("this is not a dry-run, result of deleting analysis entries is %s" % result)
        result = edb.get_pipeline_state_db().update_one(reset_pipeline_query, reset_pipeline_update)
        logging.info("this is not a dry-run, result of updating pipeline state is %s" % result.raw_result)
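A typical workflow for a destructive helper like this is a dry run first, then the real call once the logs look right. A minimal invocation sketch (the UUID and one-week rollback are made-up values, and it assumes it runs in the same module that defines del_objects_after):

import time
import uuid

# Example values only; pick the real user and reset point before running.
user_id = uuid.UUID("f1e2d3c4-b5a6-4789-8abc-def012345678")
reset_ts = time.time() - 7 * 24 * 60 * 60  # roll back one week

del_objects_after(user_id, reset_ts, is_dry_run=True)   # inspect the logs first
del_objects_after(user_id, reset_ts, is_dry_run=False)  # then actually delete and reset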
 def tearDown(self):
     edb.get_timeseries_db().remove({"user_id": self.testUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.testUUID})
     edb.get_timeseries_db().remove({"user_id": "new_fake"})
     edb.get_analysis_timeseries_db().remove({"user_id": "new_fake"})
     edb.get_common_trip_db().drop()
     edb.get_common_place_db().drop()
Example #4
def del_objects_after(user_id, reset_ts, is_dry_run):
    del_query = {}
    # handle the user
    del_query.update({"user_id": user_id})

    del_query.update({"metadata.key": {"$in": ["inference/prediction", "analysis/inferred_section"]}})
    # all objects inserted here have start_ts and end_ts and are trip-like
    del_query.update({"data.start_ts": {"$gt": reset_ts}})
    logging.debug("After all updates, del_query = %s" % del_query)

    reset_pipeline_query = {"pipeline_stage": ecwp.PipelineStages.MODE_INFERENCE.value}
    # Fuzz the TRIP_SEGMENTATION stage 5 mins because of
    # https://github.com/e-mission/e-mission-server/issues/333#issuecomment-312730217
    FUZZ_FACTOR = 5 * 60
    reset_pipeline_update = {'$set': {'last_processed_ts': reset_ts + FUZZ_FACTOR}}
    logging.info("About to reset stage %s to %s" 
        % (ecwp.PipelineStages.MODE_INFERENCE, reset_ts))
    

    logging.info("About to delete %d entries" 
        % edb.get_analysis_timeseries_db().find(del_query).count())
    logging.info("About to delete entries with keys %s" 
        % edb.get_analysis_timeseries_db().find(del_query).distinct("metadata.key"))
    
    if is_dry_run:
        logging.info("this is a dry-run, returning from del_objects_after without modifying anything")
    else:
        result = edb.get_analysis_timeseries_db().remove(del_query)
        logging.info("this is not a dry-run, result of deleting analysis entries is %s" % result)
 def clearRelatedDb(self):
     edb.get_timeseries_db().remove({"user_id": self.androidUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID})
     edb.get_pipeline_state_db().remove({"user_id": self.androidUUID})
     edb.get_timeseries_db().remove({"user_id": self.iosUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
     edb.get_pipeline_state_db().remove({"user_id": self.iosUUID})
Example #6
 def tearDown(self):
     edb.get_timeseries_db().remove({"user_id": self.androidUUID}) 
     edb.get_timeseries_db().remove({"user_id": self.iosUUID}) 
     edb.get_usercache_db().remove({"user_id": self.androidUUID}) 
     edb.get_usercache_db().remove({"user_id": self.iosUUID}) 
     edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
 def tearDown(self):
   edb.get_analysis_timeseries_db().remove({'user_id': self.testUUID})
   del_result = proxy.habiticaProxy(self.testUUID, "DELETE",
                                    "/api/v3/user",
                                    {'password': "******"})
   edb.get_habitica_db().remove({'user_id': self.testUUID})
   logging.debug("in tearDown, result = %s" % del_result)
Example #8
def del_all_objects(is_dry_run):
    del_query = {}
    del_query.update({
        "metadata.key": {
            "$in": ["inference/prediction", "analysis/inferred_section"]
        }
    })
    logging.info("About to delete %d entries" %
                 edb.get_analysis_timeseries_db().find(del_query).count())
    logging.info("About to delete entries with keys %s" %
                 edb.get_analysis_timeseries_db().find(del_query).distinct(
                     "metadata.key"))

    del_pipeline_query = {
        "pipeline_stage": ecwp.PipelineStages.MODE_INFERENCE.value
    }
    logging.info("About to delete pipeline entries for stage %s" %
                 ecwp.PipelineStages.MODE_INFERENCE)

    if is_dry_run:
        logging.info(
            "this is a dry-run, returning from del_objects_after without modifying anything"
        )
    else:
        result = edb.get_analysis_timeseries_db().delete_many(del_query)
        logging.info(
            "this is not a dry-run, result of deleting analysis entries is %s"
            % result.raw_result)
        result = edb.get_pipeline_state_db().delete_many(del_pipeline_query)
        logging.info(
            "this is not a dry-run, result of deleting pipeline state is %s" %
            result.raw_result)
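Reset scripts like this are usually driven from the command line with an explicit dry-run flag. A minimal argparse wrapper sketch (the flag name and wiring are assumptions, not from the source; it assumes del_all_objects above is in scope):

import argparse
import logging

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser()
    # Hypothetical flag: when set, only log what would be deleted.
    parser.add_argument("-n", "--dry-run", action="store_true",
                        help="log what would be deleted without modifying anything")
    args = parser.parse_args()
    del_all_objects(is_dry_run=args.dry_run)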
Example #9
 def clearRelatedDb(self):
     edb.get_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}})
     edb.get_analysis_timeseries_db().remove(
         {"user_id": {
             "$in": self.testUUIDList
         }})
     edb.get_usercache_db().remove({"user_id": {"$in": self.testUUIDList}})
Example #10
    def testReadWriteUser(self):
        try:
            rw_username = "******"
            rw_password = "******"
            self.admin_auth.command(
              {
                "createUser": rw_username,
                "pwd": rw_password,
                "roles": [ { "role": "readWrite", "db": "Stage_database" } ]
              }
            )
            result = self.admin_auth.command({"usersInfo": rw_username})
            self.assertEqual(result['ok'], 1.0)
            self.assertEqual(len(result['users']), 1)
            self.assertEqual(result['users'][0]['user'], rw_username)

            self.configureDB(self.getURL(rw_username, rw_password))

            import emission.tests.storageTests.analysis_ts_common as etsa
            import emission.storage.decorations.analysis_timeseries_queries as esda
            import emission.core.wrapper.rawplace as ecwrp
            import emission.storage.timeseries.abstract_timeseries as esta

            ts = esta.TimeSeries.get_time_series(self.uuid)
            etsa.createNewPlaceLike(self, esda.RAW_PLACE_KEY, ecwrp.Rawplace)
     
            inserted_df = ts.get_data_df(esda.RAW_PLACE_KEY)
            self.assertEqual(len(inserted_df), 1)
            self.assertEqual(len(ts.get_data_df(esda.CLEANED_PLACE_KEY)), 0)
        finally:
            import emission.core.get_database as edb

            edb.get_analysis_timeseries_db().delete_many({'user_id': self.testUserId})
 def clearRelatedDb(self):
     edb.get_timeseries_db().delete_many({"user_id": self.testUUID})
     edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID})
     edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID})
     edb.get_timeseries_db().delete_many({"user_id": self.testUUID1})
     edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID1})
     edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID1})
Example #12
def del_objects_after(user_id, reset_ts, is_dry_run):
    del_query = {}
    # handle the user
    del_query.update({"user_id": user_id})

    date_query_list = []
    # handle all trip-like entries
    date_query_list.append({"data.start_ts": {"$gt": reset_ts}})
    # handle all place-like entries
    date_query_list.append({"data.enter_ts": {"$gt": reset_ts}})
    # handle all reconstructed points
    date_query_list.append({"data.ts": {"$gt": reset_ts}})

    del_query.update({"$or": date_query_list})
    logging.debug("After all updates, del_query = %s" % del_query)
    logging.info("About to delete %d entries" 
        % edb.get_analysis_timeseries_db().find(del_query).count())
    logging.info("About to delete entries with keys %s" 
        % edb.get_analysis_timeseries_db().find(del_query).distinct("metadata.key"))

    if is_dry_run:
        logging.info("this is a dry-run, returning from del_objects_after without modifying anything")
    else:
        result = edb.get_analysis_timeseries_db().remove(del_query)
        logging.info("this is not a dry-run, result of deleting analysis entries is %s" % result)
 def clearRelatedDb(self):
     edb.get_timeseries_db().remove({"user_id": self.androidUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID})
     edb.get_pipeline_state_db().remove({"user_id": self.androidUUID})
     edb.get_timeseries_db().remove({"user_id": self.iosUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
     edb.get_pipeline_state_db().remove({"user_id": self.iosUUID})
Example #14
def reset_last_place(last_place, is_dry_run):
    if is_dry_run:
        logging.info("this is a dry-run, returning from reset_last_place without modifying anything" )
        return

    match_query = {"_id": last_place['_id']}
    logging.debug("match query = %s" % match_query)
    
    # Note that we need to reset the raw_place array
    # since it will be repopulated with new squished places 
    # when the timeline after the _entry_ to this place is reconstructed
    # Note that 
    # "If the field does not exist, then $unset does nothing (i.e. no
    # operation).", so this is still OK.
    reset_query = {'$unset' : {"data.exit_ts": "",
                               "data.exit_local_dt": "",
                               "data.exit_fmt_time": "",
                               "data.starting_trip": "",
                               "data.duration": ""
                               }}

    if last_place.metadata.key == esda.CLEANED_PLACE_KEY:
        reset_query.update({"$set": {"data.raw_places": []}})

    logging.debug("reset_query = %s" % reset_query)

    result = edb.get_analysis_timeseries_db().update(match_query, reset_query)
    logging.debug("this is not a dry run, result of update in reset_last_place = %s" % result)

    logging.debug("after update, entry is %s" %
                  edb.get_analysis_timeseries_db().find_one(match_query))
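The comment in the middle of this function leans on MongoDB's documented $unset semantics: unsetting a field that does not exist is a no-op rather than an error. A quick pymongo illustration (database, collection, and values are made up, and it assumes a local MongoDB instance):

import pymongo

client = pymongo.MongoClient()
coll = client.test_db.test_coll
coll.insert_one({"_id": 1, "data": {"enter_ts": 5}})

# "data.exit_ts" does not exist on this document; $unset silently does nothing.
result = coll.update_one({"_id": 1}, {"$unset": {"data.exit_ts": ""}})
print(result.modified_count)  # 0 - the document is unchanged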
Example #15
 def tearDown(self):
     os.remove(self.analysis_conf_path)
     edb.get_timeseries_db().remove({"user_id": self.androidUUID}) 
     edb.get_timeseries_db().remove({"user_id": self.iosUUID})
     edb.get_pipeline_state_db().remove({"user_id": self.androidUUID})
     edb.get_pipeline_state_db().remove({"user_id": self.iosUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID}) 
Example #16
 def clearRelatedDb(self):
     edb.get_timeseries_db().delete_many({"user_id": self.testUUID})
     edb.get_analysis_timeseries_db().delete_many(
         {"user_id": self.testUUID})
     edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID})
     edb.get_timeseries_db().delete_many({"user_id": self.testUUID1})
     edb.get_analysis_timeseries_db().delete_many(
         {"user_id": self.testUUID1})
     edb.get_pipeline_state_db().delete_many({"user_id": self.testUUID1})
Example #17
    def testReadOnlyUser(self):
        try:
            ro_username = "******"
            ro_password = "******"
            self.stagedb_auth = pymongo.MongoClient(self.getURL(self.test_username, self.test_password)).Stage_database
            self.stagedb_auth.command(
              {
                "createRole": "createIndex",
                 "privileges": [
                    { "resource": { "db": "Stage_database", "collection": "" },
                                    "actions": [ "createIndex"] }
                  ],
                  "roles": []
              }
            )
            role_result = self.stagedb_auth.command({ "rolesInfo": 1, "showBuiltinRoles": False, "showPrivileges": True})
            logging.debug("role_result = %s" % role_result)
            self.assertEqual(role_result['ok'], 1.0)
            self.assertEqual(len(role_result['roles']), 1)
            self.assertEqual(role_result['roles'][0]['role'], "createIndex")
            self.assertEqual(role_result['roles'][0]['db'], "Stage_database")
            self.assertEqual(len(role_result['roles'][0]['privileges']), 1)
            self.assertEqual(role_result['roles'][0]['privileges'][0]["actions"], ["createIndex"])

            self.admin_auth.command(
              {
                "createUser": ro_username,
                "pwd": ro_password,
                "roles": [ { "role": "read", "db": "Stage_database" },
                           { "role": "createIndex", "db": "Stage_database"} ]
              }
            )
            result = self.admin_auth.command({"usersInfo": ro_username})
            self.assertEqual(result['ok'], 1.0)
            self.assertEqual(len(result['users']), 1)
            self.assertEqual(result['users'][0]['user'], ro_username)

            self.configureDB(self.getURL(ro_username, ro_password))

            import emission.tests.storageTests.analysis_ts_common as etsa
            import emission.storage.decorations.analysis_timeseries_queries as esda
            import emission.core.wrapper.rawplace as ecwrp
            import emission.storage.timeseries.abstract_timeseries as esta

            ts = esta.TimeSeries.get_time_series(self.uuid)
            with self.assertRaises(pymongo.errors.OperationFailure):
                etsa.createNewPlaceLike(self, esda.RAW_PLACE_KEY, ecwrp.Rawplace)
     
            inserted_df = ts.get_data_df(esda.RAW_PLACE_KEY)
            self.assertEqual(len(inserted_df), 0)
            self.assertEqual(len(ts.get_data_df(esda.CLEANED_PLACE_KEY)), 0)
        finally:
            import emission.core.get_database as edb

            with self.assertRaises(pymongo.errors.OperationFailure):
                edb.get_analysis_timeseries_db().delete_many({'user_id': self.testUserId})
            self.stagedb_auth.command({"dropAllRolesFromDatabase": 1})
Example #18
def savePlaceLike(utest, key, wrapper):
    new_place = createNewPlaceLike(utest, key, wrapper)
    utest.assertEqual(edb.get_analysis_timeseries_db().count_documents(
        {"metadata.key": key, "data.exit_ts": 6}), 1)
    utest.assertEqual(edb.get_analysis_timeseries_db().find_one(
        {"metadata.key": key, "data.exit_ts": 6})["_id"], new_place.get_id())
    utest.assertEqual(edb.get_analysis_timeseries_db().find_one(
        {"metadata.key": key, "data.exit_ts": 6})["user_id"], utest.testUserId)
    return new_place
Example #19
def _get_sections_for_query(section_query, sort_field):
    section_query.update({"metadata.key": "segmentation/raw_section"})
    logging.debug("Returning sections for query %s" % section_query)
    section_doc_cursor = edb.get_analysis_timeseries_db().find(
        section_query).sort(sort_field, pymongo.ASCENDING)
    logging.debug(
        "result length = %d" %
        edb.get_analysis_timeseries_db().count_documents(section_query))
    return [ecwe.Entry(doc) for doc in section_doc_cursor]
Example #20
def saveTripLike(utest, key, wrapper):
    new_trip = createNewTripLike(utest, key, wrapper)
    utest.assertEqual(edb.get_analysis_timeseries_db().find(
        {"metadata.key": key, "data.end_ts": 6}).count(), 1)
    utest.assertEqual(edb.get_analysis_timeseries_db().find_one(
        {"metadata.key": key, "data.end_ts": 6})["_id"], new_trip.get_id())
    utest.assertEqual(edb.get_analysis_timeseries_db().find_one(
        {"metadata.key": key, "data.end_ts": 6})["user_id"], utest.testUserId)
    return new_trip
Example #21
def _get_stops_for_query(stop_query, sort_key):
    logging.debug("Returning stops for query %s" % stop_query)
    stop_query.update({"metadata.key": "segmentation/raw_stop"})
    logging.debug("updated query = %s" % stop_query)
    stop_doc_cursor = edb.get_analysis_timeseries_db().find(stop_query).sort(
        sort_key, pymongo.ASCENDING)
    logging.debug("result count = %d" %
                  edb.get_analysis_timeseries_db().count_documents(stop_query))
    return [ecwe.Entry(doc) for doc in stop_doc_cursor]
def savePlaceLike(utest, key, wrapper):
    new_place = createNewPlaceLike(utest, key, wrapper)
    utest.assertEqual(edb.get_analysis_timeseries_db().find(
        {"metadata.key": key, "data.exit_ts": 6}).count(), 1)
    utest.assertEqual(edb.get_analysis_timeseries_db().find_one(
        {"metadata.key": key, "data.exit_ts": 6})["_id"], new_place.get_id())
    utest.assertEqual(edb.get_analysis_timeseries_db().find_one(
        {"metadata.key": key, "data.exit_ts": 6})["user_id"], utest.testUserId)
    return new_place
Example #23
def del_objects(args):
    del_query = {}
    if args.user_id != "all":
        del_query['user_id'] = uuid.UUID(args.user_id)

    if args.date is None:
        print("Deleting all analysis information for query %s" % del_query)
        print(edb.get_analysis_timeseries_db().remove(del_query))
        print(edb.get_common_place_db().remove(del_query))
        print(edb.get_common_trip_db().remove(del_query))
Example #24
 def clearRelatedDb(self):
     edb.get_timeseries_db().delete_many(
         {"user_id": {
             "$in": self.testUUIDList
         }})
     edb.get_analysis_timeseries_db().delete_many(
         {"user_id": {
             "$in": self.testUUIDList
         }})
     edb.get_usercache_db().delete_many(
         {"user_id": {
             "$in": self.testUUIDList
         }})
     edb.get_uuid_db().delete_many({"user_id": {"$in": self.testUUIDList}})
Example #25
def purge_entries_for_user(curr_uuid, is_purge_state, db_array=None):
    logging.info("For uuid = %s, deleting entries from the timeseries" % curr_uuid)
    if db_array is not None:
        [ts_db, ats_db, udb, psdb] = db_array
        logging.debug("db_array passed in with databases %s" % db_array)
    else:
        import emission.core.get_database as edb

        ts_db = edb.get_timeseries_db()
        ats_db = edb.get_analysis_timeseries_db()
        udb = edb.get_uuid_db()
        psdb = edb.get_pipeline_state_db()
        logging.debug("db_array not passed in, looking up databases")

    timeseries_del_result = ts_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % timeseries_del_result)

    logging.info("For uuid = %s, deleting entries from the analysis_timeseries" % curr_uuid)
    analysis_timeseries_del_result = ats_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % analysis_timeseries_del_result)

    logging.info("For uuid %s, deleting entries from the user_db" % curr_uuid)
    user_db_del_result = udb.remove({"uuid": curr_uuid})
    logging.info("result = %s" % user_db_del_result)

    if is_purge_state:
        logging.info("For uuid %s, deleting entries from the pipeline_state_db" % curr_uuid)
        psdb_del_result = psdb.remove({"user_id": curr_uuid})
        logging.info("result = %s" % psdb_del_result)
 def __init__(self, user_id):
     super(BuiltinTimeSeries, self).__init__(user_id)
     self.key_query = lambda key: {"metadata.key": key}
     self.type_query = lambda entry_type: {"metadata.type": entry_type}
     self.user_query = {"user_id": self.user_id} # UUID is mandatory for this version
     self.timeseries_db = edb.get_timeseries_db()
     self.analysis_timeseries_db = edb.get_analysis_timeseries_db()
     self.ts_map = {
             "background/location": self.timeseries_db,
             "background/filtered_location": self.timeseries_db,
             "background/motion_activity": self.timeseries_db,
             "background/battery": self.timeseries_db,
             "statemachine/transition": self.timeseries_db,
             "config/sensor_config": self.timeseries_db,
             "segmentation/raw_trip": self.analysis_timeseries_db,
             "segmentation/raw_place": self.analysis_timeseries_db,
             "segmentation/raw_section": self.analysis_timeseries_db,
             "segmentation/raw_stop": self.analysis_timeseries_db,
             "analysis/smoothing": self.analysis_timeseries_db,
             "analysis/cleaned_trip": self.analysis_timeseries_db,
             "analysis/cleaned_place": self.analysis_timeseries_db,
             "analysis/cleaned_section": self.analysis_timeseries_db,
             "analysis/cleaned_stop": self.analysis_timeseries_db,
             "analysis/recreated_location": self.analysis_timeseries_db,
         }
Example #27
def purge_entries_for_user(curr_uuid, is_purge_state, db_array=None):
    logging.info("For uuid = %s, deleting entries from the timeseries" %
                 curr_uuid)
    if db_array is not None:
        [ts_db, ats_db, udb, psdb] = db_array
        logging.debug("db_array passed in with databases %s" % db_array)
    else:
        import emission.core.get_database as edb

        ts_db = edb.get_timeseries_db()
        ats_db = edb.get_analysis_timeseries_db()
        udb = edb.get_uuid_db()
        psdb = edb.get_pipeline_state_db()
        logging.debug("db_array not passed in, looking up databases")

    timeseries_del_result = ts_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % timeseries_del_result)

    logging.info(
        "For uuid = %s, deleting entries from the analysis_timeseries" %
        curr_uuid)
    analysis_timeseries_del_result = ats_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % analysis_timeseries_del_result)

    logging.info("For uuid %s, deleting entries from the user_db" % curr_uuid)
    user_db_del_result = udb.remove({"uuid": curr_uuid})
    logging.info("result = %s" % user_db_del_result)

    if is_purge_state:
        logging.info(
            "For uuid %s, deleting entries from the pipeline_state_db" %
            curr_uuid)
        psdb_del_result = psdb.remove({"user_id": curr_uuid})
        logging.info("result = %s" % psdb_del_result)
Example #28
def post_check(unique_user_list, all_rerun_list):
    import emission.core.get_database as edb
    import numpy as np

    logging.info(
        "For %s users, loaded %s raw entries, %s processed entries and %s pipeline states"
        % (len(unique_user_list), edb.get_timeseries_db().count_documents(
            {"user_id": {
                "$in": list(unique_user_list)
            }}), edb.get_analysis_timeseries_db().count_documents(
                {"user_id": {
                    "$in": list(unique_user_list)
                }}), edb.get_pipeline_state_db().count_documents({
                    "user_id": {
                        "$in": list(unique_user_list)
                    }
                })))

    all_rerun_arr = np.array(all_rerun_list)

    # want to check if no entry needs a rerun? In this case we are done
    # no entry needs a rerun = all entries are false, not(all entries) are true
    if np.all(np.logical_not(all_rerun_list)):
        logging.info(
            "all entries in the timeline contain analysis results, no need to run the intake pipeline"
        )
    # if all entries need to be re-run, we must have had raw data throughout
    elif np.all(all_rerun_list):
        logging.info(
            "all entries in the timeline contain only raw data, need to run the intake pipeline"
        )
    else:
        logging.info(
            "timeline contains a mixture of analysis results and raw data - complain to shankari!"
        )
Example #29
def _get_sections_for_query(section_query, sort_field):
    section_query.update({"metadata.key": "segmentation/raw_section"})
    logging.debug("Returning sections for query %s" % section_query)
    section_doc_cursor = edb.get_analysis_timeseries_db().find(
        section_query).sort(sort_field, pymongo.ASCENDING)
    logging.debug("result cursor length = %d" % section_doc_cursor.count())
    return [ecwe.Entry(doc) for doc in section_doc_cursor]
def export_timeline(user_id_str, start_day_str, end_day_str, file_name):
    logging.info(
        "Extracting trips for user %s day %s -> %s and saving to file %s" %
        (user_id_str, start_day_str, end_day_str, file_name))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    start_day_dt = pydt.datetime.strptime(start_day_str, "%Y-%m-%d")
    end_day_dt = pydt.datetime.strptime(end_day_str, "%Y-%m-%d")
    logging.debug("start_day_dt is %s, end_day_dt is %s" %
                  (start_day_dt, end_day_dt))
    # TODO: Convert to call to get_timeseries once we get that working
    # Or should we even do that?
    query = {
        'user_id': uuid.UUID(user_id_str),
        'start_local_dt': {
            '$gt': start_day_dt,
            "$lt": end_day_dt
        }
    }
    print("query = %s" % query)
    entry_list = list(edb.get_analysis_timeseries_db().find(query))
    logging.info("Found %d entries" % len(entry_list))
    json.dump(entry_list,
              open(file_name, "w"),
              default=bju.default,
              allow_nan=False,
              indent=4)
Example #31
 def __init__(self, user_id):
     super(BuiltinTimeSeries, self).__init__(user_id)
     self.key_query = lambda key: {"metadata.key": key}
     self.type_query = lambda entry_type: {"metadata.type": entry_type}
     self.user_query = {
         "user_id": self.user_id
     }  # UUID is mandatory for this version
     self.timeseries_db = edb.get_timeseries_db()
     self.analysis_timeseries_db = edb.get_analysis_timeseries_db()
     self.ts_map = {
         "background/location": self.timeseries_db,
         "background/filtered_location": self.timeseries_db,
         "background/motion_activity": self.timeseries_db,
         "background/battery": self.timeseries_db,
         "statemachine/transition": self.timeseries_db,
         "config/sensor_config": self.timeseries_db,
         "segmentation/raw_trip": self.analysis_timeseries_db,
         "segmentation/raw_place": self.analysis_timeseries_db,
         "segmentation/raw_section": self.analysis_timeseries_db,
         "segmentation/raw_stop": self.analysis_timeseries_db,
         "analysis/smoothing": self.analysis_timeseries_db,
         "analysis/cleaned_trip": self.analysis_timeseries_db,
         "analysis/cleaned_place": self.analysis_timeseries_db,
         "analysis/cleaned_section": self.analysis_timeseries_db,
         "analysis/cleaned_stop": self.analysis_timeseries_db,
         "analysis/recreated_location": self.analysis_timeseries_db,
     }
Example #32
def _get_stops_for_query(stop_query, sort_key):
    logging.debug("Returning stops for query %s" % stop_query)
    stop_query.update({"metadata.key": "segmentation/raw_stop"})
    logging.debug("updated query = %s" % stop_query)
    stop_doc_cursor = edb.get_analysis_timeseries_db().find(stop_query).sort(
        sort_key, pymongo.ASCENDING)
    logging.debug("result count = %d" % stop_doc_cursor.count())
    return [ecwe.Entry(doc) for doc in stop_doc_cursor]
Example #33
def get_stops_for_trip(key, user_id, trip_id):
    """
    Get the set of stops that are children of this trip.
    """
    query = {"user_id": user_id, "data.trip_id": trip_id,
             "metadata.key": key}
    logging.debug("About to execute query %s with sort_key %s" % (query, "data.enter_ts"))
    stop_doc_cursor = edb.get_analysis_timeseries_db().find(query).sort(
        "data.enter_ts", pymongo.ASCENDING)
    return [ecwe.Entry(doc) for doc in stop_doc_cursor]
Example #34
def get_stops_for_trip(key, user_id, trip_id):
    """
    Get the set of stops that are children of this trip.
    """
    query = {"user_id": user_id, "data.trip_id": trip_id,
             "metadata.key": key}
    logging.debug("About to execute query %s with sort_key %s" % (query, "data.enter_ts"))
    stop_doc_cursor = edb.get_analysis_timeseries_db().find(query).sort(
        "data.enter_ts", pymongo.ASCENDING)
    return [ecwe.Entry(doc) for doc in stop_doc_cursor]
Example #35
def del_all_objects(is_dry_run):
    del_query = {}
    del_query.update({"metadata.key": {"$in": ["inference/prediction", "analysis/inferred_section"]}})
    logging.info("About to delete %d entries" 
        % edb.get_analysis_timeseries_db().find(del_query).count())
    logging.info("About to delete entries with keys %s" 
        % edb.get_analysis_timeseries_db().find(del_query).distinct("metadata.key"))

    del_pipeline_query = {"pipeline_stage": ecwp.PipelineStages.MODE_INFERENCE.value}
    logging.info("About to delete pipeline entries for stage %s" %
        ecwp.PipelineStages.MODE_INFERENCE)

    if is_dry_run:
        logging.info("this is a dry-run, returning from del_objects_after without modifying anything")
    else:
        result = edb.get_analysis_timeseries_db().delete_many(del_query)
        logging.info("this is not a dry-run, result of deleting analysis entries is %s" % result.raw_result)
        result = edb.get_pipeline_state_db().delete_many(del_pipeline_query)
        logging.info("this is not a dry-run, result of deleting pipeline state is %s" % result.raw_result)
Example #36
def get_sections_for_trip(key, user_id, trip_id):
    # type: (UUID, object_id) -> list(sections)
    """
    Get the set of sections that are children of this trip.
    """
    query = {"user_id": user_id, "data.trip_id": trip_id,
             "metadata.key": key}
    section_doc_cursor = edb.get_analysis_timeseries_db().find(query).sort(
        "data.start_ts", pymongo.ASCENDING)
    logging.debug("About to execute query %s" % query)
    return [ecwe.Entry(doc) for doc in section_doc_cursor]
Example #37
def _del_entries_for_query(del_query, is_dry_run):
    """
        This is much easier. The steps are:
        - delete all analysis objects for this user
        - delete all pipeline states for this user
    """
    logging.info("About to delete %s analysis results" %
                    edb.get_analysis_timeseries_db().find(del_query).count())
    logging.info("About to delete entries with keys %s" 
        % edb.get_analysis_timeseries_db().find(del_query).distinct("metadata.key"))
    logging.info("About to delete %s pipeline states" % 
            (edb.get_pipeline_state_db().find(del_query).count()))

    if is_dry_run:
        logging.info("this is a dry run, returning from reset_user_to-start without modifying anything")
    else: 
        result = edb.get_analysis_timeseries_db().remove(del_query)
        logging.info("this is not a dry run, result of removing analysis objects = %s" % result)
        result = edb.get_pipeline_state_db().remove(del_query)
        logging.info("this is not a dry run, result of removing pipeline states = %s" % result)
def move_ts_entries(key):
    tdb = edb.get_timeseries_db()
    atdb = edb.get_analysis_timeseries_db()

    result_cursor = tdb.find({'metadata.key': key})
    logging.info("About to convert %s entries" % result_cursor.count())

    for i, entry_doc in enumerate(result_cursor):
        if i % 10000 == 0:
            print "moved %s from one ts to the other" % (entry_doc)
        atdb.insert(entry_doc)
        tdb.remove(entry_doc)
Example #39
    def testReadWriteUser(self):
        try:
            rw_username = "******"
            rw_password = "******"
            self.admin_auth.command({
                "createUser":
                rw_username,
                "pwd":
                rw_password,
                "roles": [{
                    "role": "readWrite",
                    "db": "Stage_database"
                }]
            })
            result = self.admin_auth.command({"usersInfo": rw_username})
            self.assertEqual(result['ok'], 1.0)
            self.assertEqual(len(result['users']), 1)
            self.assertEqual(result['users'][0]['user'], rw_username)

            self.configureDB(self.getURL(rw_username, rw_password))

            import emission.tests.storageTests.analysis_ts_common as etsa
            import emission.storage.decorations.analysis_timeseries_queries as esda
            import emission.core.wrapper.rawplace as ecwrp
            import emission.storage.timeseries.abstract_timeseries as esta

            ts = esta.TimeSeries.get_time_series(self.uuid)
            etsa.createNewPlaceLike(self, esda.RAW_PLACE_KEY, ecwrp.Rawplace)

            inserted_df = ts.get_data_df(esda.RAW_PLACE_KEY)
            self.assertEqual(len(inserted_df), 1)
            self.assertEqual(len(ts.get_data_df(esda.CLEANED_PLACE_KEY)), 0)
        finally:
            import emission.core.get_database as edb

            edb.get_analysis_timeseries_db().delete_many(
                {'user_id': self.testUserId})
def export_timeline(user_id_str, start_day_str, end_day_str, file_name):
    logging.info("Extracting trips for user %s day %s -> %s and saving to file %s" %
                 (user_id_str, start_day_str, end_day_str, file_name))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    start_day_dt = pydt.datetime.strptime(start_day_str, "%Y-%m-%d")
    end_day_dt = pydt.datetime.strptime(end_day_str, "%Y-%m-%d")
    logging.debug("start_day_dt is %s, end_day_dt is %s" % (start_day_dt, end_day_dt))
    # TODO: Convert to call to get_timeseries once we get that working
    # Or should we even do that?
    query = {'user_id': uuid.UUID(user_id_str), 'start_local_dt': {'$gt': start_day_dt, "$lt": end_day_dt}}
    print("query = %s" % query)
    entry_list = list(edb.get_analysis_timeseries_db().find(query))
    logging.info("Found %d entries" % len(entry_list))
    json.dump(entry_list, open(file_name, "w"), default=bju.default, allow_nan=False, indent=4)
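Because the dump serializes Mongo-specific types (ObjectId, UUID, datetime) through bson.json_util.default, reading the file back needs the matching hook. A short sketch, assuming a file written by the function above (the file name is an example):

import json
import bson.json_util as bju

# object_hook is the inverse of the default= serializer used in json.dump above,
# so ObjectId/UUID/datetime values round-trip instead of staying plain dicts.
with open("exported_timeline.json") as fp:
    entry_list = json.load(fp, object_hook=bju.object_hook)
print("loaded %d entries" % len(entry_list))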
Example #41
def move_ts_entries(key):
    tdb = edb.get_timeseries_db()
    atdb = edb.get_analysis_timeseries_db()

    result_cursor = tdb.find({'metadata.key': key})
    logging.info("About to convert %s entries" % result_cursor.count())

    for i, entry_doc in enumerate(result_cursor):
      try:
        if i % 10000 == 0:
            print("moved %s from one ts to the other" % (entry_doc))
        atdb.insert(entry_doc)
        # tdb.remove(entry_doc)
      except Exception:
        logging.info("Got error while moving %s, skipping" % (entry_doc))
Example #42
def _get_inference_entry_for_section(user_id, section_id, entry_key, section_id_key):
    prediction_key_query = {"metadata.key": entry_key}
    inference_query = {"user_id": user_id, section_id_key: section_id}
    combo_query = copy.copy(prediction_key_query)
    combo_query.update(inference_query)
    logging.debug("About to query %s" % combo_query)
    ret_list = list(edb.get_analysis_timeseries_db().find(combo_query))
    # We currently have only one algorithm
    assert len(ret_list) <= 1, "Found len(ret_list) = %d, expected <=1" % len(ret_list)
    if len(ret_list) == 0:
        logging.debug("Found no inferred prediction, returning None")
        return None
    
    assert len(ret_list) == 1, "Found ret_list of length %d, expected 1" % len(ret_list)
    curr_prediction = ecwe.Entry(ret_list[0])
    return curr_prediction
Example #43
 def clearRelatedDb(self):
     logging.info(
         "Timeseries delete result %s" %
         edb.get_timeseries_db().delete_many({
             "user_id": self.testUUID
         }).raw_result)
     logging.info("Analysis delete result %s" %
                  edb.get_analysis_timeseries_db().delete_many({
                      "user_id":
                      self.testUUID
                  }).raw_result)
     logging.info(
         "Usercache delete result %s" %
         edb.get_usercache_db().delete_many({
             "user_id": self.testUUID
         }).raw_result)
Example #44
def del_objects(args):
    del_query = {}
    if args.user_id != "all":
        del_query['user_id'] = uuid.UUID(args.user_id)

    trip_query = copy.copy(del_query)
    trip_query.update({
        "metadata.key": {
            "$in": [
                "segmentation/raw_trip", "analysis/cleaned_trip",
                "segmentation/raw_section", "analysis/cleaned_section"
            ]
        }
    })

    place_query = copy.copy(del_query)
    place_query.update({
        "metadata.key": {
            "$in": [
                "segmentation/raw_place", "analysis/cleaned_place",
                "segmentation/raw_stop", "analysis/cleaned_stop"
            ]
        }
    })

    point_query = copy.copy(del_query)
    point_query.update(
        {"metadata.key": {
            "$in": ["analysis/recreated_location"]
        }})

    if args.date is None:
        logging.debug("no date specified, deleting everything")
    else:
        day_dt = pydt.datetime.strptime(args.date, "%Y-%m-%d")
        logging.debug("day_dt is %s" % day_dt)
        day_ts = time.mktime(day_dt.timetuple())
        logging.debug("day_ts is %s" % day_ts)
        trip_query.update({"data.start_ts": {"$gt": day_ts}})
        place_query.update({"data.exit_ts": {"$gt": day_ts}})
        point_query.update({"data.ts": {"$gt": day_ts}})

    print "trip_query = %s" % trip_query
    print "place_query = %s" % place_query
    print "point_query = %s" % point_query

    # Since sections have the same basic structure as trips and stops have the
    # same basic structure as places, we can reuse the queries
    print "Deleting trips/sections for %s after %s" % (args.user_id, args.date)
    print edb.get_analysis_timeseries_db().remove(trip_query)
    print "Deleting places/stops for %s after %s" % (args.user_id, args.date)
    print edb.get_analysis_timeseries_db().remove(place_query)
    print "Deleting points for %s after %s" % (args.user_id, args.date)
    print edb.get_analysis_timeseries_db().remove(point_query)
Example #45
def _get_inference_entry_for_section(user_id, section_id, entry_key,
                                     section_id_key):
    prediction_key_query = {"metadata.key": entry_key}
    inference_query = {"user_id": user_id, section_id_key: section_id}
    combo_query = copy.copy(prediction_key_query)
    combo_query.update(inference_query)
    logging.debug("About to query %s" % combo_query)
    ret_list = list(edb.get_analysis_timeseries_db().find(combo_query))
    # We currently have only one algorithm
    assert len(ret_list
               ) <= 1, "Found len(ret_list) = %d, expected <=1" % len(ret_list)
    if len(ret_list) == 0:
        logging.debug("Found no inferred prediction, returning None")
        return None

    assert len(ret_list) == 1, "Found ret_list of length %d, expected 1" % len(
        ret_list)
    curr_prediction = ecwe.Entry(ret_list[0])
    return curr_prediction
def post_check(unique_user_list, all_rerun_list):
    import emission.core.get_database as edb
    import numpy as np

    logging.info("For %s users, loaded %s raw entries and %s processed entries" %
        (len(unique_user_list),
         edb.get_timeseries_db().find({"user_id": {"$in": list(unique_user_list)}}).count(),
         edb.get_analysis_timeseries_db().find({"user_id": {"$in": list(unique_user_list)}}).count()))

    all_rerun_arr = np.array(all_rerun_list)
   
    # want to check if no entry needs a rerun? In this case we are done
    # no entry needs a rerun = all entries are false, not(all entries) are true
    if np.all(np.logical_not(all_rerun_list)):
        logging.info("all entries in the timeline contain analysis results, no need to run the intake pipeline")
    # if all entries need to be re-run, we must have had raw data throughout
    elif np.all(all_rerun_list):
        logging.info("all entries in the timeline contain only raw data, need to run the intake pipeline")
    else:
        logging.info("timeline contains a mixture of analysis results and raw data - complain to shankari!")
Example #47
def del_objects(args):
    del_query = {}
    if args.user_id != "all":
        del_query['user_id'] = uuid.UUID(args.user_id)

    trip_query = copy.copy(del_query)
    trip_query.update({"metadata.key": {
        "$in": ["segmentation/raw_trip", "analysis/cleaned_trip",
                "segmentation/raw_section", "analysis/cleaned_section"]}})

    place_query = copy.copy(del_query)
    place_query.update({"metadata.key": {
        "$in": ["segmentation/raw_place", "analysis/cleaned_place",
                "segmentation/raw_stop", "analysis/cleaned_stop"]}})

    point_query = copy.copy(del_query)
    point_query.update({"metadata.key": {
        "$in": ["analysis/recreated_location"]}})

    if args.date is None:
        logging.debug("no date specified, deleting everything")
    else:
        day_dt = pydt.datetime.strptime(args.date, "%Y-%m-%d")
        logging.debug("day_dt is %s" % day_dt)
        day_ts = time.mktime(day_dt.timetuple())
        logging.debug("day_ts is %s" % day_ts)
        trip_query.update({"data.start_ts": {"$gt": day_ts}})
        place_query.update({"data.exit_ts": {"$gt": day_ts}})
        point_query.update({"data.ts": {"$gt": day_ts}})

    print "trip_query = %s" % trip_query
    print "place_query = %s" % place_query
    print "point_query = %s" % point_query

    # Since sections have the same basic structure as trips and stops have the
    # same basic structure as places, we can reuse the queries
    print "Deleting trips/sections for %s after %s" % (args.user_id, args.date)
    print edb.get_analysis_timeseries_db().remove(trip_query)
    print "Deleting places/stops for %s after %s" % (args.user_id, args.date)
    print edb.get_analysis_timeseries_db().remove(place_query)
    print "Deleting points for %s after %s" % (args.user_id, args.date)
    print edb.get_analysis_timeseries_db().remove(point_query)
    def clearRelevantSections(self):
        edb.get_analysis_timeseries_db().drop()
from future import standard_library
standard_library.install_aliases()
from builtins import *
import logging
import pandas as pd
import pymongo
import itertools

import emission.core.get_database as edb
import emission.storage.timeseries.abstract_timeseries as esta

import emission.core.wrapper.entry as ecwe

ts_enum_map = {
    esta.EntryType.DATA_TYPE: edb.get_timeseries_db(),
    esta.EntryType.ANALYSIS_TYPE: edb.get_analysis_timeseries_db()
}

class BuiltinTimeSeries(esta.TimeSeries):
    def __init__(self, user_id):
        super(BuiltinTimeSeries, self).__init__(user_id)
        self.key_query = lambda key: {"metadata.key": key}
        self.type_query = lambda entry_type: {"metadata.type": entry_type}
        self.user_query = {"user_id": self.user_id} # UUID is mandatory for this version
        self.timeseries_db = ts_enum_map[esta.EntryType.DATA_TYPE]
        self.analysis_timeseries_db = ts_enum_map[esta.EntryType.ANALYSIS_TYPE]
        # Design question: Should the stats be a separate database, or should it be part
        # of the timeseries database? Technically, it should be part of the timeseries
        # database. However, I am concerned about the performance of the database
        # with even more entries - it already takes 10 seconds to query for a document
        # and I am not sure that adding a ton more data is going to make that better
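The ts_map above is the routing table between the raw and analysis collections. A minimal sketch of the kind of lookup the class presumably performs when asked which collection backs a key (the helper name and the fallback-to-raw behavior are assumptions, not confirmed from the source):

# Hypothetical helper mirroring how ts_map could be consulted; keys that are
# not pre-registered fall back to the raw timeseries collection.
def get_db_for_key(ts, key):
    return ts.ts_map.get(key, ts.timeseries_db)

# e.g. get_db_for_key(ts, "analysis/cleaned_trip") returns the analysis
# collection, while an unregistered key returns timeseries_db.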
Example #50
 def tearDown(self):
     edb.get_analysis_timeseries_db().remove({'user_id': self.testUUID})
 def clearRelatedDb(self):
     edb.get_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}})
     edb.get_analysis_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}})
     edb.get_usercache_db().remove({"user_id": {"$in": self.testUUIDList}})
 def setUp(self):
     self.testUserId = uuid.uuid3(uuid.NAMESPACE_URL, "mailto:[email protected]")
     edb.get_analysis_timeseries_db().remove({'user_id': self.testUserId})
Example #53
 def clearRelatedDb(self):
       edb.get_timeseries_db().delete_many({"user_id": self.testUUID})
       edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID})
       edb.get_usercache_db().delete_many({"user_id": self.testUUID})
Example #54
 def clearRelatedDb(self):
     edb.get_timeseries_db().remove({"user_id": self.testUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.testUUID})
Example #55
 def clearRelatedDb(self):
     edb.get_timeseries_db().remove({"user_id": self.testUUID})
     edb.get_analysis_timeseries_db().remove({"user_id": self.testUUID})