def tearDown(self): edb.get_timeseries_db().remove({"user_id": self.androidUUID}) edb.get_timeseries_db().remove({"user_id": self.iosUUID}) edb.get_usercache_db().remove({"user_id": self.androidUUID}) edb.get_usercache_db().remove({"user_id": self.iosUUID}) edb.get_place_db().remove() edb.get_trip_new_db().remove()
def tearDown(self): edb.get_timeseries_db().remove({"user_id": self.androidUUID}) edb.get_timeseries_db().remove({"user_id": self.iosUUID}) edb.get_usercache_db().remove({"user_id": self.androidUUID}) edb.get_usercache_db().remove({"user_id": self.iosUUID}) edb.get_analysis_timeseries_db().remove({"user_id": self.androidUUID}) edb.get_analysis_timeseries_db().remove({"user_id": self.iosUUID})
def clearRelatedDb(self): edb.get_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}}) edb.get_analysis_timeseries_db().remove( {"user_id": { "$in": self.testUUIDList }}) edb.get_usercache_db().remove({"user_id": {"$in": self.testUUIDList}})
def clearRelatedDb(self):
    edb.get_timeseries_db().delete_many({"user_id": {"$in": self.testUUIDList}})
    edb.get_analysis_timeseries_db().delete_many({"user_id": {"$in": self.testUUIDList}})
    edb.get_usercache_db().delete_many({"user_id": {"$in": self.testUUIDList}})
    edb.get_uuid_db().delete_many({"user_id": {"$in": self.testUUIDList}})
def testMoveWhenEmpty(self):
    # 5 mins of data, every 30 secs = 10 entries per entry type. There are
    # 3 entry types, so 30 entries

    # First all the entries are in the usercache
    self.assertEqual(len(self.uc1.getMessage()), 30)
    self.assertEqual(len(list(self.ts1.find_entries())), 0)

    # Then we move entries for user1 into longterm
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

    # So we end up with all user1 entries in longterm
    self.assertEqual(len(self.uc1.getMessage()), 0)
    self.assertEqual(len(list(self.ts1.find_entries())), 30)

    # Add an invalid type
    edb.get_usercache_db().insert_one({
        'user_id': self.testUserUUID1,
        '_id': boi.ObjectId('572d3621d282b8f30def7e85'),
        'data': {u'transition': None,
                 'currState': u'STATE_ONGOING_TRIP'},
        'metadata': {'plugin': 'none',
                     'write_ts': self.curr_ts - 25,
                     'time_zone': u'America/Los_Angeles',
                     'platform': u'ios',
                     'key': u'statemachine/transition',
                     'read_ts': self.curr_ts - 27,
                     'type': u'message'}})

    # Re-run long-term for the user
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

    # That was stored in error_db, no errors in main body
    self.assertEqual(
        edb.get_timeseries_error_db().find({"user_id": self.testUserUUID1}).count(), 1)
    self.assertEqual(len(self.uc1.getMessage()), 0)
    self.assertEqual(len(list(self.ts1.find_entries())), 30)
def get_activities_for_section(section): query = { "user_id": section.user_id, "metadata.filter": section.loc_filter, "metadata.key": "background/activity" } start_ts = section.start_ts end_ts = section.end_ts query.update({ '$and': [{ 'metadata.write_ts': { '$gt': start_ts } }, { 'metadata.write_ts': { '$lt': end_ts } }] }) full_entries = list(edb.get_usercache_db().find(query)) merged_entries = [ dict(list(entry["metadata"].items()) + list(entry["data"].items())) for entry in full_entries ] entries_df = pd.DataFrame(merged_entries) entries_df['formatted_time'] = entries_df.write_ts.apply(from_micros) entries_df['activity'] = entries_df.agb.apply(to_activity_enum) return entries_df
def insert_entries(uuid, entry_it):
    # We want to get the references to the databases upfront, because
    # otherwise, we will get a new connection for each reference, which
    # will slow things down a lot
    # See
    # https://github.com/e-mission/e-mission-server/commit/aed451bc41ee09a9ff11f350881c320557fea71b
    # for details
    # This is also the reason why we pass in an iterator of entries instead of
    # one entry at a time. We don't want the interface to contain references to
    # the databases, since they are an implementation detail, and opening a
    # connection to the database for every call would slow things down
    ts = esta.TimeSeries.get_time_series(uuid)
    ucdb = edb.get_usercache_db()
    tsdb_count = 0
    ucdb_count = 0
    for entry in entry_it:
        assert entry["user_id"] is not None, \
            "user_id for entry %s is None, cannot insert" % entry
        if "write_fmt_time" in entry["metadata"]:
            # write_fmt_time is filled in only during the formatting process
            # so if write_fmt_time exists, it must be in the timeseries already
            ts.insert(entry)
            tsdb_count = tsdb_count + 1
        else:
            ucdb.save(entry)
            ucdb_count = ucdb_count + 1
    return (tsdb_count, ucdb_count)
def get_points_for_section(section): query = { "user_id": section.user_id, "metadata.filter": section.loc_filter, "metadata.key": "background/location" } try: query.update({ '$and': [{ 'data.mTime': { '$gt': section.start_ts } }, { 'data.mTime': { '$lt': section.end_ts } }] }) except AttributeError: logging.debug("Start and end times not defined, no time query") print("final query = %s " % query) # full_entries = list(edb.get_usercache_db().find(query).sort("data.mTime", pymongo.ASCENDING)) full_entries = list(edb.get_usercache_db().find(query)) merged_entries = [ dict(list(entry["metadata"].items()) + list(entry["data"].items())) for entry in full_entries ] entries_df = pd.DataFrame(merged_entries) entries_df['formatted_time'] = entries_df.mTime.apply(from_micros) return entries_df
def sync_phone_to_server(uuid, data_from_phone): """ Puts the blob from the phone into the cache """ for data in data_from_phone: # logging.debug("About to insert %s into the database" % data) data.update({"user_id": uuid}) # Hack to deal with milliseconds until we have moved everything over if ecc.isMillisecs(data["metadata"]["write_ts"]): data["metadata"]["write_ts"] = float(data["metadata"]["write_ts"]) / 1000 if "ts" in data["data"] and ecc.isMillisecs(data["data"]["ts"]): data["data"]["ts"] = float(data["data"]["ts"]) / 1000 # logging.debug("After updating with UUId, we get %s" % data) document = {'$set': data} update_query = {'user_id': uuid, 'metadata.type': data["metadata"]["type"], 'metadata.write_ts': data["metadata"]["write_ts"], 'metadata.key': data["metadata"]["key"]} result = get_usercache_db().update(update_query, document, upsert=True) logging.debug("Updated result for user = %s, key = %s, write_ts = %s = %s" % (uuid, data["metadata"]["key"], data["metadata"]["write_ts"], result)) if 'err' in result and result['err'] is not None: logging.error("In sync_phone_to_server, err = %s" % result['err']) raise Exception()
def getPublicData():
    ids = request.json['phone_ids']
    all_uuids = map(lambda id: UUID(id), ids)
    uuids = [uuid for uuid in all_uuids if uuid in estag.TEST_PHONE_IDS]

    from_ts = request.query.from_ts
    to_ts = request.query.to_ts

    time_range = estt.TimeQuery("metadata.write_ts", float(from_ts), float(to_ts))
    time_query = time_range.get_query()

    user_queries = map(lambda id: {'user_id': id}, uuids)
    for q in user_queries:
        q.update(time_query)

    num_entries_ts = map(lambda q: edb.get_timeseries_db().find(q).count(), user_queries)
    num_entries_uc = map(lambda q: edb.get_usercache_db().find(q).count(), user_queries)
    total_entries = sum(num_entries_ts + num_entries_uc)
    logging.debug("Total entries requested: %d" % total_entries)

    threshold = 200000
    if total_entries > threshold:
        data_list = None
    else:
        data_list = map(lambda u: esdc.find_entries(u, None, time_range), all_uuids)

    return {'phone_data': data_list}
def sync_phone_to_server(uuid, data_from_phone): """ Puts the blob from the phone into the cache """ usercache_db = get_usercache_db() for data in data_from_phone: # logging.debug("About to insert %s into the database" % data) data.update({"user_id": uuid}) # Hack to deal with milliseconds until we have moved everything over if ecc.isMillisecs(data["metadata"]["write_ts"]): data["metadata"]["write_ts"] = float( data["metadata"]["write_ts"]) / 1000 if "ts" in data["data"] and ecc.isMillisecs(data["data"]["ts"]): data["data"]["ts"] = float(data["data"]["ts"]) / 1000 # logging.debug("After updating with UUId, we get %s" % data) document = {'$set': data} update_query = { 'user_id': uuid, 'metadata.type': data["metadata"]["type"], 'metadata.write_ts': data["metadata"]["write_ts"], 'metadata.key': data["metadata"]["key"] } result = usercache_db.update(update_query, document, upsert=True) logging.debug( "Updated result for user = %s, key = %s, write_ts = %s = %s" % (uuid, data["metadata"]["key"], data["metadata"]["write_ts"], result)) if 'err' in result and result['err'] is not None: logging.error("In sync_phone_to_server, err = %s" % result['err']) raise Exception()
def insert_entries(uuid, entry_it):
    # We want to get the references to the databases upfront, because
    # otherwise, we will get a new connection for each reference, which
    # will slow things down a lot
    # See
    # https://github.com/e-mission/e-mission-server/commit/aed451bc41ee09a9ff11f350881c320557fea71b
    # for details
    # This is also the reason why we pass in an iterator of entries instead of
    # one entry at a time. We don't want the interface to contain references to
    # the databases, since they are an implementation detail, and opening a
    # connection to the database for every call would slow things down
    ts = esta.TimeSeries.get_time_series(uuid)
    ucdb = edb.get_usercache_db()
    tsdb_count = 0
    ucdb_count = 0
    for entry in entry_it:
        assert entry["user_id"] is not None, \
            "user_id for entry %s is None, cannot insert" % entry
        if "write_fmt_time" in entry["metadata"]:
            # write_fmt_time is filled in only during the formatting process
            # so if write_fmt_time exists, it must be in the timeseries already
            ts.insert(entry)
            tsdb_count = tsdb_count + 1
        else:
            ucdb.save(entry)
            ucdb_count = ucdb_count + 1
    return (tsdb_count, ucdb_count)
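A minimal sketch (mine, not from the source) that mirrors the routing decision insert_entries makes: entries whose metadata already carries write_fmt_time are timeseries-ready, the rest still belong in the usercache. The helper name and the two sample entries are made up for illustration and nothing here touches the database.

# Hedged sketch: partition a batch the same way insert_entries routes it.
def partition_by_destination(entry_it):
    ts_ready, cache_bound = [], []
    for entry in entry_it:
        assert entry["user_id"] is not None
        if "write_fmt_time" in entry["metadata"]:
            ts_ready.append(entry)        # would go to the timeseries
        else:
            cache_bound.append(entry)     # would be saved back to the usercache
    return ts_ready, cache_bound

sample = [
    {"user_id": "hypothetical-uuid",
     "metadata": {"key": "background/location", "write_ts": 1440700000,
                  "write_fmt_time": "2015-08-27T12:06:40-07:00"},
     "data": {"ts": 1440700000}},
    {"user_id": "hypothetical-uuid",
     "metadata": {"key": "background/location", "write_ts": 1440700030},
     "data": {"ts": 1440700030}},
]
ts_ready, cache_bound = partition_by_destination(iter(sample))
print("timeseries-ready = %d, usercache-bound = %d" % (len(ts_ready), len(cache_bound)))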
def get_transitions_df(user_id, loc_filter, start_dt, end_dt):
    query = {"user_id": user_id,
             "metadata.filter": loc_filter,
             "metadata.key": "statemachine/transition"}

    if (start_dt is not None and end_dt is not None):
        start_ts = time.mktime(start_dt.timetuple()) * 1000
        end_ts = time.mktime(end_dt.timetuple()) * 1000
        query.update({'$and': [{'metadata.write_ts': {'$gt': start_ts}},
                               {'metadata.write_ts': {'$lt': end_ts}}]})

    full_entries = list(edb.get_usercache_db().find(query))
    merged_entries = [dict(list(entry["metadata"].items()) + list(entry["data"].items()))
                      for entry in full_entries]
    entries_df = pd.DataFrame(merged_entries)
    entries_df['formatted_time'] = entries_df.write_ts.apply(from_micros)
    return entries_df
def __init__(self, user_id):
    super(BuiltinUserCache, self).__init__(user_id)
    self.key_query = lambda (key): {"metadata.key": key}
    self.ts_query = lambda (tq): BuiltinUserCache._get_ts_query(tq)
    self.type_query = lambda (entry_type): {"metadata.type": entry_type}
    self.db = get_usercache_db()
def testMoveDuplicateKey(self):
    # 5 mins of data, every 30 secs = 10 entries per entry type. There are
    # 3 entry types, so 30 entries

    # First all the entries are in the usercache
    self.assertEqual(len(self.uc1.getMessage()), 30)
    self.assertEqual(len(list(self.ts1.find_entries())), 0)

    # Store the entries before the move so that we can duplicate them later
    entries_before_move = self.uc1.getMessage()

    # Then we move entries for user1 into longterm
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

    # So we end up with all user1 entries in longterm
    self.assertEqual(len(self.uc1.getMessage()), 0)
    self.assertEqual(len(list(self.ts1.find_entries())), 30)

    # Put the same entries (with the same object IDs) into the cache again
    edb.get_usercache_db().insert_many(entries_before_move)
    self.assertEqual(len(self.uc1.getMessage()), 30)
    self.assertEqual(len(self.uc2.getMessage()), 30)

    # Also reset the user2 cache to be user1 so that we have a fresh supply of entries
    update_result = edb.get_usercache_db().update_many(
        {"user_id": self.testUserUUID2},
        {"$set": {"user_id": self.testUserUUID1}})
    logging.debug("update_result = %s" % update_result)

    # Now, we should have 60 entries in the usercache (30 duplicates + 30 from user2)
    self.assertEqual(len(self.uc1.getMessage()), 60)
    self.assertEqual(len(list(self.ts1.find_entries())), 30)

    edb.get_pipeline_state_db().delete_many({"user_id": self.testUserUUID1})

    # Then we move entries for user1 into longterm again
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

    # All the duplicates should have been ignored, and the new entries moved into the timeseries
    self.assertEqual(len(self.uc1.getMessage()), 0)
    self.assertEqual(len(list(self.ts1.find_entries())), 60)
def __init__(self, uuid):
    super(BuiltinUserCache, self).__init__(uuid)
    self.key_query = lambda(key): {"metadata.key": key}
    self.ts_query = lambda(tq): {"$and": [{"metadata.%s" % tq.timeType: {"$gte": tq.startTs}},
                                          {"metadata.%s" % tq.timeType: {"$lte": tq.endTs}}]}
    self.type_query = lambda(entry_type): {"metadata.type": entry_type}
    # time.time() returns seconds. Our format requires milliseconds
    self.get_utc_ts = lambda(_): int(time.time() * 1000)
    self.db = get_usercache_db()
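A small standalone sketch (my own, not from the source) of how the key/type/time-range fragments built in the __init__ above could be combined into a single find() filter. The TimeQuery stand-in is a namedtuple, since the real class lives elsewhere in the codebase, and the key names are only examples.

# Hedged sketch: composing metadata.key / metadata.type / time-range fragments
# into one usercache query document.
import collections

TQ = collections.namedtuple("TQ", ["timeType", "startTs", "endTs"])  # stand-in only

key_query = lambda key: {"metadata.key": key}
type_query = lambda entry_type: {"metadata.type": entry_type}
ts_query = lambda tq: {"$and": [{"metadata.%s" % tq.timeType: {"$gte": tq.startTs}},
                                {"metadata.%s" % tq.timeType: {"$lte": tq.endTs}}]}

def combined_query(user_id, key, entry_type, tq):
    # merge the individual fragments into one pymongo filter document
    query = {"user_id": user_id}
    query.update(key_query(key))
    query.update(type_query(entry_type))
    query.update(ts_query(tq))
    return query

print(combined_query("hypothetical-uuid", "background/location", "sensor-data",
                     TQ("write_ts", 1440658800, 1440745200)))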
def testNoOverrides(self):
    tq = enua.UserCache.TimeQuery("write_ts", 1440658800, 1440745200)
    eacc.save_all_configs(self.androidUUID, tq)
    saved_entries = list(edb.get_usercache_db().find({'user_id': self.androidUUID,
                                                      'metadata.key': 'config/sensor_config'}))
    self.assertEqual(len(saved_entries), 0)
def copy_to_usercache():
    # Step 1: Copy data back to user cache
    error_it = edb.get_timeseries_error_db().find()
    logging.info("Found %d errors in this round" % error_it.count())
    for error in error_it:
        logging.debug("Copying entry %s" % error["metadata"])
        save_result = edb.get_usercache_db().save(error)
        remove_result = edb.get_timeseries_error_db().remove(error["_id"])
        logging.debug("save_result = %s, remove_result = %s" % (save_result, remove_result))
    logging.info("step copy_to_usercache DONE")
def sync_server_to_phone(uuid):
    """
    Gets the blob to sync to send to the phone and sends it over
    Return None if there is no data
    """
    retrievedData = list(get_usercache_db().find(
        {"user_id": uuid, "metadata.type": "document"},   # query
        {'_id': False, 'user_id': False}).sort(
            "metadata.write_ts", pymongo.ASCENDING))      # projection, sort
    # logging.debug("retrievedData = %s" % retrievedData)
    return retrievedData
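A hedged sketch (not in the source) of peeking at what sync_server_to_phone would return for one user, using only the pymongo collection: count the phone-bound "document" entries and list their keys. It assumes a reachable MongoDB, a reasonably recent pymongo (for count_documents), and a real user UUID in place of the commented-out placeholder.

# Hedged sketch: summarize the phone-bound documents for one user before syncing.
import emission.core.get_database as edb

def summarize_documents(user_id):
    ucdb = edb.get_usercache_db()
    filter_doc = {"user_id": user_id, "metadata.type": "document"}
    count = ucdb.count_documents(filter_doc)          # how many docs would be synced
    keys = ucdb.distinct("metadata.key", filter_doc)  # which document keys they carry
    return count, keys

# count, keys = summarize_documents(some_uuid)   # some_uuid is a placeholder
# print("%d documents pending, keys = %s" % (count, keys))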
def testOneOverride(self):
    cfg_1 = copy.copy(self.dummy_config)
    cfg_1['metadata']['write_ts'] = 1440700000
    edb.get_timeseries_db().insert(cfg_1)

    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    eacc.save_all_configs(self.androidUUID, tq)
    saved_entries = list(edb.get_usercache_db().find({'user_id': self.androidUUID,
                                                      'metadata.key': 'config/sensor_config'}))
    self.assertEqual(len(saved_entries), 1)
    logging.debug(saved_entries[0])
    self.assertEqual(saved_entries[0]['data']['is_duty_cycling'],
                     cfg_1['data']['is_duty_cycling'])
def testMoveDuplicateKey(self):
    # 5 mins of data, every 30 secs = 10 entries per entry type. There are
    # 3 entry types, so 30 entries

    # First all the entries are in the usercache
    self.assertEqual(len(self.uc1.getMessage()), 30)
    self.assertEqual(len(list(self.ts1.find_entries())), 0)

    # Store the entries before the move so that we can duplicate them later
    entries_before_move = self.uc1.getMessage()

    # Then we move entries for user1 into longterm
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

    # So we end up with all user1 entries in longterm
    self.assertEqual(len(self.uc1.getMessage()), 0)
    self.assertEqual(len(list(self.ts1.find_entries())), 30)

    # Put the same entries (with the same object IDs) into the cache again
    edb.get_usercache_db().insert(entries_before_move)
    self.assertEqual(len(self.uc1.getMessage()), 30)
    self.assertEqual(len(self.uc2.getMessage()), 30)

    # Also reset the user2 cache to be user1 so that we have a fresh supply of entries
    update_result = edb.get_usercache_db().update({"user_id": self.testUserUUID2},
                                                  {"$set": {"user_id": self.testUserUUID1}},
                                                  multi=True)
    logging.debug("update_result = %s" % update_result)

    # Now, we should have 60 entries in the usercache (30 duplicates + 30 from user2)
    self.assertEqual(len(self.uc1.getMessage()), 60)
    self.assertEqual(len(list(self.ts1.find_entries())), 30)

    edb.get_pipeline_state_db().remove({"user_id": self.testUserUUID1})

    # Then we move entries for user1 into longterm again
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

    # All the duplicates should have been ignored, and the new entries moved into the timeseries
    self.assertEqual(len(self.uc1.getMessage()), 0)
    self.assertEqual(len(list(self.ts1.find_entries())), 60)
def copy_to_usercache():
    # Step 1: Copy data back to user cache
    error_it = edb.get_timeseries_error_db().find()
    uc = edb.get_usercache_db()
    te = edb.get_timeseries_error_db()
    logging.info("Found %d errors in this round" % error_it.count())
    for error in error_it:
        logging.debug("Copying entry %s" % error["metadata"])
        save_result = uc.save(error)
        remove_result = te.remove(error["_id"])
        logging.debug("save_result = %s, remove_result = %s" % (save_result, remove_result))
    logging.info("step copy_to_usercache DONE")
def testOldOverride(self):
    cfg_1 = copy.copy(self.dummy_config)
    cfg_1['metadata']['write_ts'] = 1440500000
    edb.get_timeseries_db().insert(cfg_1)

    cfg_2 = copy.copy(self.dummy_config)
    cfg_2['metadata']['write_ts'] = 1440610000
    edb.get_timeseries_db().insert(cfg_2)

    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    eacc.save_all_configs(self.androidUUID, tq)
    saved_entries = list(edb.get_usercache_db().find({'user_id': self.androidUUID,
                                                      'metadata.key': 'config/sensor_config'}))
    self.assertEqual(len(saved_entries), 0)
def testMoveWhenEmpty(self):
    # 5 mins of data, every 30 secs = 10 entries per entry type. There are
    # 3 entry types, so 30 entries

    # First all the entries are in the usercache
    self.assertEqual(len(self.uc1.getMessage()), 30)
    self.assertEqual(len(list(self.ts1.find_entries())), 0)

    # Then we move entries for user1 into longterm
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

    # So we end up with all user1 entries in longterm
    self.assertEqual(len(self.uc1.getMessage()), 0)
    self.assertEqual(len(list(self.ts1.find_entries())), 30)

    # Add an invalid type
    edb.get_usercache_db().insert({
        'user_id': self.testUserUUID1,
        '_id': boi.ObjectId('572d3621d282b8f30def7e85'),
        'data': {u'transition': None,
                 'currState': u'STATE_ONGOING_TRIP'},
        'metadata': {'plugin': 'none',
                     'write_ts': self.curr_ts - 25,
                     'time_zone': u'America/Los_Angeles',
                     'platform': u'ios',
                     'key': u'statemachine/transition',
                     'read_ts': self.curr_ts - 27,
                     'type': u'message'}})

    # Re-run long-term for the user
    enuah.UserCacheHandler.getUserCacheHandler(self.testUserUUID1).moveToLongTerm()

    # That was stored in error_db, no errors in main body
    self.assertEqual(edb.get_timeseries_error_db().find({"user_id": self.testUserUUID1}).count(), 1)
    self.assertEqual(len(self.uc1.getMessage()), 0)
    self.assertEqual(len(list(self.ts1.find_entries())), 30)
def get_activities_for_section(section): query = {"user_id": section.user_id, "metadata.filter": section.loc_filter, "metadata.key": "background/activity"} start_ts = section.start_ts end_ts = section.end_ts query.update({'$and': [{'metadata.write_ts': {'$gt': start_ts}}, {'metadata.write_ts': {'$lt': end_ts}}]}) full_entries = list(edb.get_usercache_db().find(query)) merged_entries = [dict(entry["metadata"].items() + entry["data"].items()) for entry in full_entries] entries_df = pd.DataFrame(merged_entries) entries_df['formatted_time'] = entries_df.write_ts.apply(from_micros) entries_df['activity'] = entries_df.agb.apply(to_activity_enum) return entries_df
def clearRelatedDb(self):
    logging.info("Timeseries delete result %s" %
                 edb.get_timeseries_db().delete_many(
                     {"user_id": self.testUUID}).raw_result)
    logging.info("Analysis delete result %s" %
                 edb.get_analysis_timeseries_db().delete_many(
                     {"user_id": self.testUUID}).raw_result)
    logging.info("Usercache delete result %s" %
                 edb.get_usercache_db().delete_many(
                     {"user_id": self.testUUID}).raw_result)
def get_transitions_df(user_id, loc_filter, start_dt, end_dt): query = {"user_id": user_id, "metadata.filter": loc_filter, "metadata.key": "statemachine/transition"} if (start_dt is not None and end_dt is not None): start_ts = time.mktime(start_dt.timetuple()) * 1000 end_ts = time.mktime(end_dt.timetuple()) * 1000 query.update({'$and': [{'metadata.write_ts': {'$gt': start_ts}}, {'metadata.write_ts': {'$lt': end_ts}}]}) full_entries = list(edb.get_usercache_db().find(query)) merged_entries = [dict(entry["metadata"].items() + entry["data"].items()) for entry in full_entries] entries_df = pd.DataFrame(merged_entries) entries_df['formatted_time'] = entries_df.write_ts.apply(from_micros) return entries_df
def fix_key(check_field, new_key): print("First entry for "+new_key+" is %s" % list(edb.get_timeseries_db().find( {"metadata.key": "config/sensor_config", check_field: {"$exists": True}}).sort( "metadata/write_ts").limit(1))) udb = edb.get_usercache_db() tdb = edb.get_timeseries_db() for i, entry in enumerate(edb.get_timeseries_db().find( {"metadata.key": "config/sensor_config", check_field: {"$exists": True}})): entry["metadata"]["key"] = new_key if i % 10000 == 0: print(udb.insert(entry)) print(tdb.remove(entry["_id"])) else: udb.insert(entry) tdb.remove(entry["_id"])
def get_points_for_section(section): query = {"user_id": section.user_id, "metadata.filter": section.loc_filter, "metadata.key": "background/location"} try: query.update({'$and': [{'data.mTime': {'$gt': section.start_ts}}, {'data.mTime': {'$lt': section.end_ts}}]}) except AttributeError: logging.debug("Start and end times not defined, no time query") print "final query = %s " % query # full_entries = list(edb.get_usercache_db().find(query).sort("data.mTime", pymongo.ASCENDING)) full_entries = list(edb.get_usercache_db().find(query)) merged_entries = [dict(entry["metadata"].items() + entry["data"].items()) for entry in full_entries] entries_df = pd.DataFrame(merged_entries) entries_df['formatted_time'] = entries_df.mTime.apply(from_micros) return entries_df
def get_plottable_df(user_id, loc_filter, start_dt, end_dt): query = {"user_id": user_id, "metadata.filter": loc_filter, "metadata.key": "background/location"} if (start_dt is not None and end_dt is not None): start_ts = time.mktime(start_dt.timetuple()) end_ts = time.mktime(end_dt.timetuple()) query.update({'$and': [{'data.mTime': {'$gt': start_ts * 1000}}, {'data.mTime': {'$lt': end_ts * 1000}}]}) print "final query = %s " % query full_entries = list(edb.get_usercache_db().find(query).sort("data.mTime", pymongo.ASCENDING)) merged_entries = [dict(entry["metadata"].items() + entry["data"].items()) for entry in full_entries] entries_df = pd.DataFrame(merged_entries) from_micros = lambda x: pydt.datetime.fromtimestamp(x/1000) entries_df['formatted_time'] = entries_df.mTime.apply(from_micros) return entries_df
def sync_phone_to_server(uuid, data_from_phone): """ Puts the blob from the phone into the cache """ for data in data_from_phone: logging.debug("About to insert %s into the database" % data) data.update({"user_id": uuid}) logging.debug("After updating with UUId, we get %s" % data) document = {'$set': data} update_query = {'user_id': uuid, 'metadata.type': data["metadata"]["type"], 'metadata.write_ts': data["metadata"]["write_ts"], 'metadata.key': data["metadata"]["key"]} result = get_usercache_db().update(update_query, document, upsert=True) logging.debug("Updated result for key = %s, write_ts = %s = %s" % (data["metadata"]["key"], data["metadata"]["write_ts"], result)) if 'err' in result and result['err'] is not None: logging.error("In sync_phone_to_server, err = %s" % result['err']) raise Exception()
def fix_key(check_field, new_key): print("First entry for " + new_key + " is %s" % list(edb.get_timeseries_db().find({ "metadata.key": "config/sensor_config", check_field: { "$exists": True } }).sort("metadata/write_ts").limit(1))) udb = edb.get_usercache_db() tdb = edb.get_timeseries_db() for i, entry in enumerate(edb.get_timeseries_db().find({ "metadata.key": "config/sensor_config", check_field: { "$exists": True } })): entry["metadata"]["key"] = new_key if i % 10000 == 0: print(udb.insert(entry)) print(tdb.remove(entry["_id"])) else: udb.insert(entry) tdb.remove(entry["_id"])
def sync_phone_to_server(uuid, data_from_phone): """ Puts the blob from the phone into the cache """ usercache_db = get_usercache_db() for data in data_from_phone: # logging.debug("About to insert %s into the database" % data) data.update({"user_id": uuid}) # Hack to deal with milliseconds until we have moved everything over if ecc.isMillisecs(data["metadata"]["write_ts"]): data["metadata"]["write_ts"] = old_div( float(data["metadata"]["write_ts"]), 1000) if "ts" in data["data"] and ecc.isMillisecs(data["data"]["ts"]): data["data"]["ts"] = old_div(float(data["data"]["ts"]), 1000) # logging.debug("After updating with UUId, we get %s" % data) document = {'$set': data} update_query = { 'user_id': uuid, 'metadata.type': data["metadata"]["type"], 'metadata.write_ts': data["metadata"]["write_ts"], 'metadata.key': data["metadata"]["key"] } result = usercache_db.update_one(update_query, document, upsert=True) logging.debug( "Updated result for user = %s, key = %s, write_ts = %s = %s" % (uuid, data["metadata"]["key"], data["metadata"]["write_ts"], result.raw_result)) # I am not sure how to trigger a writer error to test this # and whether this is the format expected from the server in the rawResult if 'ok' in result.raw_result and result.raw_result['ok'] != 1.0: logging.error("In sync_phone_to_server, err = %s" % result.raw_result['writeError']) raise Exception()
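A minimal sketch (mine, not from the source) of why the upsert filter on (user_id, metadata.type, metadata.write_ts, metadata.key) makes re-sent batches idempotent: applying the same $set twice still leaves exactly one usercache document. The UUID and entry are fabricated, it assumes a recent pymongo for count_documents, and it writes to whatever usercache the environment points at, so it is only meant for a throwaway/test database.

# Hedged demo of the upsert-as-dedup behavior used by sync_phone_to_server.
import uuid
import emission.core.get_database as edb

ucdb = edb.get_usercache_db()
test_uuid = uuid.uuid4()
entry = {"user_id": test_uuid,
         "metadata": {"type": "message", "key": "statemachine/transition",
                      "write_ts": 1440700000},
         "data": {"transition": None, "currState": "STATE_ONGOING_TRIP"}}

update_query = {"user_id": test_uuid,
                "metadata.type": entry["metadata"]["type"],
                "metadata.write_ts": entry["metadata"]["write_ts"],
                "metadata.key": entry["metadata"]["key"]}

for _ in range(2):   # the "phone" retries the same batch
    ucdb.update_one(update_query, {"$set": entry}, upsert=True)

print(ucdb.count_documents({"user_id": test_uuid}))    # 1, not 2
ucdb.delete_many({"user_id": test_uuid})                # clean up the demo entry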
def get_uuid_list(): return get_usercache_db().distinct("user_id")
def tearDown(self):
    edb.get_analysis_timeseries_db().delete_many({'user_id': self.testUserId})
    edb.get_usercache_db().delete_many({'user_id': self.testUserId})
def clearRelatedDb(self): edb.get_timeseries_db().delete_many({"user_id": self.testUUID}) edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID}) edb.get_usercache_db().delete_many({"user_id": self.testUUID})
def __init__(self, user_id):
    super(BuiltinUserCache, self).__init__(user_id)
    self.key_query = lambda(key): {"metadata.key": key}
    self.ts_query = lambda(tq): BuiltinUserCache._get_ts_query(tq)
    self.type_query = lambda(entry_type): {"metadata.type": entry_type}
    self.db = get_usercache_db()
def get_uuid_list():
    return edb.get_usercache_db().distinct('user_id')
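A short hedged sketch (not from the source) of how the uuid list might be used to report how many usercache entries are pending per user. Purely illustrative; it assumes a reachable MongoDB and a pymongo version that provides count_documents.

# Hedged sketch: per-user pending-entry counts built on top of the distinct user_id list.
import emission.core.get_database as edb

def pending_counts():
    ucdb = edb.get_usercache_db()
    return {user_id: ucdb.count_documents({"user_id": user_id})
            for user_id in ucdb.distinct("user_id")}

# for user_id, count in pending_counts().items():
#     print("%s -> %d pending entries" % (user_id, count))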
reset_collection(edb.get_analysis_timeseries_db(), user.uuid, new_uuid) logging.debug("Resetting client...") reset_collection(edb.get_client_db(), user.uuid, new_uuid) logging.debug("Resetting client_stats_backup...") reset_collection(edb.get_client_stats_db_backup(), user.uuid, new_uuid) logging.debug("Resetting server_stats_backup...") reset_collection(edb.get_server_stats_db_backup(), user.uuid, new_uuid) logging.debug("Resetting result_stats_backup...") reset_collection(edb.get_result_stats_db_backup(), user.uuid, new_uuid) logging.debug("Resetting edb.get_common_place_db...") reset_collection(edb.get_common_place_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_common_trip_db...") reset_collection(edb.get_common_trip_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_habitica_db...") reset_collection(edb.get_habitica_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_pipeline_state_db...") reset_collection(edb.get_pipeline_state_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_profile_db...") reset_collection(edb.get_profile_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_timeseries_db...") reset_collection(edb.get_timeseries_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_timeseries_error_db...") reset_collection(edb.get_timeseries_error_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_usercache_db...") reset_collection(edb.get_usercache_db(), user.uuid, new_uuid)
import emission.core.wrapper.user as ecwu if __name__ == '__main__': parser = argparse.ArgumentParser(prog="save_ground_truth") group = parser.add_mutually_exclusive_group(required=True) group.add_argument("-e", "--user_email") group.add_argument("-u", "--user_uuid") parser.add_argument("date", help="date to retrieve ground truth (YYYY-MM-DD)") parser.add_argument("file_name", help="file_name to store the result to") args = parser.parse_args() if args.user_uuid: sel_uuid = uuid.UUID(args.user_uuid) else: sel_uuid = ecwu.User.fromEmail(args.user_email).uuid print("Saving data for %s, %s to file %s" % (sel_uuid, args.date, args.file_name)) tj = edb.get_usercache_db().find_one({ 'metadata.key': "diary/trips-%s" % args.date, "user_id": sel_uuid }) print("Retrieved object is of length %s" % len(tj)) json.dump(tj, open(args.file_name, "w"), indent=4, default=bju.default)
# The temporary dump files are now used to evaluate the efficiency of various
# smoothing algorithms. This script is not useful otherwise.

# Standard imports
import json
from attrdict import AttrDict
from pymongo import MongoClient
import logging
from dateutil import parser
import time

import emission.core.get_database as edb

to_ts = lambda(dt): time.mktime(dt.timetuple()) * 1000

logging.basicConfig(level=logging.DEBUG)

reconstructedTimeSeriesDb = edb.get_usercache_db()
reconstructedTripsDb = edb.get_section_db()

def load_file(curr_list):
    prevSection = None
    for entryJSON in curr_list:
        entryDict = AttrDict(entryJSON)
        if entryDict.type == "move":
            trip_id = entryDict.startTime
            for i, activity in enumerate(entryDict.activities):
                print("For trip id = %s, activity %s starts at %s" %
                      (trip_id, i, activity.startTime))
                section = AttrDict()
                section.id = trip_id + "_" + str(i)
                section.filter = "time"
                section.source = "raw_auto"
                section.start_time = activity.startTime
# The temporary dump files are now used to evaluate the efficiency of various
# smoothing algorithms. This script is not useful otherwise.

# Standard imports
import json
from attrdict import AttrDict
from pymongo import MongoClient
import logging
from dateutil import parser
import time

import emission.core.get_database as edb

to_ts = lambda (dt): time.mktime(dt.timetuple()) * 1000

logging.basicConfig(level=logging.DEBUG)

reconstructedTimeSeriesDb = edb.get_usercache_db()
reconstructedTripsDb = edb.get_section_db()

def load_file(curr_list):
    prevSection = None
    for entryJSON in curr_list:
        entryDict = AttrDict(entryJSON)
        if entryDict.type == "move":
            trip_id = entryDict.startTime
            for i, activity in enumerate(entryDict.activities):
                print("For trip id = %s, activity %s starts at %s" %
                      (trip_id, i, activity.startTime))
                section = AttrDict()
                section.id = trip_id + "_" + str(i)
                section.filter = "time"
import attrdict as ad
import json
import bson.json_util as bju
import sys
from uuid import UUID
import argparse

import emission.core.get_database as edb
import emission.core.wrapper.user as ecwu

if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog="save_ground_truth")

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-e", "--user_email")
    group.add_argument("-u", "--user_uuid")

    parser.add_argument("date", help="date to retrieve ground truth (YYYY-MM-DD)")
    parser.add_argument("file_name", help="file_name to store the result to")

    args = parser.parse_args()

    if args.user_uuid:
        sel_uuid = UUID(args.user_uuid)
    else:
        sel_uuid = ecwu.User.fromEmail(args.user_email).uuid

    print("Saving data for %s, %s to file %s" % (sel_uuid, args.date, args.file_name))
    tj = edb.get_usercache_db().find_one({'metadata.key': "diary/trips-%s" % args.date,
                                          "user_id": sel_uuid})
    print("Retrieved object is of length %s" % len(tj))
    json.dump(tj, open(args.file_name, "w"), indent=4, default=bju.default)
def tearDown(self): edb.get_usercache_db().delete_many({"user_id": self.testUserUUID1}) edb.get_usercache_db().delete_many({"user_id": self.testUserUUID2}) edb.get_usercache_db().delete_many({"user_id": self.testUserUUIDios})
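The tearDown and clearRelatedDb variants in these examples all repeat the same per-collection delete. A small hedged helper (my own, not in the source) that factors the repetition out, using only the collection getters shown above:

# Hedged helper: clear every per-user collection used in these tests in one call.
import emission.core.get_database as edb

def clear_user_data(user_ids):
    query = {"user_id": {"$in": list(user_ids)}}
    for get_coll in (edb.get_timeseries_db,
                     edb.get_analysis_timeseries_db,
                     edb.get_usercache_db):
        result = get_coll().delete_many(query)
        print("%s: deleted %d" % (get_coll.__name__, result.deleted_count))

# clear_user_data([self.testUserUUID1, self.testUserUUID2])  # e.g. inside a test's tearDown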
def tearDown(self): edb.get_usercache_db().remove({"user_id": self.testUserUUID1}) edb.get_usercache_db().remove({"user_id": self.testUserUUID2}) edb.get_usercache_db().remove({"user_id": self.testUserUUIDios})
def testNoOverrides(self):
    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    eacc.save_all_configs(self.androidUUID, tq)
    saved_entries = list(edb.get_usercache_db().find({'user_id': self.androidUUID,
                                                      'metadata.key': 'config/sensor_config'}))
    self.assertEqual(len(saved_entries), 0)
edb.get_uuid_db().update({"uuid" : user.uuid}, {"$set": {"uuid" : new_uuid}}) logging.debug("Resetting alternatives...") reset_collection(edb.get_alternatives_db(), user.uuid, new_uuid) logging.debug("Resetting analysis...") reset_collection(edb.get_analysis_timeseries_db(), user.uuid, new_uuid) logging.debug("Resetting client...") reset_collection(edb.get_client_db(), user.uuid, new_uuid) logging.debug("Resetting client_stats_backup...") reset_collection(edb.get_client_stats_db_backup(), user.uuid, new_uuid) logging.debug("Resetting server_stats_backup...") reset_collection(edb.get_server_stats_db_backup(), user.uuid, new_uuid) logging.debug("Resetting result_stats_backup...") reset_collection(edb.get_result_stats_db_backup(), user.uuid, new_uuid) logging.debug("Resetting edb.get_common_place_db...") reset_collection(edb.get_common_place_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_common_trip_db...") reset_collection(edb.get_common_trip_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_habitica_db...") reset_collection(edb.get_habitica_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_pipeline_state_db...") reset_collection(edb.get_pipeline_state_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_profile_db...") reset_collection(edb.get_profile_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_timeseries_db...") reset_collection(edb.get_timeseries_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_timeseries_error_db...") reset_collection(edb.get_timeseries_error_db(), user.uuid, new_uuid) logging.debug("Resetting edb.get_usercache_db...") reset_collection(edb.get_usercache_db(), user.uuid, new_uuid)
def clearRelatedDb(self): edb.get_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}}) edb.get_analysis_timeseries_db().remove({"user_id": {"$in": self.testUUIDList}}) edb.get_usercache_db().remove({"user_id": {"$in": self.testUUIDList}})
def clearRelatedDb(self): edb.get_timeseries_db().delete_many({"user_id": self.testUUID}) edb.get_analysis_timeseries_db().delete_many( {"user_id": self.testUUID}) edb.get_usercache_db().delete_one({"user_id": self.testUUID})