def clearRelatedDb(self):
    """Delete every entry belonging to the test users from all related collections."""
    match_test_users = {"user_id": {"$in": self.testUUIDList}}
    for db_getter in (edb.get_timeseries_db,
                      edb.get_analysis_timeseries_db,
                      edb.get_usercache_db,
                      edb.get_uuid_db):
        db_getter().remove(match_test_users)
def fromEmail(userEmail):
    """Look up the registered User for userEmail; None if no mapping exists."""
    mapping = get_uuid_db().find_one({'user_email': userEmail})
    if mapping is None:
        return None
    user = User(mapping['uuid'])
    user.__email = userEmail
    return user
def purge_entries_for_user(curr_uuid, is_purge_state, db_array=None):
    """Remove every database entry associated with curr_uuid.

    Deletes from the timeseries, the analysis timeseries and the uuid map,
    and, when is_purge_state is True, from the pipeline state db as well.
    db_array, when supplied, is [ts_db, ats_db, udb, psdb]; otherwise the
    handles are looked up from emission.core.get_database.
    """
    logging.info("For uuid = %s, deleting entries from the timeseries" % curr_uuid)
    if db_array is None:
        import emission.core.get_database as edb
        ts_db = edb.get_timeseries_db()
        ats_db = edb.get_analysis_timeseries_db()
        udb = edb.get_uuid_db()
        psdb = edb.get_pipeline_state_db()
        logging.debug("db_array not passed in, looking up databases")
    else:
        [ts_db, ats_db, udb, psdb] = db_array
        logging.debug("db_array passed in with databases %s" % db_array)
    del_result = ts_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % del_result)
    logging.info("For uuid = %s, deleting entries from the analysis_timeseries" % curr_uuid)
    del_result = ats_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % del_result)
    # the uuid db keys entries by "uuid", not "user_id"
    logging.info("For uuid %s, deleting entries from the user_db" % curr_uuid)
    del_result = udb.remove({"uuid": curr_uuid})
    logging.info("result = %s" % del_result)
    if is_purge_state:
        logging.info("For uuid %s, deleting entries from the pipeline_state_db" % curr_uuid)
        logging.info("result = %s" % psdb.remove({"user_id": curr_uuid}))
def purge_entries_for_user(curr_uuid, is_purge_state, db_array=None):
    """Delete all entries for curr_uuid from the timeseries, analysis
    timeseries and uuid databases; when is_purge_state is True, also
    delete the user's pipeline state.

    :param curr_uuid: uuid whose entries should be purged
    :param is_purge_state: whether to purge the pipeline state as well
    :param db_array: optional [ts_db, ats_db, udb, psdb] handles; looked up
        from emission.core.get_database when not provided
    """
    logging.info("For uuid = %s, deleting entries from the timeseries" % curr_uuid)
    if db_array is not None:
        [ts_db, ats_db, udb, psdb] = db_array
        logging.debug("db_array passed in with databases %s" % db_array)
    else:
        # Import here so callers that inject db_array do not need the
        # database module at all
        import emission.core.get_database as edb
        ts_db = edb.get_timeseries_db()
        ats_db = edb.get_analysis_timeseries_db()
        udb = edb.get_uuid_db()
        psdb = edb.get_pipeline_state_db()
        logging.debug("db_array not passed in, looking up databases")
    timeseries_del_result = ts_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % timeseries_del_result)
    logging.info(
        "For uuid = %s, deleting entries from the analysis_timeseries" %
        curr_uuid)
    analysis_timeseries_del_result = ats_db.remove({"user_id": curr_uuid})
    logging.info("result = %s" % analysis_timeseries_del_result)
    # NOTE: the uuid db keys entries by "uuid", not "user_id"
    logging.info("For uuid %s, deleting entries from the user_db" % curr_uuid)
    user_db_del_result = udb.remove({"uuid": curr_uuid})
    logging.info("result = %s" % user_db_del_result)
    if is_purge_state:
        logging.info(
            "For uuid %s, deleting entries from the pipeline_state_db" %
            curr_uuid)
        psdb_del_result = psdb.remove({"user_id": curr_uuid})
        logging.info("result = %s" % psdb_del_result)
def getUUID(request, inHeader=False):
    """Resolve the calling user's UUID from the request.

    With skipAuth set, the user email is taken in plaintext from the request
    (or an arbitrary registered user is picked when no email is supplied);
    otherwise the auth token is verified and mapped to a UUID, raising a 403
    when no account matches.  The resolved UUID is also stored on
    request.params.user_uuid.
    """
    if not skipAuth:
        userToken = __getToken__(request, inHeader)
        retUUID = getUUIDFromToken(userToken)
        if retUUID is None:
            raise HTTPError(403, "token is valid, but no account found for user")
    elif 'User' in request.headers or 'user' in request.json:
        # skipAuth = true, so the email will be sent in plaintext
        userEmail = __getToken__(request, inHeader)
        retUUID = __getUUIDFromEmail__(userEmail)
        logging.debug("skipAuth = %s, returning UUID directly from email %s" %
                      (skipAuth, retUUID))
    else:
        # Return a random user to make it easy to experiment without having
        # to specify a user
        # TODO: Remove this if it is not actually used
        from get_database import get_uuid_db
        retUUID = get_uuid_db().find_one()['uuid']
        logging.debug("skipAuth = %s, returning arbitrary UUID %s" %
                      (skipAuth, retUUID))
        if Client("choice").getClientKey() is None:
            Client("choice").update(createKey=True)
    request.params.user_uuid = retUUID
    return retUUID
def setUp(self):
    """Reset the database to a clean state and reload the modes table."""
    self.serverName = 'localhost'
    # Start from a clean slate on every run
    common.dropAllCollections(get_db())
    client_count = get_client_db().find().count()
    profile_count = get_profile_db().count()
    uuid_count = get_uuid_db().count()
    logging.info("After setup, client count = %d, profile count = %d, uuid count = %d" %
                 (client_count, profile_count, uuid_count))
    common.loadTable(self.serverName, "Stage_Modes",
                     "emission/tests/data/modes.json")
def find_inactive_users():
    """Scan every registered user and print the ones that look inactive.

    A user is considered inactive when they have no server API stats at all,
    no usercache calls, or a last usercache call older than one week.  The
    report is printed (python 2 print statements), bucketed by consent status
    and by whether the user signed up before September 1, 2016.
    """
    inactive_users = []  # tuples of (email, signup date, last usercache date or ())
    inactive_users_new_consent = ""
    inactive_users_old_consent = ""
    inactive_users_before_september = ""
    inactive_users_after_september = ""
    # NOTE(review): replace(weeks=-1) relies on old arrow behavior; newer
    # arrow versions use shift() for relative offsets — confirm pinned version
    one_week_ago_ts = arrow.utcnow().replace(weeks=-1).timestamp
    september_first = arrow.get('2016-09-01').timestamp
    for user in edb.get_uuid_db().find():
        db = esta.TimeSeries.get_time_series(user['uuid']).get_data_df("stats/server_api_time", time_query=None)
        new_consent = esta.TimeSeries.get_time_series(user['uuid']).get_data_df("config/consent", time_query=None)
        signup_date = arrow.get(user['update_ts'])
        if db.empty:
            # No API calls ever recorded -> inactive since signup
            inactive_users.append((user['user_email'], signup_date.date(), ()))
            # NOTE(review): users with an EMPTY config/consent df are put in
            # the bucket that is later printed as "consented to the new IRB
            # protocol" — this looks inverted; verify the intended semantics
            if new_consent.empty:
                inactive_users_new_consent+=str(user['user_email'])+', '
            else:
                inactive_users_old_consent+=str(user['user_email'])+', '
            if signup_date.timestamp < september_first:
                inactive_users_before_september+=str(user['user_email'])+', '
            else:
                inactive_users_after_september+=str(user['user_email'])+', '
        else:
            #check last usercache call:
            #the user is inactive if there are no calls or if the last one was before one_week_ago_ts
            last_usercache_call = db[db['name'].str.contains('usercache', case=False)].tail(1)
            if last_usercache_call.empty:
                inactive_users.append((user['user_email'], signup_date.date(), ()))
                if new_consent.empty:
                    inactive_users_new_consent+=str(user['user_email'])+', '
                else:
                    inactive_users_old_consent+=str(user['user_email'])+', '
                if signup_date.timestamp < september_first:
                    inactive_users_before_september+=str(user['user_email'])+', '
                else:
                    inactive_users_after_september+=str(user['user_email'])+', '
            else:
                if last_usercache_call.iloc[0]['ts'] < one_week_ago_ts:
                    # Stale: record the date of the last usercache call too
                    inactive_users.append((user['user_email'], signup_date.date(), arrow.get(last_usercache_call.iloc[0]['ts']).date()))
                    if new_consent.empty:
                        inactive_users_new_consent+=str(user['user_email'])+', '
                    else:
                        inactive_users_old_consent+=str(user['user_email'])+', '
                    if signup_date.timestamp < september_first:
                        inactive_users_before_september+=str(user['user_email'])+', '
                    else:
                        inactive_users_after_september+=str(user['user_email'])+', '
    inactive_users_table = pd.DataFrame(inactive_users, columns=['Email', 'Last Sign Up Date', 'Last Usercache Call'])
    # [:-2] strips the trailing ", " from each accumulated email string
    print "\nList of inactive users emails and date they signed up:"
    print inactive_users_table
    print "\nEmails of inactive users who consented to the new IRB protocol:"
    print inactive_users_new_consent[:-2]
    print "\nEmails of inactive users who did not consent to the new IRB protocol:"
    print inactive_users_old_consent[:-2]
    print "\nEmails of inactive users who signed up before September 1st:"
    print inactive_users_before_september[:-2]
    print "\nEmails of inactive users who signed up after September 1st:"
    print inactive_users_after_september[:-2]
    return
def map_uuid_to_label(uuid, label):
    """Overwrite the user_email field of the uuid's mapping entry with label."""
    update_result = edb.get_uuid_db().update_one(
        {"uuid": uuid}, {"$set": {"user_email": label}})
    print(update_result.raw_result)
    # Exactly one entry must exist, and it must actually have been rewritten
    assert update_result.matched_count == 1
    assert update_result.modified_count == 1
def fromUUID(user_uuid):
    """Construct a User for user_uuid, attaching the email when a mapping exists."""
    user = User(user_uuid)
    mapping = get_uuid_db().find_one({'uuid': user_uuid})
    # Some obsolete code/tests never created an email -> uuid mapping, so the
    # lookup may come back empty or without a user_email field; drop this
    # check once that code is gone
    if mapping is not None and 'user_email' in mapping:
        user.__email = mapping['user_email']
    return user
def registerWithUUID(userEmail, anonUUID):
    """Create (or refresh) the email -> uuid mapping for userEmail with the
    given anonUUID, create/refresh the user's profile, and return the
    resulting User object.

    Calling this repeatedly for the same email is intentionally safe: both
    writes are upserts, so a partially-failed earlier call is repaired.
    """
    from datetime import datetime
    # NOTE(review): Client appears unused in the visible body — confirm
    from emission.core.wrapper.client import Client
    # We are accessing three databases here:
    # - The list of pending registrations (people who have filled out demographic
    # information but not installed the app)
    # - The mapping from the userEmail to the user UUID
    # - The mapping from the UUID to other profile information about the user
    # The first two are indexed by the user email. We will use the same field
    # name in both to indicate that it is a shared key. This also allows us to
    # have a simple query that we can reuse.
    userEmailQuery = {'user_email': userEmail}
    # First, we construct the email -> uuid mapping and store it in the appropriate database.
    # At this point, we don't know or care whether the user is part of a study
    # We also store a create timestamp just because that's always a good idea
    # What happens if the user calls register() again? Do we want to generate a new UUID?
    # Do we want to update the create timestamp?
    # For now, let's assume that the answer to both of those questions is yes,
    # because that allows us to use upsert :)
    # A bonus fix is that if something is messed up in the DB, calling create again will fix it.
    emailUUIDObject = {'user_email': userEmail, 'uuid': anonUUID, 'update_ts': datetime.now()}
    writeResultMap = get_uuid_db().replace_one(userEmailQuery, emailUUIDObject, upsert=True)
    # Note, if we did want the create_ts to not be overwritten, we can use the
    # writeResult to decide how to deal with the values
    # Now, we look to see if the user is part of a study. We can either store
    # this information in the profile database, or the mapping, or both. For now,
    # let us store this in the profile database since it is sufficient for it to
    # be associated with the UUID, we anticipate using it for customization, and
    # we assume that other customization stuff will be stored in the profile.
    # We could also assume that we will create the profile if we created the map
    # and update if we updated. But that has some reliability issues. For
    # example, what if creating the map succeeded but creating the profile
    # failed? Subsequently calling the method again to try and fix the profile
    # will continue to fail because we will be trying to update.
    # Much better to deal with it separately by doing a separate upsert
    # Second decision: what do we do if the user is not part of a study? Create a
    # profile anyway with an empty list, or defer the creation of the profile?
    #
    # Decision: create profile with empty list for two reasons:
    # a) for most of the functions, we want to use the profile data. We should
    # only use the email -> uuid map in the API layer to get the UUID, and use
    # the UUID elsewhere. So we need to have profiles for non-study participants
    # as well.
    # b) it will also make the scripts to update the profile in the background
    # easier to write. They won't have to query the email -> UUID database and
    # create the profile if it doesn't exist - they can just work off the profile
    # database.
    # TODO: Write a script that periodically goes through and identifies maps
    # that don't have an associated profile and fix them
    writeResultProfile = User.createProfile(anonUUID, datetime.now())
    return User.fromUUID(anonUUID)
def create_party_leaders():
    """Register the three party leaders and sign each of them up with habitica."""
    leaders = [
        ("Juliana", "*****@*****.**"),
        ("Sunil", "*****@*****.**"),
        ("Shankari", "*****@*****.**"),
    ]
    for leader_name, leader_email in leaders:
        ecwu.User.register(leader_email)
        leader_uuid = edb.get_uuid_db().find_one({'user_email': leader_email})['uuid']
        logging.debug("Found %s's uuid %s" % (leader_name, leader_uuid))
        proxy.habiticaRegister(leader_name, leader_email, "autogenerate_me",
                               leader_uuid)
def register(userEmail):
    """Idempotently register userEmail, reusing any previously assigned UUID."""
    import uuid
    # The UUID is what gets stored in the trip database, as a fig leaf of
    # anonymity.  Registration must be idempotent, so an existing mapping's
    # UUID is reused rather than regenerated.
    existing_entry = get_uuid_db().find_one({"user_email": userEmail})
    if existing_entry is not None:
        anonUUID = existing_entry['uuid']
    else:
        anonUUID = uuid.uuid4()
    return User.registerWithUUID(userEmail, anonUUID)
def setUp(self):
    """Reset to a clean slate: drop all collections, install the sample
    client config, and reload the Stage_Modes table."""
    # Make sure we start with a clean slate every time
    self.serverName = 'localhost'
    common.dropAllCollections(edb._get_current_db())
    import shutil
    # Install the sample client settings so client-dependent tests have a config
    self.config_path = "conf/clients/testclient.settings.json"
    shutil.copyfile("%s.sample" % self.config_path, self.config_path)
    logging.info("After setup, client count = %d, profile count = %d, uuid count = %d" %
        (get_client_db().find().count(), get_profile_db().count(), get_uuid_db().count()))
    common.loadTable(self.serverName, "Stage_Modes", "emission/tests/data/modes.json")
def get_split_uuid_lists(n_splits, is_public_pipeline):
    """Partition the users to be processed into n_splits lists of uuids,
    balanced by pending-message count.

    :param n_splits: number of parallel pipeline workers to split across
    :param is_public_pipeline: when True, select only the test phones;
        otherwise select everyone else (plus TEMP_HANDLED_PUBLIC_PHONES)
    :return: list of n_splits lists of uuids
    """
    get_count = lambda u: enua.UserCache.getUserCache(u).getMessageCount()
    # The usercache message count is only a scheduling hint: a zero count here
    # is no guarantee of zero count during processing, and in particular no
    # indication that there are no pending entries in the long-term cache.
    # (The old technique of keeping separate long-term-cache and usercache
    # uuid lists required coordination after the usercache was processed, so
    # it is no longer feasible with a fully parallelized pipeline.)  We
    # therefore use the count as a hint but do not filter users based on it.
    all_uuids = [e["uuid"] for e in edb.get_uuid_db().find()]
    if is_public_pipeline:
        sel_uuids = [u for u in all_uuids if u in estag.TEST_PHONE_IDS]
    else:
        sel_uuids = [u for u in all_uuids if u not in estag.TEST_PHONE_IDS]
        # Add back the test phones for now so that we can test the data
        # collection changes before deploying them in the wild
        sel_uuids.extend(TEMP_HANDLED_PUBLIC_PHONES)
    sel_jobs = [(u, get_count(u)) for u in sel_uuids]
    # non_zero_jobs = [j for j in sel_jobs if j[1] !=0 ]
    # Not filtering for now
    non_zero_jobs = sel_jobs
    logging.debug(
        "all_uuids = %s, sel_uuids = %s, sel_jobs = %s, non_zero_jobs = %s" %
        (len(all_uuids), len(sel_uuids), len(sel_jobs), len(non_zero_jobs)))
    # FIX: DataFrame.sort() was removed in pandas 0.20; sort_values is the
    # direct, backward-compatible replacement (available since 0.17)
    non_zero_jobs_df = pd.DataFrame(non_zero_jobs,
                                    columns=['user_id', 'count']).sort_values("count")
    # Round-robin assignment in increasing-count order balances the load
    ret_splits = [[] for _ in range(n_splits)]
    col = 0
    for i, nzj in enumerate(non_zero_jobs_df.to_dict('records')):
        ret_splits[col].append(nzj['user_id'])
        col = col + 1
        if col == n_splits:
            logging.debug("reached n_splits, setting to zero")
            col = 0
    logging.debug("Split values are %s" % ret_splits)
    return ret_splits
def setUp(self):
    """Reset to a clean slate: drop all collections, install the sample
    client config, and reload the Stage_Modes table."""
    # Make sure we start with a clean slate every time
    self.serverName = 'localhost'
    common.dropAllCollections(edb._get_current_db())
    import shutil
    # Install the sample client settings so client-dependent tests have a config
    self.config_path = "conf/clients/testclient.settings.json"
    shutil.copyfile("%s.sample" % self.config_path, self.config_path)
    logging.info(
        "After setup, client count = %d, profile count = %d, uuid count = %d" %
        (get_client_db().find().count(), get_profile_db().count(),
         get_uuid_db().count()))
    common.loadTable(self.serverName, "Stage_Modes",
                     "emission/tests/data/modes.json")
def getResult(user_uuid):
    """Render the leaderboard result page for user_uuid, including the sorted
    list of all other users' current scores."""
    # This is in here, as opposed to the top level as recommended by the PEP
    # because then we don't have to worry about loading bottle in the unit tests
    from bottle import template
    (prevScore, currScore) = getStoredScore(User.fromUUID(user_uuid))
    (level, sublevel) = getLevel(currScore)
    # Collect every user's current score (index [1] of the stored pair),
    # sorted ascending for the leaderboard display
    otherCurrScoreList = sorted(
        getStoredScore(User.fromUUID(entry['uuid']))[1]
        for entry in get_uuid_db().find({}, {'uuid': 1, '_id': 0}))
    return template("clients/leaderboard/result_template.html",
                    level_picture_filename=getFileName(level, sublevel),
                    prevScore=prevScore,
                    currScore=currScore,
                    otherCurrScoreList=otherCurrScoreList)
def get_aggregate_analytics():
    """Plot server API response times for all users on one combined figure.

    Gathers every user's stats/server_api_time entries, clips readings to a
    1 second maximum, plots one colored series per endpoint of interest, and
    saves the figure as app_analytics.png and app_analytics.eps.
    """
    # Gather the server_api_time stats across every registered user
    user_dfs = []
    for user in edb.get_uuid_db().find():
        user_df = esta.TimeSeries.get_time_series(user['uuid']).get_data_df(
            "stats/server_api_time", time_query=None)
        if not user_df.empty:
            user_dfs.append(user_df)
    # FIX: DataFrame.append was deprecated and removed in pandas 2.0; a single
    # concat is the supported (and faster) replacement for append-in-a-loop
    df = pd.concat(user_dfs, ignore_index=True) if user_dfs else pd.DataFrame()
    df['datetime'] = df.ts.apply(lambda ts: dt.datetime.fromtimestamp(ts))
    # FIX: .ix was removed in pandas 1.0; .loc is the supported setter here.
    # Clip response times at 1 second so outliers don't flatten the plot.
    df.loc[df.reading > 1, 'reading'] = 1
    fig, ax = plt.subplots()
    ax.xaxis.set_major_locator(mdates.WeekdayLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%m/%d/%Y"))
    plt.ylabel('Response time')
    plt.title('App Analytics')
    # (exact endpoint name, plot color, legend label)
    series_specs = [
        ("POST_/result/metrics/timestamp", 'g', 'Dashboard'),
        ("POST_/usercache/put", 'b', 'Usercache_put'),
        ("POST_/usercache/get", 'r', 'Usercache_get'),
        ("POST_/stats/set", 'black', 'Stats_set'),
        ("POST_/habiticaRegister", 'orange', 'Habitica Sign up_Login'),
        ("POST_/habiticaProxy", 'aqua', 'Habitica'),
    ]
    for endpoint, color, label in series_specs:
        f_df = df[df.name == endpoint]
        f_df.plot(x="datetime", y="reading", ax=ax, style='+', color=color,
                  label=label)
    # The diary endpoint has a variable suffix, so substring-match it
    f_df = df[df.name.str.contains("POST_/timeline/getTrips")]
    f_df.plot(x="datetime", y="reading", ax=ax, style='+', color='m',
              label='Diary')
    plt.legend()
    fig.savefig('app_analytics.png')
    fig.savefig('app_analytics.eps', format='eps', dpi=1000)
    return
def getUUID(request, inHeader=False):
    """Resolve the requesting user's UUID.

    With skipAuth set, the "user" is taken directly from the plaintext email
    in the request (or an arbitrary registered user when none is given);
    otherwise the token is verified and mapped to a UUID.  The resolved UUID
    is also stashed on request.params.user_uuid.

    NOTE(review): unlike the newer variant of this function, this one does
    not raise a 403 when the token maps to no account — retUUID may be None.
    """
    retUUID = None
    if skipAuth:
        if 'User' in request.headers or 'user' in request.json:
            # skipAuth = true, so the email will be sent in plaintext
            userEmail = __getToken__(request, inHeader)
            retUUID = __getUUIDFromEmail__(userEmail)
            logging.debug("skipAuth = %s, returning UUID directly from email %s" %
                (skipAuth, retUUID))
        else:
            # Return a random user to make it easy to experiment without having to specify a user
            # TODO: Remove this if it is not actually used
            from get_database import get_uuid_db
            user_uuid = get_uuid_db().find_one()['uuid']
            retUUID = user_uuid
            logging.debug("skipAuth = %s, returning arbitrary UUID %s" %
                (skipAuth, retUUID))
            # Ensure the "choice" client has a key so downstream calls work
            if Client("choice").getClientKey() is None:
                Client("choice").update(createKey = True)
    else:
        userToken = __getToken__(request, inHeader)
        retUUID = getUUIDFromToken(userToken)
    request.params.user_uuid = retUUID
    return retUUID
def testQueryMatching(self):
    """Load one Bay Area day and one Hawaii day of real data, run the intake
    pipeline on both, and verify that trip-metric queries select the right
    subset of users: the air query matches only the Hawaii user, while the
    walk+drive query matches both."""
    # Load data for the Bay Area
    dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
    ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
    etc.setupRealExample(self, dataFileba)
    testUUIDba = self.testUUID
    edb.get_uuid_db().insert_one({
        "uuid": testUUIDba,
        "user_email": "*****@*****.**"
    })
    etc.runIntakePipeline(testUUIDba)
    logging.debug("uuid for the bay area = %s " % testUUIDba)
    # Load data for Hawaii
    dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
    ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})
    etc.setupRealExample(self, dataFilehi)
    testUUIDhi = self.testUUID
    edb.get_uuid_db().insert_one({
        "uuid": testUUIDhi,
        "user_email": "*****@*****.**"
    })
    etc.runIntakePipeline(testUUIDhi)
    logging.debug("uuid for hawaii = %s " % testUUIDhi)
    # Track both uuids so tearDown can clean up after us
    self.testUUIDList = [testUUIDba, testUUIDhi]
    # Requires > 5 walk trips AND > 1 air trip per day
    air_query_spec = {
        "time_type": "local_date",
        "from_local_date": {
            "year": 2016,
            "month": 2
        },
        "to_local_date": {
            "year": 2016,
            "month": 9
        },
        "freq": 'DAILY',
        "checks": [{
            "modes": ['WALKING', 'ON_FOOT'],
            "metric": "count",
            "threshold": {
                "$gt": 5
            }
        }, {
            "modes": ['AIR_OR_HSR'],
            "metric": "count",
            "threshold": {
                "$gt": 1
            }
        }]
    }
    # Since this requires at least one air trip, this will only return the
    # hawaii trip
    self.assertEqual(tripmetrics.query(air_query_spec), [testUUIDhi])
    # Requires > 5 walk trips AND > 1 vehicle trip per day
    walk_drive_spec = {
        "time_type": "local_date",
        "from_local_date": {
            "year": 2016,
            "month": 2
        },
        "to_local_date": {
            "year": 2016,
            "month": 9
        },
        "freq": 'DAILY',
        "checks": [{
            "modes": ['WALKING', 'ON_FOOT'],
            "metric": "count",
            "threshold": {
                "$gt": 5
            }
        }, {
            "modes": ['IN_VEHICLE'],
            "metric": "count",
            "threshold": {
                "$gt": 1
            }
        }]
    }
    # Since this only requires walk and drive, it will return both trips
    # We can't just do a simple equals check since the uuids may not always
    # be returned in the same order
    walk_drive_result = tripmetrics.query(walk_drive_spec)
    self.assertEqual(len(walk_drive_result), 2)
    self.assertIn(testUUIDhi, walk_drive_result)
    self.assertIn(testUUIDba, walk_drive_result)
def get_app_analytics():
    """Plot server API response times per endpoint, one png per endpoint.

    Gathers every user's stats/server_api_time entries, clips readings to a
    1 second maximum, and saves one figure per endpoint of interest
    (Dashboard.png, Usercache_put.png, ...).
    """
    # Gather the server_api_time stats across every registered user
    user_dfs = []
    for user in edb.get_uuid_db().find():
        user_df = esta.TimeSeries.get_time_series(user['uuid']).get_data_df(
            "stats/server_api_time", time_query=None)
        if not user_df.empty:
            user_dfs.append(user_df)
    # FIX: DataFrame.append was deprecated and removed in pandas 2.0; a single
    # concat is the supported (and faster) replacement for append-in-a-loop
    df = pd.concat(user_dfs, ignore_index=True) if user_dfs else pd.DataFrame()
    df['datetime'] = df.ts.apply(lambda ts: dt.datetime.fromtimestamp(ts))
    # FIX: .ix was removed in pandas 1.0; .loc is the supported setter here.
    # Clip response times at 1 second so outliers don't flatten the plots.
    df.loc[df.reading > 1, 'reading'] = 1
    # (endpoint, plot title, output filename, substring match?)
    plot_specs = [
        ("POST_/result/metrics/timestamp", 'Dashboard', 'Dashboard.png', False),
        ("POST_/usercache/put", 'Usercache_put', 'Usercache_put.png', False),
        ("POST_/usercache/get", 'Usercache_get', 'Usercache_get.png', False),
        ("POST_/stats/set", 'Stats_set', 'Stats_set.png', False),
        ("POST_/habiticaRegister", 'Habitica Sign up and Login',
         'Habitica Sign up_Login.png', False),
        ("POST_/habiticaProxy", 'Habitica', 'Habitica.png', False),
        # The diary endpoint has a variable suffix, so substring-match it
        ("POST_/timeline/getTrips", 'Diary', 'Diary.png', True),
    ]
    for endpoint, title, filename, is_substring in plot_specs:
        fig, ax = plt.subplots()
        # FIX: the original only formatted the x axis of the first figure;
        # apply consistent date formatting to every plot
        ax.xaxis.set_major_locator(mdates.WeekdayLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%m/%d/%Y"))
        plt.ylabel('Response time')
        if is_substring:
            endpoint_df = df[df.name.str.contains(endpoint)]
        else:
            endpoint_df = df[df.name == endpoint]
        endpoint_df.plot(x="datetime", y="reading", ax=ax, style='+',
                         legend=None)
        plt.title(title)
        fig.savefig(filename)
        plt.close(fig)
    return
def precomputeResults(self):
    """Run the client-specific background tasks for every registered user."""
    for entry in get_uuid_db().find({}, {'uuid': 1, '_id': 0}):
        curr_uuid = entry['uuid']
        logging.info("Computing precomputed results for %s" % curr_uuid)
        userclient.runClientSpecificBackgroundTasks(curr_uuid)
import emission.core.get_database as edb import pandas as pd from uuid import UUID import emission.analysis.plotting.geojson.geojson_feature_converter as gfc import emission.analysis.plotting.leaflet_osm.our_plotter as lo import emission.storage.timeseries.abstract_timeseries as esta import emission.storage.decorations.analysis_timeseries_queries as esda import emission.core.wrapper.entry as ecwe import emission.storage.decorations.trip_queries as esdt import emission.storage.timeseries.timequery as estt all_users = pd.DataFrame( list(edb.get_uuid_db().find({}, { "user_email": 1, "uuid": 1, "_id": 0 }))) test_user_id = all_users.iloc[60].uuid ts = esta.TimeSeries.get_time_series(test_user_id) # Get all cleaned trips for the first user ct_df = ts.get_data_df("analysis/cleaned_trip", time_query=None) #Get GeoJson for trip first_trip_for_user = ct_df.iloc[0] first_trip_start_ts = first_trip_for_user.start_ts first_trip_end_ts = first_trip_for_user.end_ts trip_start_end_fuzz = 10 # seconds trips_geojson_list = gfc.get_geojson_for_ts( test_user_id, first_trip_start_ts - trip_start_end_fuzz,
# Script: re-anonymize every non-test user by assigning a fresh uuid and
# rewriting it across all per-user collections.
import logging
import attrdict as ad
import uuid
import emission.core.get_database as edb
import emission.storage.timeseries.aggregate_timeseries as estag

def reset_collection(coll, old_uuid, new_uuid):
    # Rewrite the user_id of every matching entry in coll to new_uuid.
    # NOTE(review): the filter uses the global `user.uuid` set by the
    # __main__ loop below instead of the `old_uuid` parameter — this only
    # works because every caller passes user.uuid as old_uuid; looks like a
    # latent bug, confirm before reusing this function elsewhere
    logging.debug(coll.update({"user_id": user.uuid},
                              {"$set": {"user_id": new_uuid}},
                              multi=True))

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    for user_dict in edb.get_uuid_db().find():
        user = ad.AttrDict(user_dict)
        # Test phones must keep their well-known uuids
        if user.uuid in estag.TEST_PHONE_IDS:
            logging.debug("Found test phone, skipping reset")
        else:
            new_uuid = uuid.uuid4()
            # NOTE(review): the message interpolates (new_uuid, user.uuid), so
            # it prints "new -> old"; confirm the intended direction
            logging.debug("Mapping %s -> %s" % (new_uuid, user.uuid))
            edb.get_uuid_db().update({"uuid" : user.uuid},
                                     {"$set": {"uuid" : new_uuid}})
            logging.debug("Resetting alternatives...")
            reset_collection(edb.get_alternatives_db(), user.uuid, new_uuid)
            logging.debug("Resetting analysis...")
            reset_collection(edb.get_analysis_timeseries_db(), user.uuid, new_uuid)
            logging.debug("Resetting client...")
            reset_collection(edb.get_client_db(), user.uuid, new_uuid)
            logging.debug("Resetting client_stats_backup...")
            reset_collection(edb.get_client_stats_db_backup(), user.uuid, new_uuid)
            logging.debug("Resetting server_stats_backup...")
def unregister(userEmail):
    """Remove the email -> uuid mapping and the profile; return the freed uuid."""
    unregistered_uuid = User.fromEmail(userEmail).uuid
    get_uuid_db().delete_one({'user_email': userEmail})
    get_profile_db().delete_one({'user_id': unregistered_uuid})
    return unregistered_uuid
def isRegistered(userEmail):
    """Return True if userEmail already has an email -> uuid mapping."""
    # find_one returns None when no mapping exists; return the boolean
    # directly instead of the original if/else over None
    return get_uuid_db().find_one({'user_email': userEmail}) is not None
def map_uuid_to_label(uuid, label):
    """Set the user_email field of the given uuid's entry to label.

    Asserts that exactly one entry matched and was actually modified.
    NOTE: the `uuid` parameter shadows the stdlib uuid module name.
    """
    ur = edb.get_uuid_db().update_one({"uuid": uuid},
                                      {"$set": {"user_email": label}})
    print(ur.raw_result)
    assert(ur.matched_count == 1)
    assert(ur.modified_count == 1)
# extract_timeline_for_day_range_and_user.py script
# The channel is stored in the "client" field of the profile
import emission.core.wrapper.user as ecwu
import sys
import argparse
import logging
import json
import bson.json_util as bju
import emission.core.get_database as edb

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser(prog="get_users_for_channel")
    parser.add_argument("channel",
        help="the channel that the users signed in to")
    parser.add_argument("-o", "--outfile",
        help="the output filename (default: stdout)")
    args = parser.parse_args()
    # Profiles store the channel in "client"; map each matching profile's
    # user_id back to its email entry in the uuid db
    matched_profiles_it = edb.get_profile_db().find({"client": args.channel})
    matched_uuids_it = [p["user_id"] for p in matched_profiles_it]
    matched_email2uuid_it = [edb.get_uuid_db().find_one({"uuid": u})
                             for u in matched_uuids_it]
    logging.debug("Mapped %d entries for channel %s" %
                  (len(matched_email2uuid_it), args.channel))
    if args.outfile is None:
        json.dump(matched_email2uuid_it, sys.stdout, default=bju.default)
    else:
        # FIX: use a context manager so the output file is flushed and
        # closed (the original leaked the open file handle)
        with open(args.outfile, "w") as out_fd:
            json.dump(matched_email2uuid_it, out_fd, default=bju.default)
def unregister(userEmail):
    """Delete the email -> uuid mapping and the profile; return the freed uuid."""
    user = User.fromEmail(userEmail)
    uuid = user.uuid
    # FIX: remove() is deprecated in pymongo 3 and gone in pymongo 4; each
    # query matches a single document, so delete_one is the direct
    # replacement (and matches the delete_one-based variant of this function
    # used elsewhere in the codebase)
    get_uuid_db().delete_one({'user_email': userEmail})
    get_profile_db().delete_one({'user_id': uuid})
    return uuid
help="after how many lines we should print a status message.") parser.add_argument("-i", "--info-only", default=False, action='store_true', help="only print entry analysis") parser.add_argument("-p", "--pipeline-purge", default=False, action='store_true', help="purge the pipeline state as well") args = parser.parse_args() fn = args.timeline_filename logging.info("Loading file or prefix %s" % fn) sel_file_list = common.read_files_with_prefix(fn) ts_db = edb.get_timeseries_db() ats_db = edb.get_analysis_timeseries_db() udb = edb.get_uuid_db() psdb = edb.get_pipeline_state_db() db_array = [ts_db, ats_db, udb, psdb] for i, filename in enumerate(sel_file_list): if "pipelinestate" in filename: continue logging.info("=" * 50) logging.info("Deleting data from file %s" % filename) entries = json.load(gzip.open(filename), object_hook = bju.object_hook) # Obtain uuid and rerun information from entries curr_uuid_list, needs_rerun = common.analyse_timeline(entries) if len(curr_uuid_list) > 1:
def calculate_single_suggestion(uuid):
    """Build a greener-travel suggestion for one user.

    Walks the user's inferred sections newest-first (up to 40), skipping
    sections shorter than 5 minutes, and suggests public transit, biking, or
    walking depending on the sensed mode and trip distance.  Returns a dict
    with message/savings/coordinates/method keys; falls back to a default
    "good job" message when nothing qualifies.
    """
    #Given a single UUID, create a suggestion for them
    return_obj = {
        'message': "Good job walking and biking! No suggestion to show.",
        'savings': "0",
        'start_lat': '0.0',
        'start_lon': '0.0',
        'end_lat': '0.0',
        'end_lon': '0.0',
        'method': 'bike'
    }
    all_users = pd.DataFrame(
        list(edb.get_uuid_db().find({}, {
            "uuid": 1,
            "_id": 0
        })))
    user_id = all_users.iloc[all_users[all_users.uuid == uuid].index.tolist()
                             [0]].uuid
    time_series = esta.TimeSeries.get_time_series(user_id)
    cleaned_sections = time_series.get_data_df("analysis/inferred_section",
                                               time_query=None)
    suggestion_trips = edb.get_suggestion_trips_db()
    #Go in reverse order because we check by most recent trip
    # NOTE(review): counter starts at 40, but the comment below says
    # "Iterate 20 trips back" — confirm the intended window
    counter = 40
    if len(cleaned_sections) == 0:
        return_obj[
            'message'] = 'Suggestions will appear once you start taking trips!'
        return return_obj
    for i in range(len(cleaned_sections) - 1, -1, -1):
        counter -= 1
        if counter < 0:
            #Iterate 20 trips back
            return return_obj
        # Skip sections shorter than 5 minutes
        if cleaned_sections.iloc[i]["end_ts"] - cleaned_sections.iloc[i][
                "start_ts"] < 5 * 60:
            continue
        # distance is in meters; 0.000621371 converts to miles
        distance_in_miles = cleaned_sections.iloc[i]["distance"] * 0.000621371
        mode = cleaned_sections.iloc[i]["sensed_mode"]
        start_loc = cleaned_sections.iloc[i]["start_loc"]["coordinates"]
        # NOTE(review): GeoJSON coordinates are [lon, lat], but index 0 is
        # assigned to *_lat here (the sibling yelp variant assigns index 0 to
        # *_lon) — looks like a lat/lon swap; verify against the geocoding
        # API's expected order before changing
        start_lat = str(start_loc[0])
        start_lon = str(start_loc[1])
        trip_id = cleaned_sections.iloc[i]['trip_id']
        tripDict = suggestion_trips.find_one({'uuid': uuid})
        end_loc = cleaned_sections.iloc[i]["end_loc"]["coordinates"]
        end_lat = str(end_loc[0])
        end_lon = str(end_loc[1])
        # NOTE(review): sensed_mode 5 appears to be car here and 3/4
        # bus/train, based on the comments below — confirm against the
        # motion-type enum
        if mode == 5 and distance_in_miles >= 5 and distance_in_miles <= 15:
            logging.debug("15 >= distance >= 5 so I'm considering distance: " +
                          str(distance_in_miles))
            #Suggest bus if it is car and distance between 5 and 15
            default_message = return_obj['message']
            try:
                message = "Try public transportation from " + return_address_from_location(start_lon + "," + start_lat) + \
                    " to " + return_address_from_location(end_lon + "," + end_lat) + " (tap me to view)"
                #savings per month, .465 kg co2/mile for car, 0.14323126 kg co2/mile for bus
                savings = str(
                    int(distance_in_miles * 30 * .465 -
                        0.14323126 * distance_in_miles * 30))
                return {
                    'message': message,
                    'savings': savings,
                    'start_lat': start_lat,
                    'start_lon': start_lon,
                    'end_lat': end_lat,
                    'end_lon': end_lon,
                    'method': 'public'
                }
                # NOTE(review): unreachable — the return above means this
                # branch never records the trip in the db, unlike the bike
                # and walk branches below; confirm whether the return should
                # come after insert_into_db
                insert_into_db(tripDict, trip_id, suggestion_trips, uuid)
                break
            except ValueError as e:
                return_obj['message'] = default_message
                continue
        elif (mode == 5 or mode == 3
              or mode == 4) and (distance_in_miles < 5
                                 and distance_in_miles >= 1):
            logging.debug("5 > distance >= 1 so I'm considering distance: " +
                          str(distance_in_miles))
            #Suggest bike if it is car/bus/train and distance between 5 and 1
            try:
                message = "Try biking from " + return_address_from_location(start_lon + "," + start_lat) + \
                    " to " + return_address_from_location(end_lon + "," + end_lat) + " (tap me to view)"
                savings = str(int(distance_in_miles * 30 *
                                  .465))  #savings per month, .465 kg co2/mile
                insert_into_db(tripDict, trip_id, suggestion_trips, uuid)
                return {
                    'message': message,
                    'savings': savings,
                    'start_lat': start_lat,
                    'start_lon': start_lon,
                    'end_lat': end_lat,
                    'end_lon': end_lon,
                    'method': 'bike'
                }
                # NOTE(review): unreachable after the return above
                break
            # NOTE(review): bare except silently swallows all failures
            # (including geocoding errors) and moves to the next section
            except:
                continue
        elif (mode == 5 or mode == 3 or mode == 4) and (distance_in_miles < 1):
            logging.debug("1 > distance so I'm considering distance: " +
                          str(distance_in_miles))
            #Suggest walking if it is car/bus/train and distance less than 1
            try:
                message = "Try walking/biking from " + return_address_from_location(start_lon + "," + start_lat) + \
                    " to " + return_address_from_location(end_lon + "," + end_lat) + " (tap me to view)"
                savings = str(int(distance_in_miles * 30 *
                                  .465))  #savings per month, .465 kg co2/mile
                insert_into_db(tripDict, trip_id, suggestion_trips, uuid)
                return {
                    'message': message,
                    'savings': savings,
                    'start_lat': start_lat,
                    'start_lon': start_lon,
                    'end_lat': end_lat,
                    'end_lon': end_lon,
                    'method': 'walk'
                }
                # NOTE(review): unreachable after the return above
                break
            # NOTE(review): bare except — see above
            except:
                continue
    return return_obj
def calculate_yelp_server_suggestion(uuid):
    """Given a single user UUID, create a Yelp-based suggestion for them.

    Looks through the user's 40 most recent cleaned trips (newest first) and,
    for trips ending at a recognizable business category, searches Yelp for a
    better-rated alternative closer to the trip's start, then suggests
    biking / walking / public transit to it. Returns a dict with at least
    'message' and 'method'; the full default dict when nothing qualifies.
    """
    return_obj = {'message': "Good job walking and biking! No suggestion to show.",
                  'savings': "0", 'start_lat': '0.0', 'start_lon': '0.0',
                  'end_lat': '0.0', 'end_lon': '0.0', 'method': 'bike'}
    all_users = pd.DataFrame(list(edb.get_uuid_db().find({}, {"uuid": 1, "_id": 0})))
    user_id = all_users.iloc[all_users[all_users.uuid == uuid].index.tolist()[0]].uuid
    time_series = esta.TimeSeries.get_time_series(user_id)
    cleaned_sections = time_series.get_data_df("analysis/cleaned_trip", time_query=None)
    yelp_suggestion_trips = edb.get_yelp_db()
    # Go in reverse order because we check by most recent trip
    counter = 40
    if len(cleaned_sections) == 0:
        return_obj['message'] = 'Suggestions will appear once you start taking trips!'
        return return_obj
    for i in range(len(cleaned_sections) - 1, -1, -1):
        counter -= 1
        if counter < 0:
            # only consider the 40 most recent trips
            return return_obj
        # skip trips shorter than five minutes
        if cleaned_sections.iloc[i]["end_ts"] - cleaned_sections.iloc[i]["start_ts"] < 5 * 60:
            continue
        # Change distance in meters to miles
        distance_in_miles = cleaned_sections.iloc[i]["distance"] * 0.000621371
        mode = cleaned_sections.iloc[i]["sensed_mode"]
        start_loc = cleaned_sections.iloc[i]["start_loc"]["coordinates"]
        start_lon = str(start_loc[0])
        start_lat = str(start_loc[1])
        start_lat_lon = start_lat + ',' + start_lon
        trip_id = cleaned_sections.iloc[i]['trip_id']
        tripDict = yelp_suggestion_trips.find_one({'uuid': uuid})
        end_loc = cleaned_sections.iloc[i]["end_loc"]["coordinates"]
        end_lon = str(end_loc[0])
        end_lat = str(end_loc[1])
        end_lat_lon = end_lat + ',' + end_lon
        print(end_lat_lon)
        endpoint_categories = category_of_business(end_lat_lon)
        business_locations = {}
        # Fixed: these reverse-geocode lookups were previously repeated (two
        # calls for the start, up to three for the end); each one is a remote
        # API call, so look each endpoint up exactly once.
        start_addr_info = return_address_from_location_yelp(start_lat_lon)
        if len(start_addr_info) == 1:
            begin_address = start_addr_info
        else:
            begin_address = start_addr_info[2]
        end_addr_info = return_address_from_location_yelp(end_lat_lon)
        if len(end_addr_info) == 1:
            # could not resolve the destination address; skip this trip
            continue
        city = end_addr_info[1]
        location_review = review_start_loc(end_lat_lon)
        ratings_bus = {}
        error_message_categor = 'Sorry, unable to retrieve datapoint because datapoint is a house or datapoint does not belong in service categories'
        if (endpoint_categories):
            for categor in endpoint_categories:
                queried_bus = search(API_KEY, categor, city)['businesses']
                for q in queried_bus:
                    if q['rating'] >= location_review:
                        ratings_bus[q['name']] = q['rating']
                        obtained = q['location']['display_address'][0] + q['location']['display_address'][1]
                        # Fixed: str.replace returns a new string; the result
                        # was previously discarded, so the '+'-encoding never
                        # happened.
                        obtained = obtained.replace(' ', '+')
                        business_locations[q['name']] = obtained
        else:
            return {'message': error_message_categor, 'method': 'bike'}
        for a in business_locations:
            calculate_distance = distance(start_lat_lon, business_locations[a])
            # Will check which mode the trip was taking for the integrated
            # calculate yelp suggestion
            if calculate_distance < distance_in_miles and calculate_distance < 5 and calculate_distance >= 1:
                try:
                    message = "Why didn't you bike from " + begin_address + " to " + a + " (tap me to view) " + a + \
                        " has better reviews, closer to your original starting point, and has a rating of " + str(ratings_bus[a])
                    insert_into_db(tripDict, trip_id, yelp_suggestion_trips, uuid)
                    return {'message': message, 'method': 'bike'}
                except ValueError:
                    continue
            elif calculate_distance < distance_in_miles and calculate_distance < 1:
                try:
                    message = "Why didn't you walk from " + begin_address + " to " + a + " (tap me to view) " + a + \
                        " has better reviews, closer to your original starting point, and has a rating of " + str(ratings_bus[a])
                    insert_into_db(tripDict, trip_id, yelp_suggestion_trips, uuid)
                    return {'message': message, 'method': 'walk'}
                except ValueError:
                    continue
            elif calculate_distance < distance_in_miles and calculate_distance >= 5 and calculate_distance <= 15:
                try:
                    message = "Why didn't you check out public transportation from " + begin_address + " to " + a + " (tap me to view) " + a + \
                        " has better reviews, closer to your original starting point, and has a rating of " + str(ratings_bus[a])
                    insert_into_db(tripDict, trip_id, yelp_suggestion_trips, uuid)
                    return {'message': message, 'method': 'public'}
                except ValueError:
                    continue
    # Fixed: the function previously fell off the end and implicitly returned
    # None; return the default object for consistency with
    # calculate_single_suggestion.
    return return_obj
def _plot_api_metric(metric_df, title, out_filename, weekly_axis=False):
    """Plot one API response-time series and save it as *out_filename*.

    When weekly_axis is True, add weekday major ticks with mm/dd/yyyy labels
    and a 'Response time' y-label (the formatting the Dashboard plot used).
    """
    fig, ax = plt.subplots()
    if weekly_axis:
        ax.xaxis.set_major_locator(mdates.WeekdayLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%m/%d/%Y"))
        plt.ylabel('Response time')
    metric_df.plot(x="datetime", y="reading", ax=ax, style='+', legend=None)
    plt.title(title)
    fig.savefig(out_filename)
    plt.close(fig)


def get_app_analytics():
    """Generate response-time plots for the main API endpoints.

    Aggregates every user's 'stats/server_api_time' entries, caps each reading
    at 1 second, and writes one PNG per endpoint into the current directory.
    Returns None.
    """
    df = pd.DataFrame()
    for user in edb.get_uuid_db().find():
        user_df = esta.TimeSeries.get_time_series(user['uuid']).get_data_df(
            "stats/server_api_time", time_query=None)
        if not user_df.empty:
            df = df.append(user_df, ignore_index=True)
    df['datetime'] = df.ts.apply(lambda ts: dt.datetime.fromtimestamp(ts))
    # cap outlier readings at 1s; .loc replaces the deprecated/removed .ix
    df.loc[df.reading > 1, 'reading'] = 1
    # One call per endpoint replaces six copy-pasted plot sections; only the
    # Dashboard plot had the weekday axis formatting, so preserve that.
    _plot_api_metric(df[df.name == "POST_/result/metrics/timestamp"],
                     'Dashboard', 'Dashboard.png', weekly_axis=True)
    _plot_api_metric(df[df.name == "POST_/usercache/put"],
                     'Usercache_put', 'Usercache_put.png')
    _plot_api_metric(df[df.name == "POST_/usercache/get"],
                     'Usercache_get', 'Usercache_get.png')
    _plot_api_metric(df[df.name == "POST_/stats/set"],
                     'Stats_set', 'Stats_set.png')
    _plot_api_metric(df[df.name == "POST_/habiticaRegister"],
                     'Habitica Sign up and Login', 'Habitica Sign up_Login.png')
    _plot_api_metric(df[df.name == "POST_/habiticaProxy"],
                     'Habitica', 'Habitica.png')
    _plot_api_metric(df[df.name.str.contains("POST_/timeline/getTrips")],
                     'Diary', 'Diary.png')
    return
def clearRelatedDb(self):
    """Remove every entry created for this test's UUIDs from the
    timeseries, analysis timeseries, usercache and uuid collections."""
    uuid_filter = {"user_id": {"$in": self.testUUIDList}}
    for related_db in (edb.get_timeseries_db(),
                       edb.get_analysis_timeseries_db(),
                       edb.get_usercache_db(),
                       edb.get_uuid_db()):
        related_db.remove(uuid_filter)
def find_inactive_users():
    # Report users who appear inactive (no server API calls at all, no
    # usercache calls, or no usercache call within the last week), broken
    # down by IRB consent status and by whether they signed up before or
    # after Sept 1, 2016. Prints the report (Python 2 print statements)
    # and returns None.
    inactive_users = []
    # comma-separated email accumulators for the four report buckets
    inactive_users_new_consent = ""
    inactive_users_old_consent = ""
    inactive_users_before_september = ""
    inactive_users_after_september = ""
    one_week_ago_ts = arrow.utcnow().replace(weeks=-1).timestamp
    september_first = arrow.get('2016-09-01').timestamp
    for user in edb.get_uuid_db().find():
        # all server API timing entries for this user; empty => never called in
        db = esta.TimeSeries.get_time_series(user['uuid']).get_data_df(
            "stats/server_api_time", time_query=None)
        new_consent = esta.TimeSeries.get_time_series(
            user['uuid']).get_data_df("config/consent", time_query=None)
        signup_date = arrow.get(user['update_ts'])
        if db.empty:
            # no API calls at all; empty tuple = no last usercache call date
            inactive_users.append((user['user_email'], signup_date.date(), ()))
            # NOTE(review): an *empty* config/consent frame feeds the bucket
            # printed as "consented to the new IRB protocol" below -- this
            # looks inverted; confirm the intended semantics before relying
            # on the report.
            if new_consent.empty:
                inactive_users_new_consent += str(user['user_email']) + ', '
            else:
                inactive_users_old_consent += str(user['user_email']) + ', '
            if signup_date.timestamp < september_first:
                inactive_users_before_september += str(
                    user['user_email']) + ', '
            else:
                inactive_users_after_september += str(
                    user['user_email']) + ', '
        else:
            # check last usercache call:
            # the user is inactive if there are no calls or if the last one
            # was before one_week_ago_ts
            last_usercache_call = db[db['name'].str.contains(
                'usercache', case=False)].tail(1)
            if last_usercache_call.empty:
                inactive_users.append(
                    (user['user_email'], signup_date.date(), ()))
                if new_consent.empty:
                    inactive_users_new_consent += str(
                        user['user_email']) + ', '
                else:
                    inactive_users_old_consent += str(
                        user['user_email']) + ', '
                if signup_date.timestamp < september_first:
                    inactive_users_before_september += str(
                        user['user_email']) + ', '
                else:
                    inactive_users_after_september += str(
                        user['user_email']) + ', '
            else:
                if last_usercache_call.iloc[0]['ts'] < one_week_ago_ts:
                    # stale: record the date of the last usercache call too
                    inactive_users.append(
                        (user['user_email'], signup_date.date(),
                         arrow.get(last_usercache_call.iloc[0]['ts']).date()))
                    if new_consent.empty:
                        inactive_users_new_consent += str(
                            user['user_email']) + ', '
                    else:
                        inactive_users_old_consent += str(
                            user['user_email']) + ', '
                    if signup_date.timestamp < september_first:
                        inactive_users_before_september += str(
                            user['user_email']) + ', '
                    else:
                        inactive_users_after_september += str(
                            user['user_email']) + ', '
    inactive_users_table = pd.DataFrame(
        inactive_users,
        columns=['Email', 'Last Sign Up Date', 'Last Usercache Call'])
    # [:-2] strips the trailing ", " from each accumulator
    print "\nList of inactive users emails and date they signed up:"
    print inactive_users_table
    print "\nEmails of inactive users who consented to the new IRB protocol:"
    print inactive_users_new_consent[:-2]
    print "\nEmails of inactive users who did not consent to the new IRB protocol:"
    print inactive_users_old_consent[:-2]
    print "\nEmails of inactive users who signed up before September 1st:"
    print inactive_users_before_september[:-2]
    print "\nEmails of inactive users who signed up after September 1st:"
    print inactive_users_after_september[:-2]
    return
def testQueryMatching(self):
    """End-to-end check of tripmetrics.query: load one Bay Area and one
    Hawaii timeline, run the intake pipeline on both, then verify that an
    air-trip query matches only the Hawaii user while a walk+vehicle query
    matches both."""
    # Load data for the Bay Area
    dataFileba = "emission/tests/data/real_examples/shankari_2016-06-20"
    ldba = ecwl.LocalDate({'year': 2016, 'month': 6, 'day': 20})
    etc.setupRealExample(self, dataFileba)
    testUUIDba = self.testUUID
    edb.get_uuid_db().insert({"uuid": testUUIDba, "user_email": "*****@*****.**"})
    etc.runIntakePipeline(testUUIDba)
    logging.debug("uuid for the bay area = %s " % testUUIDba)
    # Load data for Hawaii
    dataFilehi = "emission/tests/data/real_examples/shankari_2016-07-27"
    ldhi = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 27})
    etc.setupRealExample(self, dataFilehi)
    testUUIDhi = self.testUUID
    edb.get_uuid_db().insert({"uuid": testUUIDhi, "user_email": "*****@*****.**"})
    etc.runIntakePipeline(testUUIDhi)
    logging.debug("uuid for hawaii = %s " % testUUIDhi)
    self.testUUIDList = [testUUIDba, testUUIDhi]

    def make_query_spec(extra_check):
        # The two specs below were duplicated except for the second check;
        # build them from a shared template so they cannot drift apart.
        return {
            "time_type": "local_date",
            "from_local_date": {"year": 2016, "month": 2},
            "to_local_date": {"year": 2016, "month": 9},
            "freq": 'DAILY',
            "checks": [
                {"modes": ['WALKING', 'ON_FOOT'],
                 "metric": "count",
                 "threshold": {"$gt": 5}},
                extra_check
            ]
        }

    air_query_spec = make_query_spec({"modes": ['AIR_OR_HSR'],
                                      "metric": "count",
                                      "threshold": {"$gt": 1}})
    # Since this requires at least one air trip, this will only return the
    # hawaii trip
    self.assertEqual(tripmetrics.query(air_query_spec), [testUUIDhi])
    walk_drive_spec = make_query_spec({"modes": ['IN_VEHICLE'],
                                       "metric": "count",
                                       "threshold": {"$gt": 1}})
    # Since this only requires walk and vehicle trips, it will return both.
    # We can't just do a simple equals check since the uuids may not always
    # be returned in the same order
    walk_drive_result = tripmetrics.query(walk_drive_spec)
    self.assertEqual(len(walk_drive_result), 2)
    self.assertIn(testUUIDhi, walk_drive_result)
    self.assertIn(testUUIDba, walk_drive_result)
import argparse
import sys
import logging

import emission.core.get_database as edb
import emission.net.ext_service.habitica.proxy as proxy

# CLI: delete the habitica account linked to the given e-mission user email.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "user_email",
        help=
        "the email address of the user whose habitica account you want to clean up"
    )
    args = parser.parse_args()
    # Fixed: previously indexed find_one(...)['uuid'] directly, which raised
    # an opaque TypeError when the email was unknown; fail with a clear
    # message instead.
    uuid_entry = edb.get_uuid_db().find_one({'user_email': args.user_email})
    if uuid_entry is None:
        logging.error("No uuid found for email %s" % args.user_email)
        sys.exit(1)
    del_uuid = uuid_entry['uuid']
    logging.debug("Found uuid %s" % del_uuid)
    del_habitica_creds = edb.get_habitica_db().find_one({'user_id': del_uuid})
    if del_habitica_creds is None:
        logging.error("No habitica credentials found for uuid %s" % del_uuid)
        sys.exit(1)
    logging.debug("del_habitica_creds = %s" % del_habitica_creds)
    # DELETE the habitica account through the proxy, authenticating with the
    # stored habitica password
    del_result = proxy.habiticaProxy(
        del_uuid, "DELETE", "/api/v3/user",
        {'password': del_habitica_creds['habitica_password']})
    logging.debug("delete result = %s" % del_result)
help="only print entry analysis") parser.add_argument("-p", "--pipeline-purge", default=False, action='store_true', help="purge the pipeline state as well") args = parser.parse_args() fn = args.timeline_filename logging.info("Loading file or prefix %s" % fn) sel_file_list = common.read_files_with_prefix(fn) ts_db = edb.get_timeseries_db() ats_db = edb.get_analysis_timeseries_db() udb = edb.get_uuid_db() psdb = edb.get_pipeline_state_db() for i, filename in enumerate(sel_file_list): logging.info("=" * 50) logging.info("Deleting data from file %s" % filename) entries = json.load(gzip.open(filename), object_hook=bju.object_hook) # Obtain uuid and rerun information from entries curr_uuid_list, needs_rerun = common.analyse_timeline(entries) if len(curr_uuid_list) > 1: logging.warning("Found %d users, %s in filename, aborting! " % (len(curr_uuid_list), curr_uuid_list)) raise RuntimeException( "Found %d users, %s in filename, expecting 1, %s" %
import argparse
import json
import logging
import sys

import bson.json_util as bju

import emission.core.get_database as edb

# CLI: dump the email<->uuid entries for every user signed in to a channel,
# as JSON, to stdout or to the file given with -o.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser(prog="get_users_for_channel")
    parser.add_argument("channel",
                        help="the channel that the users signed in to")
    parser.add_argument("-o", "--outfile",
                        help="the output filename (default: stdout)")
    args = parser.parse_args()
    matched_profiles_it = edb.get_profile_db().find({"client": args.channel})
    matched_uuids_it = [p["user_id"] for p in matched_profiles_it]
    matched_email2uuid_it = [
        edb.get_uuid_db().find_one({"uuid": u}) for u in matched_uuids_it
    ]
    logging.debug("Mapped %d entries for channel %s" %
                  (len(matched_email2uuid_it), args.channel))
    # Fixed: the script used sys.stdout without importing sys, so running it
    # without -o crashed with a NameError; also close the output file when we
    # opened one (never close sys.stdout).
    out_fd = sys.stdout if args.outfile is None else open(args.outfile, "w")
    try:
        # bju.default serializes mongo-specific types (ObjectId, UUID, ...)
        json.dump(matched_email2uuid_it, out_fd, default=bju.default)
    finally:
        if args.outfile is not None:
            out_fd.close()
import emission.core.get_database as edb
import emission.storage.timeseries.aggregate_timeseries as estag


def reset_collection(coll, old_uuid, new_uuid):
    """Re-point every entry in `coll` belonging to old_uuid at new_uuid.

    Fixed: the filter previously used the loop-global `user.uuid` instead of
    the `old_uuid` parameter, so the function only worked by accident when
    called from the loop below and was broken for any other caller.
    """
    logging.debug(
        coll.update({"user_id": old_uuid}, {"$set": {
            "user_id": new_uuid
        }}, multi=True))


# Re-anonymize every non-test-phone user by replacing their uuid with a fresh
# random one, then re-pointing their alternatives and analysis entries.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    for user_dict in edb.get_uuid_db().find():
        user = ad.AttrDict(user_dict)
        if user.uuid in estag.TEST_PHONE_IDS:
            logging.debug("Found test phone, skipping reset")
        else:
            new_uuid = uuid.uuid4()
            # Fixed: log operands were reversed (printed "new -> old" for an
            # old -> new mapping)
            logging.debug("Mapping %s -> %s" % (user.uuid, new_uuid))
            edb.get_uuid_db().update({"uuid": user.uuid},
                                     {"$set": {
                                         "uuid": new_uuid
                                     }})
            logging.debug("Resetting alternatives...")
            reset_collection(edb.get_alternatives_db(), user.uuid, new_uuid)
            logging.debug("Resetting analysis...")
            reset_collection(edb.get_analysis_timeseries_db(), user.uuid,
                             new_uuid)
def get_all_uuids():
    """Return the uuid of every registered user as a list."""
    return [entry["uuid"] for entry in edb.get_uuid_db().find()]
import argparse
import sys
import logging

import emission.core.get_database as edb
import emission.net.ext_service.habitica.proxy as proxy

# CLI: delete the habitica account linked to the given e-mission user email.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "user_email",
        help="the email address of the user whose habitica account you want to clean up")
    args = arg_parser.parse_args()
    # map email -> internal uuid, then uuid -> stored habitica credentials
    del_uuid = edb.get_uuid_db().find_one({'user_email': args.user_email})['uuid']
    logging.debug("Found uuid %s" % del_uuid)
    del_habitica_creds = edb.get_habitica_db().find_one({'user_id': del_uuid})
    logging.debug("del_habitica_creds = %s" % del_habitica_creds)
    # issue the account deletion through the habitica proxy, authenticating
    # with the stored habitica password
    del_result = proxy.habiticaProxy(
        del_uuid,
        "DELETE",
        "/api/v3/user",
        {'password': del_habitica_creds['habitica_password']})
    logging.debug("delete result = %s" % del_result)