def findHotSpotsTask():
    """Count how many users share each hot spot and publish the counts.

    Every profile's hot spots are read from its "Living Lab" / "MIT-FIT"
    store via ``hotSpotsComputation``.  A profile whose opt-in record has
    ``data_aggregation == 0`` is left out; a profile with no opt-in record
    is still included.  The resulting list of
    ``{"lat", "lng", "frequency"}`` dicts is saved under the "hotspots"
    answer key in every profile's "Frequent Locations" store.
    """
    all_profiles = Profile.objects.all()

    counts = {}
    for owner in all_profiles:
        try:
            optin = Optin.objects.get(datastore_owner=owner, app_id="Living Lab", lab_id="MIT-FIT")
        except Optin.DoesNotExist:
            optin = None

        # Only an existing opt-in record with aggregation switched off
        # excludes the user from the aggregate.
        if optin and optin.data_aggregation == 0:
            continue

        source_store = getInternalDataStore(owner, "Living Lab", "MIT-FIT", "")
        for spot in hotSpotsComputation(source_store):
            counts[spot] = counts.get(spot, 0) + 1

    hot_spots = [
        {"lat": spot[0], "lng": spot[1], "frequency": counts[spot]}
        for spot in counts
    ]

    # The aggregate is written to every profile, including excluded ones.
    for owner in all_profiles:
        target_store = getInternalDataStore(owner, "Living Lab", "Frequent Locations", "")
        target_store.saveAnswer("hotspots", hot_spots)
def findActiveTimesTask():
    """Average the 24 per-slot activity values over all users and publish them.

    For each profile, ``activeTimesComputation`` is expected to return at
    least 24 values (indexed 0..23).  The per-slot sums are floor-divided by
    the number of profiles and the resulting list is saved under the
    "activeTimes" answer key in every profile's "Living Lab" / "MIT-FIT"
    store.

    Returns nothing.  When there are no profiles the function returns early:
    there is nothing to average and nobody to publish to.
    """
    profiles = Profile.objects.all()
    slot_count = 24
    totals = [0] * slot_count
    num_users = 0

    for profile in profiles:
        num_users += 1
        internalDataStore = getInternalDataStore(profile, "Living Lab", "MIT-FIT", "")
        user_time_averages = activeTimesComputation(internalDataStore)
        for slot in range(slot_count):
            totals[slot] += user_time_averages[slot]

    if num_users == 0:
        # Guard: the floor division below would raise ZeroDivisionError.
        return

    time_averages = [total // num_users for total in totals]

    for profile in profiles:
        internalDataStore = getInternalDataStore(profile, "Living Lab", "MIT-FIT", "")
        internalDataStore.saveAnswer("activeTimes", time_averages)
def leaderboardComputationTask():
    """Rank all users by average activity rate and store each user's standing.

    For every profile the first entry returned by
    ``aggregateLeaderboardComputation`` supplies the average, max-high and
    min-low activity rates.  Users are sorted ascending by average activity
    rate, so the user with the highest rate gets ``rank["own"] == 1``.  Each
    user's stats, rank, total user count and percentile (from
    ``calculatePercentile``) are saved as a one-element list under the
    "activityStats" answer key of that user's "Living Lab" / "MIT-FIT" store.
    """
    profiles = Profile.objects.all()

    rankings = {}
    profiles_by_uuid = {}
    for profile in profiles:
        internalDataStore = getInternalDataStore(profile, "Living Lab", "MIT-FIT", "")
        values = aggregateLeaderboardComputation(internalDataStore, "activityStats", leaderboardComputation, False)
        stats = values[0]
        rankings[profile.uuid] = LeaderboardRanking({
            "average_activity_rate": stats["average_activity_rate"],
            "max_high_activity_rate": stats["max_high_activity_rate"],
            "min_low_activity_rate": stats["min_low_activity_rate"]})
        # Keep the loaded profile so the second pass needs no per-user query.
        profiles_by_uuid[profile.uuid] = profile

    ordered_uuids = sorted(rankings, key=lambda uuid: rankings[uuid].average_activity_rate)
    total = len(ordered_uuids)
    average_activity_rates_list = [
        rankings[uuid].get_average_activity_rate() for uuid in ordered_uuids
    ]

    # enumerate() yields each uuid's position directly; list.index() inside
    # the loop would rescan the list for every user.
    for position, uuid in enumerate(ordered_uuids):
        ranking = rankings[uuid]
        internalDataStore = getInternalDataStore(profiles_by_uuid[uuid], "Living Lab", "MIT-FIT", "")
        average_rate = ranking.get_average_activity_rate()
        percentileValue = calculatePercentile(average_activity_rates_list, average_rate)

        user_activity_dict = {
            "average_activity_rate": average_rate,
            "max_high_activity_rate": ranking.get_max_high_activity_rate(),
            "min_low_activity_rate": ranking.get_min_low_activity_rate(),
            "rank": {"own": total - position, "total": total, "percentile": percentileValue}
        }
        internalDataStore.saveAnswer("activityStats", [user_activity_dict])
def recentSocialHealthScores2():
    """Compute activity/social/focus scores per user plus a population band.

    Four-hour windows are built from ``getStartTime(6, True)`` up to now.
    Only users with at least one activity level in that span are scored.
    For those users the mean and population standard deviation of each score
    are computed; every scored user then gets ``"averageLow"``
    (mean - std dev, floored at 0) and ``"averageHigh"`` (mean + std dev,
    capped at 10) next to their own ``"user"`` scores, and the combined dict
    is saved under the "socialhealth" answer key.

    Returns the dict of results keyed by profile uuid.
    """
    window = 3600 * 4
    first_start = getStartTime(6, True)
    now = time.time()
    windows = [(begin, begin + window) for begin in range(int(first_start), int(now), window)]

    totals = {"activity": 0, "social": 0, "focus": 0}
    scored_profiles = []
    results = {}

    for owner in Profile.objects.all():
        token = getToken(owner, "app-uuid")
        store = getInternalDataStore(owner, "Living Lab", "Social Health Tracker", token)
        activity_levels = aggregateForUser(store, "RecentActivityByHour", windows, activityForTimeRange, False)
        if len(activity_levels) == 0:
            # No activity data in the window: this user is not scored.
            continue

        social_levels = aggregateForUser(store, "RecentSocialByHour", windows, socialForTimeRange, True)
        focus_levels = aggregateForUser(store, "RecentFocusByHour", windows, focusForTimeRange, True)

        own_scores = {
            "activity": computeActivityScore(activity_levels),
            "social": computeSocialScore(social_levels),
            "focus": computeFocusScore(focus_levels),
        }
        for name in ("activity", "social", "focus"):
            totals[name] += own_scores[name]

        scored_profiles.append(owner)
        results[owner.uuid] = {"user": own_scores}

    scored_count = len(scored_profiles)
    if scored_count > 0:
        band_low = {}
        band_high = {}
        for name in totals:
            mean = totals[name] / scored_count
            squared_diffs = [(results[p.uuid]["user"][name] - mean) ** 2 for p in scored_profiles]
            spread = math.sqrt(sum(squared_diffs) / len(squared_diffs))
            band_low[name] = max(0, mean - spread)
            band_high[name] = min(mean + spread, 10)

        for owner in scored_profiles:
            token = getToken(owner, "app-uuid")
            store = getInternalDataStore(owner, "Living Lab", "Social Health Tracker", token)
            # Each user receives their own copy of the shared band values.
            results[owner.uuid]["averageLow"] = dict(band_low)
            results[owner.uuid]["averageHigh"] = dict(band_high)
            store.saveAnswer("socialhealth", results[owner.uuid])

    return results
def recentProbeDataScores():
    """Run the hourly per-probe aggregation for every profile.

    For each profile, one-hour windows are built from
    ``socialhealth_tasks.getStartTime(6, True)`` up to the current time, and
    ``aggregateForUser`` is called once per (answer key, probe) pair against
    the profile's "Living Lab" / "MIT-FIT" store.  The aggregation results
    are not used here; any persistence happens inside ``aggregateForUser``
    (not shown in this file section).
    """
    for owner in Profile.objects.all():
        window_start = socialhealth_tasks.getStartTime(6, True)
        now = time.time()

        hourly_windows = []
        for begin in range(int(window_start), int(now), 3600):
            hourly_windows.append((begin, begin + 3600))

        # answer key -> probe name handed to aggregateForUser
        probes_by_answer_key = {
            'recentActivityProbeByHour': 'ActivityProbe',
            'recentSmsProbeByHour': 'SmsProbe',
            'recentCallLogProbeByHour': 'CallLogProbe',
            'recentBluetoothProbeByHour': 'BluetoothProbe',
            'recentWifiProbeByHour': 'WifiProbe',
            'recentSimpleLocationProbeByHour': 'LocationProbe',
            'recentRunningApplicationsProbeByHour': 'RunningApplicationsProbe',
            'recentHardwareInfoProbeByHour': 'HardwareInfoProbe',
            'recentAppUsageProbeByHour': 'AppUsageProbe',
        }

        store = getInternalDataStore(owner, "Living Lab", "MIT-FIT", "")
        for answer_key, probe_name in probes_by_answer_key.items():
            aggregateForUser(probe_name, store, answer_key, hourly_windows, probeForTimeRange, False)
def findRecentPlaces():
    """Recompute every user's "work" and "home" places from the last 14 days.

    Each profile's "RecentPlaces" answer is first reset to an empty list.
    ``findRecentPlaceBounds`` is then run for "work" using one 8-hour window
    per day starting 9 hours after local midnight, and for "home" using one
    6-hour window per day starting at local midnight.

    Returns the result of the "home" pass.
    """
    one_hour = 3600
    one_day = one_hour * 24

    now = time.time()
    first_day = date.fromtimestamp(now) - timedelta(days=14)
    # Local midnight, 14 days before today.
    window_start = time.mktime(first_day.timetuple())
    end = int(now)

    # Clear the previous answer before either pass writes new bounds.
    for owner in Profile.objects.all():
        getInternalDataStore(owner, "Living Lab", "My Places", "").saveAnswer("RecentPlaces", [])

    # Clustering is expensive, so each pass samples one window per day.
    work_windows = [
        (begin, begin + one_hour * 8)
        for begin in range(int(window_start + one_hour * 9), end, one_day)
    ]
    findRecentPlaceBounds("work", work_windows)

    home_windows = [
        (begin, begin + one_hour * 6)
        for begin in range(int(window_start), end, one_day)
    ]
    places = findRecentPlaceBounds("home", home_windows)

    print("... done with RecentPlaces")
    return places
def recentFocusLevels(includeBlanks = False, means = None, devs = None):
    """Rescale each user's recent focus levels and save them per user.

    Focus data is aggregated in 4-hour windows covering today and the six
    preceding days.  When ``means`` and ``devs`` both contain the user's
    uuid, each focus value becomes ``10.0 * (1.0 - CDF(value, mean, dev))``,
    with non-positive mean/dev replaced by 1; otherwise the value is
    truncated with ``int``.  Entries are modified in place and saved under
    the "RecentFocusByHour" answer key.

    Returns the dict of per-user entry lists keyed by uuid.
    """
    four_hours = 3600 * 4
    answer_key = "RecentFocusByHour"

    now = time.time()
    week_start = time.mktime((date.fromtimestamp(now) - timedelta(days=6)).timetuple())
    windows = [(begin, begin + four_hours) for begin in range(int(week_start), int(now), four_hours)]

    # The answer key passed to the aggregation is None; saving is done below.
    levels_by_user = aggregateForAllUsers(None, windows, focusForTimeRange, includeBlanks)

    for uuid, entries in levels_by_user.items():
        if len(entries) == 0:
            continue

        has_stats = means is not None and devs is not None and uuid in means and uuid in devs
        if has_stats:
            mean = means[uuid] if means[uuid] > 0 else 1
            dev = devs[uuid] if devs[uuid] > 0 else 1

        rescaled = []
        for entry in entries:
            if has_stats:
                entry["focus"] = 10.0 * (1.0 - CDF(entry["focus"], mean, dev))
            else:
                entry["focus"] = int(entry["focus"])
            rescaled.append(entry)
        levels_by_user[uuid] = rescaled

        owner = Profile.objects.get(uuid=uuid)
        # TODO: get a token here to run internal queries against...
        store = getInternalDataStore(owner, "")
        store.saveAnswer(answer_key, rescaled)

    return levels_by_user
def recentFocusLevels(includeBlanks=False, means=None, devs=None):
    """Rescale each user's recent focus levels and save them per user.

    Focus data is aggregated (service id "Focus") in 4-hour windows covering
    today and the six preceding days.  When ``means`` and ``devs`` both
    contain the user's uuid, each focus value becomes
    ``10.0 * (1.0 - CDF(value, mean, dev))``, with non-positive mean/dev
    replaced by 1; otherwise the value is truncated with ``int``.  Entries
    are modified in place and saved under "RecentFocusByHour" in the user's
    "Living Lab" / "Social Health Tracker" store.

    Returns the dict of per-user entry lists keyed by uuid.
    """
    step = 3600 * 4
    now = time.time()
    six_days_ago = date.fromtimestamp(now) - timedelta(days=6)
    first_start = int(time.mktime(six_days_ago.timetuple()))

    windows = []
    for begin in range(first_start, int(now), step):
        windows.append((begin, begin + step))

    focus_by_user = aggregateForAllUsers(None, windows, focusForTimeRange, "Focus", includeBlanks)

    def can_normalize(user_id):
        # Both statistics must be supplied and must cover this user.
        return means is not None and devs is not None and user_id in means and user_id in devs

    for user_id, focus_entries in focus_by_user.items():
        if not len(focus_entries) > 0:
            continue

        if can_normalize(user_id):
            center = means[user_id] if means[user_id] > 0 else 1
            spread = devs[user_id] if devs[user_id] > 0 else 1
            for item in focus_entries:
                item["focus"] = 10.0 * (1.0 - CDF(item["focus"], center, spread))
        else:
            for item in focus_entries:
                item["focus"] = int(item["focus"])

        # Same entry objects, collected in a fresh list as before.
        focus_by_user[user_id] = list(focus_entries)

        owner = Profile.objects.get(uuid=user_id)
        # TODO: get a token here to run internal queries against...
        store = getInternalDataStore(owner, "Living Lab", "Social Health Tracker", "")
        store.saveAnswer("RecentFocusByHour", focus_by_user[user_id])

    return focus_by_user
def data(request):
    '''Decrypt uploaded funf database files and insert their rows into the owner's PDS.

    GET requests are answered with a 404.  Other requests must pass the
    "funf_write" PDS authorization check (401 otherwise) and carry
    ``bearer_token`` and ``datastore_owner__uuid`` query parameters.  Each
    uploaded file is written below ``upload_dir``, decrypted if needed,
    opened as a SQLite database, and every ``(name, value)`` row of its
    ``data`` table is inserted through ``insert_pds``.

    Returns a JSON ``HttpResponse``.  The body describes the LAST processed
    file only: ``{"success": True, "rows_inserted": n}`` or
    ``{"success": False, "error_message": ...}``; it is ``{}`` when no files
    were uploaded.
    '''
    result = {}
    if request.method == 'GET':
        # No token lookup here: a GET without bearer_token must still get
        # the 404 instead of failing on the missing parameter.
        return HttpResponse("File not found", status=404)

    authorization = PDSAuthorization("funf_write", audit_enabled=True)
    if not authorization.is_authorized(request):
        return HttpResponse("Unauthorized", status=401)

    token = request.GET['bearer_token']
    datastore_owner_uuid = request.GET["datastore_owner__uuid"]
    datastore_owner, _created = Profile.objects.get_or_create(uuid=datastore_owner_uuid)
    print("Creating IDS for %s" % datastore_owner_uuid)
    internalDataStore = getInternalDataStore(datastore_owner, "Living Lab", "Social Health Tracker", token)

    # NOTE(review): the funf password is hard-coded in source; consider
    # moving it to settings.
    funf_password = "******"
    key = decrypt.key_from_password(str(funf_password))
    print("PDS: set_funf_data on uuid: %s" % datastore_owner_uuid)

    for uploaded in request.FILES.values():
        try:
            file_path = upload_dir + uploaded.name
            try:
                write_file(str(file_path), uploaded)
            except Exception:
                # Best effort, as before: report the failure and still try
                # to process whatever is at file_path.
                print("failed to write file to " + file_path + ". Please make sure you have write permission to the directory set in settings.SERVER_UPLOAD_DIR")

            dbdecrypt.decrypt_if_not_db_file(file_path, key)
            con = sqlite3.connect(file_path)
            try:
                cur = con.cursor()
                cur.execute("select name, value from data")
                inserted = []
                for row in cur:
                    name = convert_string(row[0])
                    json_insert = clean_keys(json.JSONDecoder().decode(convert_string(row[1])))
                    # Insert into PDS
                    pds_data = {
                        'time': json_insert.get('timestamp'),
                        'value': json_insert,
                        'key': name,
                    }
                    insert_pds(internalDataStore, token, pds_data)
                    inserted.append(convert_string(json_insert) + '\n')
            finally:
                # Always release the SQLite handle, even if a row fails.
                con.close()

            result = {'success': True, 'rows_inserted': len(inserted)}
            print("Inserted %s rows" % len(inserted))
        except Exception as e:
            print("Exception from funf_connector on pds:")
            print("%s" % e)
            # str(e) keeps the details for multi-argument exceptions, where
            # e.message is empty.
            result = {'success': False, 'error_message': str(e)}

    return HttpResponse(json.dumps(result), content_type='application/json')
def data(request):
    """Decrypt uploaded funf database files and insert their rows into the owner's PDS.

    GET requests are answered with a 404.  Other requests must pass the
    "funf_write" PDS authorization check (401 otherwise) and carry
    ``bearer_token`` and ``datastore_owner__uuid`` query parameters.  Each
    uploaded file is written below ``upload_dir``, decrypted if needed,
    opened as a SQLite database, and every ``(name, value)`` row of its
    ``data`` table is inserted through ``insert_pds``.

    Returns a JSON ``HttpResponse``.  The body describes the LAST processed
    file only: ``{"success": True, "rows_inserted": n}`` or
    ``{"success": False, "error_message": ...}``; it is ``{}`` when no files
    were uploaded.
    """
    result = {}
    if request.method == "GET":
        # No token lookup here: a GET without bearer_token must still get
        # the 404 instead of failing on the missing parameter.
        return HttpResponse("File not found", status=404)

    authorization = PDSAuthorization("funf_write", audit_enabled=True)
    if not authorization.is_authorized(request):
        return HttpResponse("Unauthorized", status=401)

    token = request.GET["bearer_token"]
    datastore_owner_uuid = request.GET["datastore_owner__uuid"]
    datastore_owner, _created = Profile.objects.get_or_create(uuid=datastore_owner_uuid)
    print("Creating IDS for %s" % datastore_owner_uuid)
    internalDataStore = getInternalDataStore(datastore_owner, "Living Lab", "Social Health Tracker", token)

    # NOTE(review): the funf password is hard-coded in source; consider
    # moving it to settings.
    funf_password = "******"
    key = decrypt.key_from_password(str(funf_password))
    print("PDS: set_funf_data on uuid: %s" % datastore_owner_uuid)

    for uploaded in request.FILES.values():
        try:
            file_path = upload_dir + uploaded.name
            try:
                write_file(str(file_path), uploaded)
            except Exception:
                # Best effort, as before: report the failure and still try
                # to process whatever is at file_path.
                print("failed to write file to " + file_path + ". Please make sure you have write permission to the directory set in settings.SERVER_UPLOAD_DIR")

            dbdecrypt.decrypt_if_not_db_file(file_path, key)
            con = sqlite3.connect(file_path)
            try:
                cur = con.cursor()
                cur.execute("select name, value from data")
                inserted = []
                for row in cur:
                    name = convert_string(row[0])
                    json_insert = clean_keys(json.JSONDecoder().decode(convert_string(row[1])))
                    # Insert into PDS
                    pds_data = {
                        "time": json_insert.get("timestamp"),
                        "value": json_insert,
                        "key": name,
                    }
                    insert_pds(internalDataStore, token, pds_data)
                    inserted.append(convert_string(json_insert) + "\n")
            finally:
                # Always release the SQLite handle, even if a row fails.
                con.close()

            result = {"success": True, "rows_inserted": len(inserted)}
            print("Inserted %s rows" % len(inserted))
        except Exception as e:
            print("Exception from funf_connector on pds:")
            print("%s" % e)
            # str(e) keeps the details for multi-argument exceptions, where
            # e.message is empty.
            result = {"success": False, "error_message": str(e)}

    return HttpResponse(json.dumps(result), content_type="application/json")
def testGetData(profile_index=17):
    """Smoke-test ``getData`` for one profile's ActivityProbe data.

    Fetches "ActivityProbe" entries between the fixed timestamps 1403136000
    and 1403222400 from the "Living Lab" / "MIT-FIT" store of the profile at
    ``profile_index``.  The result of ``getData`` is discarded.

    Args:
        profile_index: position of the profile within
            ``Profile.objects.all()``.  Defaults to 17, the index used by
            the earlier commented-out version of this test.

    Raises:
        IndexError: if fewer than ``profile_index + 1`` profiles exist.
    """
    profiles = Profile.objects.all()
    # The body referenced an undefined name `profile`; select it explicitly.
    profile = profiles[profile_index]
    internalDataStore = getInternalDataStore(profile, "Living Lab", "MIT-FIT", "")

    probes = ["LocationProbe", "ActivityProbe", "SmsProbe", "CallLogProbe", "BluetoothProbe", "WifiProbe", "ScreenProbe"]
    startTime = 1403136000
    endTime = 1403222400
    internalDataStore.getData(probes[1], startTime, endTime)
def recommendEvents():
    """Print up to three recommended events per user, ranked by Jaccard similarity.

    Event registrations are collected for every profile through
    ``eventRecommendationComputation``.  For each ordered pair of distinct
    events the Jaccard coefficient of their registered-user sets is
    computed.  For each user, every event is scored with the highest
    coefficient it has against any of that user's registered events, and
    the three best-scoring events are printed.

    Output per profile: the uuid on one line, then one line holding the
    ``name`` detail of each recommended event, each followed by ", ".
    Nothing is saved or returned.
    """
    profiles = Profile.objects.all()

    eventRegistrations = {}
    userRegistrations = {}
    for profile in profiles:
        internalDataStore = getInternalDataStore(profile, "Living Lab", "MIT-FIT", "")
        eventRegistrations, userEventRegistrations = eventRecommendationComputation(
            internalDataStore, eventRegistrations, profile.uuid)
        userRegistrations[profile.uuid] = userEventRegistrations

    # Build each event's user set once instead of once per pair.
    users_by_event = {}
    for event, users in eventRegistrations.items():
        users_by_event[event] = set(users)

    eventSet = set()
    jaccardCoefficientDict = {}
    for event1 in users_by_event:
        for event2 in users_by_event:
            if event1 == event2:
                continue
            shared = users_by_event[event1] & users_by_event[event2]
            combined = users_by_event[event1] | users_by_event[event2]
            eventSet.add(event1)
            eventSet.add(event2)
            if len(combined) > 0:
                # float() forces true division; under Python 2 the plain
                # int / int form truncates every coefficient to 0 or 1.
                jaccardCoefficientDict[(event1, event2)] = float(len(shared)) / len(combined)
            else:
                jaccardCoefficientDict[(event1, event2)] = 0

    for profile in profiles:
        print(profile.uuid)

        recommendedEvents = {}
        for userRegisteredEvent in userRegistrations[profile.uuid]:
            for event in eventSet:
                if userRegisteredEvent == event:
                    continue
                coefficient = jaccardCoefficientDict[(userRegisteredEvent, event)]
                # Keep the best coefficient seen for this candidate event.
                if event not in recommendedEvents or coefficient > recommendedEvents[event]:
                    recommendedEvents[event] = coefficient

        sortedRecommendedEvents = sorted(
            recommendedEvents.items(), key=lambda recommendedEvent: recommendedEvent[1], reverse=True)

        # NOTE(review): event keys are iterated as sequences of
        # (field, value) pairs -- confirm against eventRecommendationComputation.
        names = []
        for event, _coefficient in sortedRecommendedEvents[:3]:
            for eventDetails in event:
                if u'name' in eventDetails[0]:
                    names.append(eventDetails[1] + ", ")
        # Joined with one space to match the spacing of consecutive
        # `print x,` statements followed by a bare `print`.
        print(" ".join(names))
def findActiveLocationsTask():
    """Count active locations across all users and publish the counts.

    Each location returned by ``activeLocationsComputation`` is rounded to
    four decimal places in both coordinates; identical rounded coordinates
    are counted together across users.  The list of
    ``{"lat", "lng", "count"}`` dicts is saved under the "activeLocations"
    answer key in every profile's "Living Lab" / "MIT-FIT" store.
    """
    all_profiles = Profile.objects.all()

    tallies = {}
    for owner in all_profiles:
        store = getInternalDataStore(owner, "Living Lab", "MIT-FIT", "")
        for point in activeLocationsComputation(store):
            # Rounding merges samples that fall on the same 4-decimal cell.
            cell = (round(point[0], 4), round(point[1], 4))
            tallies[cell] = tallies.get(cell, 0) + 1

    summary = [
        {"lat": cell[0], "lng": cell[1], "count": tallies[cell]}
        for cell in tallies
    ]

    for owner in all_profiles:
        store = getInternalDataStore(owner, "Living Lab", "MIT-FIT", "")
        store.saveAnswer("activeLocations", summary)
def populateEventsForUsers():
    """Give every profile seven randomly chosen event registrations.

    For each profile, seven distinct indices in ``range(25)`` are sampled
    and the matching entries of ``data`` (a name resolved from the enclosing
    module scope, not shown in this section) are saved under the
    "mitfitEventRegistrations" answer key of the profile's
    "Living Lab" / "MIT-FIT" store.
    """
    for owner in Profile.objects.all():
        store = getInternalDataStore(owner, "Living Lab", "MIT-FIT", "")
        picks = random.sample(range(25), 7)
        chosen_events = [data[index] for index in picks]
        store.saveAnswer("mitfitEventRegistrations", chosen_events)
def recentSocialHealthScores():
    """Store each user's activity/social/focus scores with a population band.

    Scores come from ``recentActivityScore``, ``recentSocialScore`` and
    ``recentFocusScore`` (dicts keyed by profile uuid).  For each score the
    mean and population standard deviation are computed; both are 0 when no
    scores exist.  For every profile that has an activity score, nine
    entries are saved under the "socialhealth" answer key: the user's three
    scores (layer "User"), then mean - std dev floored at 0 (layer
    "averageLow"), then mean + std dev capped at 10 (layer "averageHigh").

    Afterwards ``recentActivityLevels(True)`` is re-run so the time-graph
    data includes zeros for blanks.

    Returns the dict of saved entry lists keyed by profile uuid.
    """
    profiles = Profile.objects.all()
    data = {}

    activityScores = recentActivityScore()
    socialScores = recentSocialScore()
    focusScores = recentFocusScore()
    keyedScores = [("activity", activityScores), ("social", socialScores), ("focus", focusScores)]

    scoresList = [list(scores.values()) for _key, scores in keyedScores]
    print(scoresList)

    averages = []
    stdDevs = []
    for scores in scoresList:
        if len(scores) > 0:
            average = sum(scores) / len(scores)
            variance = sum((x - average) * (x - average) for x in scores) / len(scores)
        else:
            # Empty score set: avoid dividing by zero in either statistic.
            average = 0
            variance = 0
        averages.append(average)
        stdDevs.append(math.sqrt(variance))

    print("Averages (activity, social, focus):")
    print(averages)
    print("Standard Deviations (activity, social, focus):")
    print(stdDevs)

    for profile in profiles:
        # Dict membership is O(1); `in .keys()` scans a list on Python 2.
        if profile.uuid not in activityScores:
            continue
        print("storing %s" % profile.uuid)
        internalDataStore = getInternalDataStore(profile, "")

        entries = []
        for key, scores in keyedScores:
            entries.append({"key": key, "layer": "User", "value": scores.get(profile.uuid, 0)})
        for index, (key, _scores) in enumerate(keyedScores):
            entries.append({"key": key, "layer": "averageLow", "value": max(0, averages[index] - stdDevs[index])})
        for index, (key, _scores) in enumerate(keyedScores):
            entries.append({"key": key, "layer": "averageHigh", "value": min(averages[index] + stdDevs[index], 10)})

        data[profile.uuid] = entries
        internalDataStore.saveAnswer("socialhealth", entries)

    # After we're done, re-compute the time graph data to include zeros for
    # blanks.  Not ideal to compute this twice, but it gets the job done.
    # Social and focus are purposely excluded: blanks are included in their
    # calculations, since a blank could imply an actual zero rather than
    # missing data.
    recentActivityLevels(True)
    return data
def aggregateForAllUsers(answerKey, timeRanges, aggregator, includeBlanks = False, mean = None, dev = None):
    """Run ``aggregateForUser`` for every profile and collect non-empty results.

    Args:
        answerKey: forwarded unchanged to ``aggregateForUser``.
        timeRanges: forwarded unchanged to ``aggregateForUser``.
        aggregator: forwarded unchanged to ``aggregateForUser``.
        includeBlanks: forwarded unchanged to ``aggregateForUser``.
        mean: accepted but not used by this function.
        dev: accepted but not used by this function.

    Returns:
        dict mapping profile uuid to that profile's aggregation result;
        profiles whose result is ``None`` or empty are omitted.
    """
    per_user = {}
    for owner in Profile.objects.all():
        # NOTE: need a means of getting at a token for authorizing this task
        # to run.  For now, we're not checking anyway, so it's blank.
        store = getInternalDataStore(owner, "")
        result = aggregateForUser(owner, store, answerKey, timeRanges, aggregator, includeBlanks)
        if result is None or len(result) == 0:
            continue
        per_user[owner.uuid] = result
    return per_user
def findMeetups(owner_uuid="280e418a-8032-4de3-b62a-ad173fea4811", participant_uuids=("5241576e-43da-4b08-8a71-b477f931e021", "72d9d8e3-3a57-4508-9515-2b881afc0d8e"), description="", token="b3dbac8916"):
    """Pick the best meetup slot between an owner and participants and save it.

    The owner's "RecentPlaces" are read from the local data store; each
    participant's are fetched over HTTP.  For every owner place whose key is
    not "work" or "home", the places with the same key from all participants
    are scored with ``scoreMeetup``; the lowest score wins.  The winning
    key and meeting point are stored under the "Meetups" answer key,
    replacing any stored meetup with the same ``description`` (stored
    entries that have no "description" field are dropped, as before).

    Participants whose places could not be fetched are ignored when scoring
    instead of raising ``KeyError``.  If no candidate place exists, nothing
    is saved.

    Args:
        owner_uuid: uuid of the profile organizing the meetup.
        participant_uuids: iterable of participant uuids.
        description: label identifying the meetup in the saved answer.
        token: bearer token used for the data store and the HTTP requests.
            NOTE(review): the default is a credential hard-coded in source.
    """
    # Copy so that neither the default nor the caller's sequence is shared
    # with the saved answer.
    participant_uuids = list(participant_uuids)

    owner = Profile.objects.get(uuid=owner_uuid)
    internalDataStore = getInternalDataStore(owner, token)
    owner_places = internalDataStore.getAnswerList("RecentPlaces")[0]["value"]

    participant_places = {}
    headers = {"content-type": "application/json"}
    for uuid in participant_uuids:
        url = "http://working-title.media.mit.edu:8004/api/personal_data/answerlist/?key=RecentPlaces&datastore_owner__uuid=%s&bearer_token=%s" % (uuid, token)
        response = requests.get(url, headers=headers)
        print("Meetup between %s and %s" % (owner_uuid, uuid))
        if response.status_code == requests.codes.ok:
            payload = response.json()  # parse the body once
            print(payload)
            objects = payload["objects"]
            if objects:
                participant_places[uuid] = objects[0]["value"]

    min_score = float("inf")
    min_score_key = None
    meeting_point = None
    participant_locations = []

    for place in owner_places:
        if place["key"] in ("work", "home"):
            continue
        places_for_key = [
            p
            for uid in participant_uuids
            for p in participant_places.get(uid, [])
            if p["key"] == place["key"]
        ]
        places_for_key.append(place)
        score_for_key, point_for_key = scoreMeetup(places_for_key)
        if score_for_key < min_score:
            print("%s < %s" % (score_for_key, min_score))
            min_score = score_for_key
            min_score_key = place["key"]
            meeting_point = point_for_key
            participant_locations = [(p["bounds"][0], p["bounds"][1]) for p in places_for_key]

    print("Best Time: %s" % min_score_key)
    if meeting_point is None:
        # No candidate place: formatting None with "%s,%s" would raise
        # TypeError, and there is no meetup to record.
        return
    print("Meeting point: %s,%s" % meeting_point)
    print(participant_locations)

    answer = internalDataStore.getAnswerList("Meetups")
    answer = answer[0]["value"] if answer is not None and answer.count() > 0 else []
    answer = [v for v in answer if "description" in v and v["description"] != description]
    answer.append({"description": description, "participants": participant_uuids, "hour": min_score_key, "place": meeting_point})
    internalDataStore.saveAnswer("Meetups", answer)
def recentProbeDataScores():
    """Run the hourly per-probe aggregation for every profile.

    One-hour windows are built once from
    ``socialhealth_tasks.getStartTime(6, True)`` up to the current time, so
    every profile is aggregated over the same windows.
    ``aggregateForUser`` is called once per (answer key, probe) pair against
    each profile's "Living Lab" / "MIT-FIT" store.  The aggregation results
    are not used here; any persistence happens inside ``aggregateForUser``
    (not shown in this file section).
    """
    # Loop-invariant setup: neither the windows nor the probe table depend
    # on the profile being processed.
    startTime = socialhealth_tasks.getStartTime(6, True)
    currentTime = time.time()
    timeRanges = [(start, start + 3600) for start in range(int(startTime), int(currentTime), 3600)]

    # answer key -> probe name handed to aggregateForUser
    probeAnswerKeys = {
        'recentActivityProbeByHour': 'ActivityProbe',
        'recentSmsProbeByHour': 'SmsProbe',
        'recentCallLogProbeByHour': 'CallLogProbe',
        'recentBluetoothProbeByHour': 'BluetoothProbe',
        'recentWifiProbeByHour': 'WifiProbe',
        'recentSimpleLocationProbeByHour': 'LocationProbe',
        'recentRunningApplicationsProbeByHour': 'RunningApplicationsProbe',
        'recentHardwareInfoProbeByHour': 'HardwareInfoProbe',
        'recentAppUsageProbeByHour': 'AppUsageProbe',
    }

    for profile in Profile.objects.all():
        internalDataStore = getInternalDataStore(profile, "Living Lab", "MIT-FIT", "")
        for probeAnswerKey, probe in probeAnswerKeys.items():
            aggregateForUser(probe, internalDataStore, probeAnswerKey, timeRanges, probeForTimeRange, False)
def aggregateForAllUsers(answerKey, timeRanges, aggregator, serviceId, includeBlanks=False, mean=None, dev=None):
    """Run ``aggregateForUser`` for every profile and collect non-empty results.

    Each profile is aggregated against its "Living Lab" /
    "Social Health Tracker" store.

    Args:
        answerKey: forwarded unchanged to ``aggregateForUser``.
        timeRanges: forwarded unchanged to ``aggregateForUser``.
        aggregator: forwarded unchanged to ``aggregateForUser``.
        serviceId: accepted but not used by this function.
        includeBlanks: forwarded unchanged to ``aggregateForUser``.
        mean: accepted but not used by this function.
        dev: accepted but not used by this function.

    Returns:
        dict mapping profile uuid to that profile's aggregation result;
        profiles whose result is ``None`` or empty are omitted.
    """
    collected = {}
    for owner in Profile.objects.all():
        # NOTE: need a means of getting at a token for authorizing this task
        # to run.  For now, we're not checking anyway, so it's blank.
        store = getInternalDataStore(owner, "Living Lab", "Social Health Tracker", "")
        outcome = aggregateForUser(store, answerKey, timeRanges, aggregator, includeBlanks)
        has_content = outcome is not None and len(outcome) > 0
        if has_content:
            collected[owner.uuid] = outcome
    return collected
def findRecentPlaces():
    """Recompute every user's "work" and "home" places from the last 14 days.

    Each profile's "RecentPlaces" answer is first reset to an empty list.
    ``findRecentPlaceBounds`` is then run for "work" (daily 8-hour windows
    starting 9 hours after local midnight) and for "home" (daily 6-hour
    windows starting at local midnight).

    Returns the result of the "home" pass.
    """
    now = time.time()
    two_weeks_ago = date.fromtimestamp(now) - timedelta(days=14)
    start = time.mktime(two_weeks_ago.timetuple())

    def daily_windows(first_start, length):
        # One (start, end) pair per day from first_start up to now.
        return [(begin, begin + length) for begin in range(int(first_start), int(now), 3600 * 24)]

    # Wipe the stored places before recomputing them.
    for owner in Profile.objects.all():
        store = getInternalDataStore(owner, "Living Lab", "My Places", "")
        store.saveAnswer("RecentPlaces", [])

    # Clustering is expensive, so only one window per day is sampled.
    findRecentPlaceBounds("work", daily_windows(start + 3600 * 9, 3600 * 8))
    home_result = findRecentPlaceBounds("home", daily_windows(start, 3600 * 6))

    print("... done with RecentPlaces")
    return home_result
def findRecentPlaceBounds(recentPlaceKey, timeRanges, numPlaces=1, answerKey="RecentPlaces"):
    """Find a bounding box for a recurring place for every profile and save it.

    For each profile, LocationProbe samples are clustered separately per
    time range; the largest cluster of each range becomes a candidate
    bounding box ``[min_lat, min_long, max_lat, max_long]``.  Candidates
    that overlap each other are merged and "vote" for one another; the most
    voted non-overlapping regions win.

    Args:
        recentPlaceKey: value stored in the "key" field of the saved place
            entry; an existing entry with the same key is replaced.
        timeRanges: iterable of (start, end) pairs passed to ``getData``.
        numPlaces: number of regions to keep.  When 1, "bounds" holds a
            single box instead of a list of boxes.
        answerKey: answer under which the places list is read and saved.

    Returns:
        dict mapping profile uuid to the saved list of place entries.
        Profiles with no candidate region are absent from the dict and
        nothing is saved for them.
    """
    profiles = Profile.objects.all()
    data = {}
    for profile in profiles:
        # TODO: figure out how to get at a token here...
        internalDataStore = getInternalDataStore(profile, "Living Lab", "My Places", "")
        locations = []  # not used below

        # An explanation for why we're doing things the way we are below
        # (there are a few obvious strategies for finding places in location data):
        # 1) Naive approach - take all location samples in all time ranges, find clusters within them,
        #    take the one with the most points in it.
        # 2) Faster, but more complicated - do 1) for each time range individually to get candidate regions.
        #    Loop over candidate regions, collapsing and "voting" for those that overlap. Take the one with the most votes.
        #    Notes: This is essentially 2 levels of clustering with the simplification that overlapping regions would
        #    have been clustered together anyway (i.e. bounding boxes should be similar, but not the same, as strategy 1)
        #    Pros: Faster - each clustering is limited to 100 entries. In practice, this is more than enough.
        #        If this poses an issue, time ranges can be chosen more carefully (more / shorter time ranges)
        #    Cons: Bounding boxes aren't the same as 1). In particular, two candidate boxes may not overlap, but should
        #        have been clustered together anyway.
        # 3) Binning pre-process - Same as 1), but perform a binning pre-process on the location data, collapsing multiple
        #    samples into single entries, with associated weights.
        #    Notes: This is essentially a lower-resolution version of strategy 1. Bounding boxes should be lower-resolution
        #    versions of those from strategy 1.
        #    Pros: Bounding boxes should be the same as #1. Takes into account all entries when clustering.
        #    Cons: Less fine-grained control over the number of entries per cluster than #2. In particular, for sparse
        #        location data, this may not reduce the number of entries we must cluster.
        # The following is an implementation of method #2:
        potentialRegions = []
        for timeRange in timeRanges:
            # NOTE: is a limit on the number of entries still necessary, if we're choosing the timeRanges carefully?
            # `or []` covers getData returning None (or another falsy value).
            values = [
                entry["value"] for entry in internalDataStore.getData(
                    "LocationProbe", timeRange[0], timeRange[1]) or []
            ]
            # Use all locations except the most gratuitously inaccurate ones
            values = [
                value for value in values if float(value["maccuracy"]) < 100
            ]
            clusters = clusterFunfLocations(values, 100)
            if (len(clusters) > 0):
                # Only the largest cluster of this time range becomes a candidate.
                clusterLocations = max(clusters, key=lambda cluster: len(cluster))
                if isinstance(clusterLocations, list):
                    lats = [loc[0] for loc in clusterLocations]
                    longs = [loc[1] for loc in clusterLocations]
                    if min(lats) != max(lats) and min(longs) != max(longs):
                        # Only add regions that aren't degenerate (zero width or zero height)
                        potentialRegions.append(
                            [min(lats), min(longs), max(lats), max(longs)])

        if len(potentialRegions) > 0:
            # For every candidate, list the OTHER candidates it overlaps with.
            overlaps = [{
                "region": r1,
                "overlapList": [
                    r2 for r2 in potentialRegions
                    if r2 is not r1 and boundsOverlap(r1, r2)
                ]
            } for r1 in potentialRegions]
            # Merge each candidate with its overlapping neighbours; the number
            # of neighbours is its vote count.
            reduced = [{
                "region": reduce(lambda r1, r2: mergeBoxes(r1, r2), r["overlapList"], r["region"]),
                "votes": len(r["overlapList"])
            } for r in overlaps]
            # Most votes first.
            reduced.sort(key=lambda r: -r["votes"])
            # Keep a merged region only if listContainsOverlap reports no
            # overlap with the regions already accepted.
            final = []
            for r in reduced:
                if not listContainsOverlap([f["region"] for f in final], r["region"]):
                    final.append(r)
            mostOverlap = final[:min(len(final), numPlaces)]
            mostVoted = [r["region"] for r in mostOverlap]
            if numPlaces == 1:
                # A single place is stored as one box, not a one-element list.
                mostVoted = mostVoted[0]
            answer = internalDataStore.getAnswerList(answerKey)
            answer = answer[0]["value"] if answer.count() > 0 else []
            # Replace any previous entry for this place key.
            data[profile.uuid] = [
                datum for datum in answer if datum["key"] != recentPlaceKey
            ]
            data[profile.uuid].append({
                "key": recentPlaceKey,
                "bounds": mostVoted
            })
            answer = data[profile.uuid]
            internalDataStore.saveAnswer(answerKey, answer)
    return data
def recentSocialHealthScores():
    """Store each scored user's social-health scores plus population bands.

    Fetches the per-user score dicts from recentActivityScore(),
    recentSocialScore() and recentFocusScore(), computes the mean and the
    population standard deviation of each, and saves a "socialhealth" answer
    for every profile whose uuid appears in the activity scores.

    Each saved answer is a list of nine entries, in this order: the user's
    own activity/social/focus values (layer "User", defaulting to 0 when the
    user has no score of that kind), then the "averageLow" band
    (mean - stddev, floored at 0) and the "averageHigh" band
    (mean + stddev, capped at 10) for the same three keys.

    Returns:
        dict mapping profile uuid to the list of entries saved for it.
    """
    profiles = Profile.objects.all()
    data = {}

    keys = ["activity", "social", "focus"]
    # Same order as `keys`; each element is a dict looked up by profile uuid.
    scoresByKey = [recentActivityScore(), recentSocialScore(), recentFocusScore()]
    activityScores = scoresByKey[0]
    scoresList = [list(scores.values()) for scores in scoresByKey]
    print(scoresList)

    averages = [
        sum(scores) / len(scores) if len(scores) > 0 else 0
        for scores in scoresList
    ]

    # Population standard deviation per score kind. An empty score list gets
    # 0, mirroring the guard on the averages, instead of dividing by zero.
    stdDevs = []
    for scores, average in zip(scoresList, averages):
        if len(scores) > 0:
            squaredDeviations = [(x - average) * (x - average) for x in scores]
            stdDevs.append(math.sqrt(sum(squaredDeviations) / len(scores)))
        else:
            stdDevs.append(0)

    print("Averages (activity, social, focus):")
    print(averages)
    print("Standard Deviations (activity, social, focus):")
    print(stdDevs)

    for profile in profiles:
        # Only users that have an activity score get an answer stored.
        if profile.uuid not in activityScores:
            continue
        print("storing %s" % profile.uuid)
        internalDataStore = getInternalDataStore(profile, "Living Lab", "Social Health Tracker", "")

        entries = []
        for key, scores in zip(keys, scoresByKey):
            entries.append({
                "key": key,
                "layer": "User",
                "value": scores.get(profile.uuid, 0),
            })
        for i, key in enumerate(keys):
            entries.append({
                "key": key,
                "layer": "averageLow",
                "value": max(0, averages[i] - stdDevs[i]),
            })
        for i, key in enumerate(keys):
            entries.append({
                "key": key,
                "layer": "averageHigh",
                "value": min(averages[i] + stdDevs[i], 10),
            })

        data[profile.uuid] = entries
        internalDataStore.saveAnswer("socialhealth", entries)

    # After we're done, re-compute the time graph data to include zeros for blanks
    # not ideal to compute this twice, but it gets the job done
    recentActivityLevels(True)
    # Purposely excluding social and focus scores - blanks are included in
    # their calculations, as a blank could imply an actual zero rather than
    # missing data.
    #recentSocialLevels(True)
    #recentFocusLevels(True)
    return data
def recentSocialHealthScores2():
    """Compute and store per-user social-health scores with population bands.

    Builds consecutive four-hour time ranges from getStartTime(6, True) up to
    the current time, aggregates activity, social and focus levels for each
    profile over those ranges, and scores them. Profiles with no activity
    levels are skipped entirely.

    For every remaining ("active") profile the saved "socialhealth" answer is
    a dict with three keys:
      "user"        - that profile's activity/social/focus scores
      "averageLow"  - mean - stddev across active users, floored at 0
      "averageHigh" - mean + stddev across active users, capped at 10

    Returns:
        dict mapping profile uuid to the answer dict described above.
    """
    profiles = Profile.objects.all()
    rangeLength = 3600 * 4
    firstStart = int(getStartTime(6, True))
    lastStart = int(time.time())
    timeRanges = [
        (begin, begin + rangeLength)
        for begin in range(firstStart, lastStart, rangeLength)
    ]

    totals = {"activity": 0, "social": 0, "focus": 0}
    activeUsers = []
    data = {}

    for profile in profiles:
        token = getToken(profile, "app-uuid")
        store = getInternalDataStore(profile, "Living Lab", "Social Health Tracker", token)
        activityLevels = aggregateForUser(store, "RecentActivityByHour", timeRanges, activityForTimeRange, False)
        if len(activityLevels) == 0:
            # No activity data: this user takes no part in the statistics.
            continue

        socialLevels = aggregateForUser(store, "RecentSocialByHour", timeRanges, socialForTimeRange, True)
        focusLevels = aggregateForUser(store, "RecentFocusByHour", timeRanges, focusForTimeRange, True)

        userScores = {
            "activity": computeActivityScore(activityLevels),
            "social": computeSocialScore(socialLevels),
            "focus": computeFocusScore(focusLevels),
        }
        for key in userScores:
            totals[key] += userScores[key]

        activeUsers.append(profile)
        data[profile.uuid] = {"user": userScores}

    numUsers = len(activeUsers)
    if numUsers == 0:
        return data

    # Mean and population standard deviation of each score kind.
    averages = {}
    stdDevs = {}
    for key in totals:
        mean = totals[key] / numUsers
        squaredDeviations = [
            (data[user.uuid]["user"][key] - mean) ** 2 for user in activeUsers
        ]
        averages[key] = mean
        stdDevs[key] = math.sqrt(sum(squaredDeviations) / len(squaredDeviations))

    for profile in activeUsers:
        token = getToken(profile, "app-uuid")
        store = getInternalDataStore(profile, "Living Lab", "Social Health Tracker", token)
        record = data[profile.uuid]
        record["averageLow"] = {
            key: max(0, averages[key] - stdDevs[key]) for key in stdDevs
        }
        record["averageHigh"] = {
            key: min(averages[key] + stdDevs[key], 10) for key in stdDevs
        }
        store.saveAnswer("socialhealth", record)

    return data
def findRecentPlaceBounds(recentPlaceKey, timeRanges, numPlaces=1, answerKey="RecentPlaces"):
    """Find bounding boxes of each user's most-visited places and store them.

    For every profile, one candidate region is derived per time range from
    the largest location cluster in that range; overlapping candidates are
    merged and ranked by how many other candidates they overlap ("votes").
    The top `numPlaces` non-overlapping regions are written into the
    `answerKey` answer under `recentPlaceKey`, replacing any previous entry
    with that key and keeping entries with other keys.

    Args:
        recentPlaceKey: value of the "key" field of the stored entry.
        timeRanges: iterable of (start, end) pairs passed to getData.
        numPlaces: how many regions to keep. When exactly 1, "bounds" is a
            single [minLat, minLong, maxLat, maxLong] box rather than a list
            of boxes.
        answerKey: name of the answer that holds the entries.

    Returns:
        dict mapping profile uuid to the answer list saved for that profile.
        Profiles that produced no candidate region are absent.
    """
    # Strategies considered for finding places in location data (author's notes):
    # 1) Naive approach - take all location samples in all time ranges, find
    #    clusters within them, take the one with the most points in it.
    # 2) Faster, but more complicated - do 1) for each time range individually
    #    to get candidate regions. Loop over candidate regions, collapsing and
    #    "voting" for those that overlap. Take the one with the most votes.
    #    Notes: essentially 2 levels of clustering, with the simplification
    #      that overlapping regions would have been clustered together anyway
    #      (i.e. bounding boxes should be similar, but not the same, as 1).
    #    Pros: faster - each clustering is limited to 100 entries. In practice
    #      this is more than enough; if it poses an issue, time ranges can be
    #      chosen more carefully (more / shorter time ranges).
    #    Cons: bounding boxes aren't the same as 1). In particular, two
    #      candidate boxes may not overlap, but should have been clustered
    #      together anyway.
    # 3) Binning pre-process - same as 1), but first bin the location data,
    #    collapsing multiple samples into single entries with associated weights.
    #    Notes: essentially a lower-resolution version of strategy 1; bounding
    #      boxes should be lower-resolution versions of those from strategy 1.
    #    Pros: bounding boxes should be the same as 1). Takes all entries into
    #      account when clustering.
    #    Cons: less fine-grained control over the number of entries per cluster
    #      than 2). In particular, for sparse location data, this may not
    #      reduce the number of entries we must cluster.
    # What follows implements strategy 2.
    data = {}

    for profile in Profile.objects.all():
        # TODO: figure out how to get at a token here...
        store = getInternalDataStore(profile, "Living Lab", "My Places", "")

        candidates = []
        for timeRange in timeRanges:
            # NOTE: is a limit on the number of entries still necessary, if
            # we're choosing the timeRanges carefully?
            entries = store.getData("LocationProbe", timeRange[0], timeRange[1]) or []
            values = [entry["value"] for entry in entries]
            # Use all locations except the most gratuitously inaccurate ones
            values = [value for value in values if float(value["maccuracy"]) < 100]

            clusters = clusterFunfLocations(values, 100)
            if len(clusters) == 0:
                continue
            largest = max(clusters, key=len)
            if not isinstance(largest, list):
                continue

            lats = [point[0] for point in largest]
            longs = [point[1] for point in largest]
            # Only add regions that aren't degenerate (single points)
            if min(lats) != max(lats) and min(longs) != max(longs):
                candidates.append([min(lats), min(longs), max(lats), max(longs)])

        if len(candidates) == 0:
            continue

        # Phase 1: for every candidate, collect the other candidates it overlaps.
        overlapLists = [
            [other for other in candidates
             if other is not region and boundsOverlap(region, other)]
            for region in candidates
        ]

        # Phase 2: merge each candidate with its overlaps; overlap count = votes.
        ranked = []
        for region, overlapping in zip(candidates, overlapLists):
            merged = region
            for other in overlapping:
                merged = mergeBoxes(merged, other)
            ranked.append({"region": merged, "votes": len(overlapping)})
        ranked.sort(key=lambda candidate: candidate["votes"], reverse=True)

        # Walk from most to least voted, keeping only regions that do not
        # overlap one already kept.
        kept = []
        for candidate in ranked:
            keptRegions = [entry["region"] for entry in kept]
            if not listContainsOverlap(keptRegions, candidate["region"]):
                kept.append(candidate)

        mostVoted = [entry["region"] for entry in kept[:min(len(kept), numPlaces)]]
        if numPlaces == 1:
            mostVoted = mostVoted[0]

        stored = store.getAnswerList(answerKey)
        previous = stored[0]["value"] if stored.count() > 0 else []
        updated = [datum for datum in previous if datum["key"] != recentPlaceKey]
        updated.append({"key": recentPlaceKey, "bounds": mostVoted})

        data[profile.uuid] = updated
        store.saveAnswer(answerKey, updated)

    return data
def get_internal_datastore(self, request):
    """Return the internal data store for the owner named in the request.

    Args:
        request: object exposing a `GET` mapping of query parameters; may be
            None or otherwise falsy.

    Returns:
        The result of getInternalDataStore for the profile whose uuid is
        given by the "datastore_owner__uuid" parameter, using the
        "bearer_token" parameter as the token; None when the request is
        falsy or either parameter is missing.
    """
    if not request:
        return None

    params = request.GET
    if "datastore_owner__uuid" not in params or "bearer_token" not in params:
        return None

    # get_or_create() returns the (object, created) pair being unpacked here;
    # plain get() returns a single model instance, so unpacking its result
    # fails at runtime.
    profile, _created = Profile.objects.get_or_create(uuid=params["datastore_owner__uuid"])
    token = params["bearer_token"]
    # NOTE(review): the other call sites in this file pass four arguments
    # (profile, app id, lab id, token) to getInternalDataStore - confirm that
    # this two-argument call matches the helper's signature.
    return getInternalDataStore(profile, token)