async def location_handler(message: types.Message):
    """Handle a location message sent by a user.

    Unknown users are redirected to ``start_handler``. Users whose stored
    stage is not ``Stage.geo`` are ignored. Otherwise the shared position is
    compared against ``config.LAT`` / ``config.LON``: when it lies further
    than ``config.RADIUS`` away, or the message was forwarded, the user gets
    the ``GEO_FAILED`` reply; if not, the user is moved to ``Stage.menu``,
    added to the queue stored under ``user['get_queue']`` and shown their
    position in that queue.
    """
    position = message.location
    lat, lon = position.latitude, position.longitude
    sender_id = message.from_user.id

    user = await db.users.find_one({'uid': sender_id})
    if user is None:
        return await start_handler(message)
    if user['stage'] != Stage.geo:
        # The user is not currently being asked for a location: ignore it.
        return

    # A forwarded location is rejected just like one outside the radius.
    if get_spherical_distance(lat, lon, config.LAT, config.LON) > config.RADIUS \
            or message.forward_from is not None:
        return await message.reply(
            t('GEO_FAILED', locale=user['lang']),
            reply_markup=keyboards.get_geo_kbd(),
        )

    await db.users.find_one_and_update(
        {'uid': sender_id},
        {'$set': {'stage': Stage.menu}},
    )
    await message.reply(
        t('GEO_SUCCESS', locale=user['lang']),
        reply_markup=types.ReplyKeyboardRemove(),
    )

    await aq.aapi.add_user_to_queue(user['get_queue'], user['uid'])
    user_data = await aq.aapi.get_user_info(user['uid'])
    queue_id = user['get_queue']

    # Pick the entry for the queue that was just joined; indexing with [0]
    # raises IndexError when the API response does not contain it.
    queue = [entry for entry in user_data['queues'] if queue_id == entry['id']][0]

    text = t(
        'USER_QUEUE_INFO',
        locale=user['lang'],
        queue_name=queue['name'],
        pos=queue['position']['relativePosition'],
    )
    await message.answer(
        text,
        reply_markup=keyboards.get_update_my_queue_kbd(queue_id, user['lang']),
        parse_mode=types.ParseMode.HTML,
    )
def test_proximity_sort(self):
    """Stamps requested with sort="proximity" come back roughly nearest-first."""
    origin = (40.73, -73.99)  # ~NYC

    query = HTTPGenericCollectionSlice()
    query.sort = "proximity"
    query.offset = 0
    query.limit = 10
    query.coordinates = "%s,%s" % origin

    stamps = self._getStamps(query)
    self.assertLength(stamps, 9)

    earth_radius = 3959.0  # miles
    last_dist = -earth_radius

    # ensure results are approximately sorted by distance
    for stamp in stamps:
        location = stamp.entity.coordinates
        here = (location.lat, location.lng)
        dist = utils.get_spherical_distance(origin, here) * earth_radius

        # allow a one-mile fudge factor because we're using spherical
        # distance here, and the distance calculation when sorting
        # results is less precise but faster to calculate (L2 norm)
        self.assertTrue(dist >= last_dist - 1)
        last_dist = dist
def getSuggestedUserIds(self, userId, request):
    """
        Returns personalized user suggestions based on several weighting
        signals, namely:  friend overlap, stamp overlap, stamp category
        overlap, geographical proximity of stamp clusters, FB / Twitter
        friendship, as well as several smaller quality signals.

        userId  -- id of the user to build suggestions for.
        request -- object exposing ``coordinates`` (with ``lat`` / ``lng``),
                   ``facebook_token``, ``twitter_key``, ``twitter_secret``,
                   ``limit`` and ``offset``; each of them may be None.

        Returns a list of ``(user_id, details)`` tuples for the requested
        page (``limit`` defaults to 10, ``offset`` to 0), ordered by
        descending ``self._get_potential_friend_score``.  ``details`` is
        element ``[1]`` of that scorer's result when it is called with
        ``explain=True``.
    """

    # TODO: support ignoring a friend suggestion
    # TODO: ignore previously followed friends that you've since unfollowed
    # TODO: better support for new users w/out stamps or friends

    max_distance = 2
    earth_radius = 3959.0  # miles
    category_names = ['place', 'music', 'film', 'book', 'app', 'other']

    coords = None
    req_coords = request.coordinates
    if req_coords is not None and req_coords.lat is not None and req_coords.lng is not None:
        coords = (req_coords.lat, req_coords.lng)

    # ------------------------------------------------------------------
    # Walk the friend graph up to max_distance hops away from userId.
    # Users first reached at exactly max_distance are not expanded; we
    # only count how many times each of them was reached.
    # ------------------------------------------------------------------
    friends_of_friends = {}
    visited_users = set()
    todo = []
    friends = None

    def visit_user(user_id, distance):
        if user_id in visited_users:
            return

        if distance == max_distance:
            friends_of_friends[user_id] = friends_of_friends.get(user_id, 0) + 1
        else:
            visited_users.add(user_id)
            heapq.heappush(todo, (distance, user_id))

    # seed the algorithm with the initial user at distance 0
    visit_user(userId, 0)

    while todo:
        distance, user_id = heapq.heappop(todo)

        if distance < max_distance:
            friend_ids = self.getFriends(user_id)

            if friends is None:
                # The first user popped is userId itself, so this is the set
                # of direct friends (plus userId) that must never be suggested.
                friends = set(friend_ids)
                friends.add(userId)

            for friend_id in friend_ids:
                visit_user(friend_id, distance + 1)

    potential_friends = defaultdict(dict)

    total = sum(friends_of_friends.itervalues())
    weight = 1.0 / total if total > 0 else 0.0

    for user_id, friend_overlap in friends_of_friends.iteritems():
        if friend_overlap > 1:
            signals = potential_friends[user_id]
            signals['num_friend_overlap'] = friend_overlap
            signals['friend_overlap'] = (friend_overlap ** 3) * weight

    user_entity_ids, user_categories, user_clusters, _seed_user = self._get_stamp_info(userId)
    num_user_entities = len(user_entity_ids)
    inv_len_user_entity_ids = 1.0 / num_user_entities if num_user_entities > 0 else 0.0

    # seed potential friends with users who have stamped at least one of the same entities
    for entity_id in user_entity_ids:
        stamps = self.stamp_collection.getStampsForEntity(entity_id, limit=200)

        for stamp in stamps:
            user_id = stamp.user.user_id

            if user_id not in friends:
                signals = potential_friends[user_id]
                signals['num_stamp_overlap'] = signals.get('num_stamp_overlap', 0) + 1

    # seed potential friends with facebook friends
    if request.facebook_token is not None:
        facebook_friends = self.api._getFacebookFriends(request.facebook_token)

        for friend in facebook_friends:
            user_id = friend.user_id

            if user_id not in friends:
                potential_friends[user_id]['facebook_friend'] = True

    # seed potential friends with twitter friends
    if request.twitter_key is not None and request.twitter_secret is not None:
        # The Twitter credentials guarded above are what gets passed on; the
        # Facebook token used to be passed here by mistake.
        # NOTE(review): assumes _getTwitterFriends takes (key, secret) -- confirm.
        twitter_friends = self.api._getTwitterFriends(request.twitter_key, request.twitter_secret)

        for friend in twitter_friends:
            user_id = friend.user_id

            if user_id not in friends:
                potential_friends[user_id]['twitter_friend'] = True

    # ------------------------------------------------------------------
    # process each potential friend
    # ------------------------------------------------------------------
    pruned = set()
    processed = 0

    # inverse of the seed user's entity count; loop-invariant
    sum0 = inv_len_user_entity_ids

    for user_id, values in potential_friends.iteritems():
        overlap = values.get('num_stamp_overlap', 0)

        # Skip users in self._suggested, as well as candidates whose only
        # signal is a single stamped entity in common.
        weak_candidate = ('num_friend_overlap' not in values and
                          'facebook_friend' not in values and
                          'twitter_friend' not in values and
                          overlap <= 1)

        if user_id in self._suggested or weak_candidate:
            pruned.add(user_id)
            continue

        processed += 1
        entity_ids, categories, clusters, friend = self._get_stamp_info(user_id)

        if 'num_stamp_overlap' in values:
            values['stamp_overlap'] = overlap * overlap * inv_len_user_entity_ids

        # Euclidean distance between the two category histograms,
        # turned into a similarity.
        summation = 0.0
        for category in category_names:
            diff = user_categories[category] - categories[category]
            summation += diff * diff

        values['category_overlap'] = 1.0 - math.sqrt(summation)

        num_entities = len(entity_ids)
        sum1 = 1.0 / num_entities if num_entities > 0 else 0.0

        # NOTE(review): score starts at the -1 "no match" sentinel and the
        # matching values are added on top of it, so the accumulated total
        # is one lower than the plain sum and small totals end up as None.
        # Left unchanged because the scorer may be tuned against it -- confirm.
        score = -1
        max_val = [(0, None), (0, None)]

        # compare seed user's stamp clusters with this user's stamp clusters
        for cluster0 in user_clusters:
            ll0 = cluster0['avg']
            len0 = len(cluster0['data']) * sum0

            min_dist = 10000000
            min_len = -1

            for cluster1 in clusters:
                len1 = len(cluster1['data']) * sum1
                dist = earth_radius * utils.get_spherical_distance(ll0, cluster1['avg'])

                if dist >= 0 and dist < min_dist:
                    min_dist = dist
                    min_len = len1

            if min_len > 0:
                inv_dist = 1.0 / math.log(min_dist) if min_dist > 1.0 else 0.0
                value = len0 * min_len * inv_dist
                score = score + value

                # track the best-matching seed clusters seen so far
                if max_val[0][1] is None or value > max_val[0][0]:
                    if max_val[1][1] is None or value > max_val[1][0]:
                        max_val[0] = max_val[1]
                        max_val[1] = (value, ll0)
                    else:
                        max_val[0] = (value, ll0)

        if score >= 0 and len(user_clusters) > 0:
            score = score / len(user_clusters)

        if score < 0:
            score = None

        values['proximity'] = score
        values['clusters'] = max_val

        # Distance from the requester's current position to the nearest of
        # this user's clusters.  The key is only set when there is at least
        # one cluster to measure against.
        if coords is not None and clusters:
            values['current_proximity'] = min(
                earth_radius * utils.get_spherical_distance(coords, cluster['avg'])
                for cluster in clusters)

        num_stamps = friend.num_stamps if 'num_stamps' in friend else 0
        num_stamps -= overlap
        values['has_stamps'] = (num_stamps >= 1)
        values['num_stamps'] = math.log(num_stamps) if num_stamps >= 1 else 0.0

    logs.info("potential friends:  %d" % len(potential_friends))
    logs.info("friends of friends: %d" % len(friends_of_friends))
    logs.info("processed: %d; pruned: %d" % (processed, len(pruned)))

    limit = request.limit if request.limit is not None else 10
    offset = request.offset if request.offset is not None else 0

    # Only drop the pruned candidates when enough unpruned ones remain to
    # fill the requested page.
    if pruned and len(potential_friends) - len(pruned) >= offset + limit:
        logs.debug("pruning %d potential friends (out of %d)" % (len(pruned), len(potential_friends)))

        potential_friends = dict((uid, vals)
                                 for uid, vals in potential_friends.iteritems()
                                 if uid not in pruned)

        logs.debug("removed %d potential friends (now %d)" % (len(pruned), len(potential_friends)))

    # TODO: optimize this sorted loop to only retain the top n results?
    users = sorted(potential_friends.iteritems(), key=self._get_potential_friend_score, reverse=True)
    users = users[offset : offset + limit]

    return [(kv[0], self._get_potential_friend_score(kv, explain=True, coords=coords)[1])
            for kv in users]
def _get_stamp_info(self, user_id): """ Processes a single user, returning aggregate statistics about their stamp behavior, including all entity_id's that the user's stamped, a histogram of the categories those stamps fall into, a description of their geographical stamp clusters, and the user object itself. """ stampIds = self.collection_collection.getUserStampIds(user_id) stamps = self.stamp_collection.getStamps(stampIds, limit=1000, sort='modified') user = self.user_collection.getUser(user_id) categories = defaultdict(int) num_stamps = len(stamps) entity_ids = frozenset(s.entity_id for s in stamps) for stamp in stamps: categories[stamp.entity.category] = categories[stamp.entity.category] + 1.0 / num_stamps earthRadius = 3959.0 # miles clusters = [ ] trivial = True # find stamp clusters for stamp in stamps: if stamp.lat is not None and stamp.lng is not None: found_cluster = False ll = [ stamp.lat, stamp.lng ] #print "%s) %s" % (stamp.title, ll) for cluster in clusters: dist = earthRadius * utils.get_spherical_distance(ll, cluster['avg']) #print "%s) %s vs %s => %s (%s)" % (stamp.title, ll, cluster['avg'], dist, cluster['data']) if dist < 10: cluster['data'].append(stamp.title) len_cluster = len(cluster['data']) found_cluster = True trivial = False cluster['sum'][0] = cluster['sum'][0] + ll[0] cluster['sum'][1] = cluster['sum'][1] + ll[1] cluster['avg'][0] = cluster['sum'][0] / len_cluster cluster['avg'][1] = cluster['sum'][1] / len_cluster #print "%s) %d %s" % (stamp.title, len_cluster, cluster) break if not found_cluster: clusters.append({ 'avg' : [ ll[0], ll[1] ], 'sum' : [ ll[0], ll[1] ], 'data' : [ stamp.title ], }) clusters2 = [] if trivial: clusters2 = clusters else: # attempt to remove trivial clusters as outliers for cluster in clusters: if len(cluster['data']) > 1: clusters2.append(cluster) return entity_ids, categories, clusters2, user
def get_clusters(self, entities, limit=None):
    """
        Groups the given entities into geographic clusters and returns the
        cluster with the most members.

        entities -- iterable of objects with a ``coordinates`` attribute that
                    is either None (the entity is skipped) or exposes
                    ``lat`` and ``lng``.
        limit    -- accepted for interface compatibility; currently unused.

        Returns a dict with keys 'avg' ([lat, lng] mean of the members),
        'sum' ([lat, lng] running totals) and 'data' (list of (lat, lng)
        tuples), or None when no entity has coordinates.
    """

    earth_radius = 3959.0  # miles
    threshold = 10.0
    clusters = []

    # find entity clusters
    for entity in entities:
        coords = entity.coordinates
        if coords is None:
            continue

        # TODO: really should be retaining this for stamps overall instead of just subset here...
        ll = [coords.lat, coords.lng]

        for cluster in clusters:
            dist = earth_radius * utils.get_spherical_distance(ll, cluster['avg'])

            if dist < threshold:
                # join the first cluster that is close enough and refresh
                # its running average
                cluster['data'].append((ll[0], ll[1]))
                len_cluster = len(cluster['data'])

                cluster['sum'][0] = cluster['sum'][0] + ll[0]
                cluster['sum'][1] = cluster['sum'][1] + ll[1]

                cluster['avg'][0] = cluster['sum'][0] / len_cluster
                cluster['avg'][1] = cluster['sum'][1] / len_cluster
                break
        else:
            # no existing cluster was close enough: start a new one
            clusters.append({
                'avg'  : [ll[0], ll[1]],
                'sum'  : [ll[0], ll[1]],
                'data' : [(ll[0], ll[1])],
            })

    if not clusters:
        # Nothing had coordinates; indexing clusters[0] here would raise
        # IndexError.
        return None

    # attempt to remove trivial (single-member) clusters as outliers, but
    # fall back to all clusters when every one of them is trivial
    candidates = [cluster for cluster in clusters if len(cluster['data']) > 1] or clusters

    # max() returns the first of several equally large clusters, which is
    # what a stable descending sort followed by [0] would pick as well.
    return max(candidates, key=lambda c: len(c['data']))