Ejemplo n.º 1
0
    async def location_handler(message: types.Message):
        """
        Handle a location message sent while the user is in the geo stage.

        Unknown users are handed over to ``start_handler``; users whose stored
        stage is not ``Stage.geo`` are ignored. A location that is farther than
        ``config.RADIUS`` from (``config.LAT``, ``config.LON``), or a message
        that was forwarded, gets the ``GEO_FAILED`` reply. Otherwise the user is
        moved to ``Stage.menu``, added to the queue stored under
        ``user['get_queue']`` and sent their position in that queue.
        """
        lat = message.location.latitude
        lon = message.location.longitude
        sender_id = message.from_user.id
        user = await db.users.find_one({'uid': sender_id})

        if user is None:
            return await start_handler(message)

        if user['stage'] != Stage.geo:
            return  # a location is not expected from this user right now

        # The distance check runs first; the forwarded check is only evaluated
        # when the location itself is inside the allowed radius.
        too_far = get_spherical_distance(lat, lon, config.LAT, config.LON) > config.RADIUS
        if too_far or message.forward_from is not None:
            return await message.reply(
                t('GEO_FAILED', locale=user['lang']),
                reply_markup=keyboards.get_geo_kbd(),
            )

        await db.users.find_one_and_update(
            {'uid': sender_id},
            {'$set': {'stage': Stage.menu}},
        )
        await message.reply(
            t('GEO_SUCCESS', locale=user['lang']),
            reply_markup=types.ReplyKeyboardRemove(),
        )
        await aq.aapi.add_user_to_queue(user['get_queue'], user['uid'])

        user_data = await aq.aapi.get_user_info(user['uid'])
        queue_id = user['get_queue']
        # Raises IndexError if the queue is not among the user's queues.
        queue = [entry for entry in user_data['queues'] if queue_id == entry['id']][0]

        info_text = t(
            'USER_QUEUE_INFO',
            locale=user['lang'],
            queue_name=queue['name'],
            pos=queue['position']['relativePosition'],
        )
        await message.answer(
            info_text,
            reply_markup=keyboards.get_update_my_queue_kbd(queue_id, user['lang']),
            parse_mode=types.ParseMode.HTML,
        )
Ejemplo n.º 2
0
 def test_proximity_sort(self):
     """
         Requests stamps sorted by proximity to a fixed point (~NYC) and 
         verifies that nine results come back in approximately increasing 
         order of distance from that point.
     """
     center = (40.73, -73.99) # ~NYC
     
     collection_slice             = HTTPGenericCollectionSlice()
     collection_slice.sort        = "proximity"
     collection_slice.offset      = 0
     collection_slice.limit       = 10
     collection_slice.coordinates = "%s,%s" % center
     
     stamps = self._getStamps(collection_slice)
     self.assertLength(stamps, 9)
     
     earth_radius_miles = 3959.0
     
     # start below any possible distance so the first stamp always passes
     last_dist = -earth_radius_miles
     
     for stamp in stamps:
         ll   = stamp.entity.coordinates
         dist = utils.get_spherical_distance(center, (ll.lat, ll.lng)) * earth_radius_miles
         
         # tolerate one mile of disorder: the check uses spherical distance 
         # while the sort itself uses a faster, less precise L2 norm
         self.assertTrue(dist >= last_dist - 1)
         last_dist = dist
Ejemplo n.º 3
0
 def getSuggestedUserIds(self, userId, request):
     """
         Returns personalized user suggestions based on several weighting 
         signals, namely:  friend overlap, stamp overlap, stamp category 
         overlap, geographical proximity of stamp clusters, FB / Twitter 
         friendship, as well as several smaller quality signals.
         
         userId  -- id of the user to generate suggestions for.
         request -- object exposing the optional attributes coordinates 
                    (with lat / lng), facebook_token, twitter_key, 
                    twitter_secret, limit (defaults to 10) and offset 
                    (defaults to 0).
         
         Returns a list of (user_id, explanation) tuples for the requested 
         page of candidates, ordered by descending 
         self._get_potential_friend_score; explanation is the second item 
         returned by that scorer when called with explain=True.
     """
     
     # TODO: support ignoring a friend suggestion
     # TODO: ignore previously followed friends that you've since unfollowed
     # TODO: better support for new users w/out stamps or friends
     
     friends_of_friends  = {}
     visited_users       = set()
     pruned              = set()
     todo                = []
     max_distance        = 2
     count               = 0
     friends             = None
     coords              = None
     earthRadius         = 3959.0 # miles
     
     if request.coordinates is not None and request.coordinates.lat is not None and request.coordinates.lng is not None:
         coords = (request.coordinates.lat, request.coordinates.lng)
     
     def visit_user(user_id, distance):
         if user_id in visited_users:
             return
         
         if distance == max_distance:
             # users at the search frontier are never expanded; only count 
             # how many times they were reached
             friends_of_friends[user_id] = friends_of_friends.get(user_id, 0) + 1
         else:
             visited_users.add(user_id)
             heapq.heappush(todo, (distance, user_id))
     
     # seed the algorithm with the initial user at distance 0
     visit_user(userId, 0)
     
     while todo:
         distance, user_id = heapq.heappop(todo)
         
         if distance < max_distance:
             friend_ids = self.getFriends(user_id)
             
             # the first user popped is the seed, so this captures the seed 
             # user's direct friends (plus the seed user itself)
             if friends is None:
                 friends = set(friend_ids)
                 friends.add(userId)
             
             for friend_id in friend_ids:
                 visit_user(friend_id, distance + 1)
     
     potential_friends = defaultdict(dict)
     
     total  = sum(friends_of_friends.itervalues())
     weight = 1.0 / total if total > 0 else 0.0
     
     for user_id, friend_overlap in friends_of_friends.iteritems():
         if friend_overlap > 1:
             potential_friends[user_id]['num_friend_overlap'] = friend_overlap
             potential_friends[user_id]['friend_overlap']     = (friend_overlap ** 3) * weight
     
     user_entity_ids, user_categories, user_clusters, user = self._get_stamp_info(userId)
     num_user_entity_ids     = len(user_entity_ids)
     inv_len_user_entity_ids = 1.0 / num_user_entity_ids if num_user_entity_ids > 0 else 0.0
     
     # seed potential friends with users who have stamped at least one of the same entities
     for entity_id in user_entity_ids:
         stamps = self.stamp_collection.getStampsForEntity(entity_id, limit=200)
         
         for stamp in stamps:
             user_id = stamp.user.user_id
             
             if user_id not in friends:
                 candidate = potential_friends[user_id]
                 candidate['num_stamp_overlap'] = candidate.get('num_stamp_overlap', 0) + 1
     
     # seed potential friends with facebook friends
     if request.facebook_token is not None:
         facebook_friends = self.api._getFacebookFriends(request.facebook_token)
         
         for friend in facebook_friends:
             user_id = friend.user_id
             
             if user_id not in friends:
                 potential_friends[user_id]['facebook_friend'] = True
     
     # seed potential friends with twitter friends
     if request.twitter_key is not None and request.twitter_secret is not None:
         # NOTE(review): assumes _getTwitterFriends takes (key, secret), 
         # matching the credentials checked by the guard above -- confirm
         twitter_friends = self.api._getTwitterFriends(request.twitter_key, request.twitter_secret)
         
         for friend in twitter_friends:
             user_id = friend.user_id
             
             if user_id not in friends:
                 potential_friends[user_id]['twitter_friend'] = True
     
     # process each potential friend
     for user_id, values in potential_friends.iteritems():
         has_friend_signal = ('num_friend_overlap' in values or 
                              'facebook_friend'    in values or 
                              'twitter_friend'     in values)
         
         # skip users in self._suggested as well as users whose only signal 
         # is a single overlapping stamp
         if user_id in self._suggested or (not has_friend_signal and values.get('num_stamp_overlap', 0) <= 1):
             pruned.add(user_id)
             continue
         
         count = count + 1
         entity_ids, categories, clusters, friend = self._get_stamp_info(user_id)
         
         overlap = values.get('num_stamp_overlap', 0)
         if 'num_stamp_overlap' in values:
             values['stamp_overlap'] = overlap * overlap * inv_len_user_entity_ids
         
         # 1 - euclidean distance between the two category histograms
         summation = 0.0
         for category in [ 'place', 'music', 'film', 'book', 'app', 'other' ]:
             diff = user_categories[category] - categories[category]
             summation += diff * diff
         
         values['category_overlap'] = 1.0 - math.sqrt(summation)
         
         sum0    = inv_len_user_entity_ids
         sum1    = len(entity_ids)
         sum1    = 1.0 / sum1 if sum1 > 0 else 0.0
         
         # NOTE(review): score starts at the -1 sentinel and values are added 
         # on top of it, so the accumulated total is offset by -1 and totals 
         # below 1.0 end up reported as None; preserved as-is -- confirm intent
         score   = -1
         
         # two best (value, cluster center) pairs; index 1 holds the best
         max_val = [ (0, None), (0, None) ]
         
         # compare seed user's stamp clusters with this user's stamp clusters
         for cluster0 in user_clusters:
             ll0  = cluster0['avg']
             len0 = len(cluster0['data']) * sum0
             
             min_dist = 10000000
             min_len  = -1
             
             # find this user's cluster that is nearest to cluster0
             for cluster1 in clusters:
                 dist = earthRadius * utils.get_spherical_distance(ll0, cluster1['avg'])
                 
                 if dist >= 0 and dist < min_dist:
                     min_dist = dist
                     min_len  = len(cluster1['data']) * sum1
             
             if min_len > 0:
                 # NOTE(review): clusters within one mile (min_dist <= 1.0) 
                 # contribute 0.0 here; preserved as-is -- confirm intent
                 inv_dist = 1.0 / math.log(min_dist) if min_dist > 1.0 else 0.0
                 value    = len0 * min_len * inv_dist
                 score    = score + value
                 
                 if max_val[0][1] is None or value > max_val[0][0]:
                     if max_val[1][1] is None or value > max_val[1][0]:
                         max_val[0] = max_val[1]
                         max_val[1] = (value, ll0)
                     else:
                         max_val[0] = (value, ll0)
         
         if score >= 0 and len(user_clusters) > 0:
             score = score / len(user_clusters)
         
         if score < 0:
             score = None
         
         values['proximity'] = score
         values['clusters']  = max_val
         
         # distance from the requester's current location to this user's 
         # nearest stamp cluster (left unset when the user has no clusters)
         if coords is not None:
             min_dist = None
             
             for cluster in clusters:
                 dist = earthRadius * utils.get_spherical_distance(coords, cluster['avg'])
                 
                 if min_dist is None or dist < min_dist:
                     min_dist = dist
                     values['current_proximity'] = dist
         
         # only count stamps that the seed user doesn't already share
         num_stamps  = friend.num_stamps if 'num_stamps' in friend else 0
         num_stamps -= overlap
         
         values['has_stamps'] = (num_stamps >= 1)
         values['num_stamps'] = math.log(num_stamps) if num_stamps >= 1 else 0.0
     
     logs.info("potential friends:  %d" % len(potential_friends))
     logs.info("friends of friends: %d" % len(friends_of_friends))
     logs.info("processed: %d; pruned: %d" % (count, len(pruned)))
     
     limit  = request.limit  if request.limit  is not None else 10
     offset = request.offset if request.offset is not None else 0
     
     # only drop pruned users when enough candidates remain to fill the 
     # requested page; otherwise they stay in as filler
     if len(pruned) > 0 and len(potential_friends) - len(pruned) >= offset + limit:
         logs.debug("pruning %d potential friends (out of %d)" % (len(pruned), len(potential_friends)))
         potential_friends = dict((uid, vals) for uid, vals in potential_friends.iteritems() if uid not in pruned)
         logs.debug("removed %d potential friends (now %d)" % (len(pruned), len(potential_friends)))
     
     # debugging utility to view top scores across certain categories
     #def print_top(key, reverse=True, default=-1):
     #    print "%s %s %s" % ("-" * 40, key, "-" * 40)
     #    users2 = sorted(potential_friends.iteritems(), key=lambda kv: kv[1][key] if key in kv[1] else default, reverse=True)[:10]
     #    
     #    for user in users2:
     #        import pprint as p
     #        p.pprint(user)
     #
     #print_top('friend_overlap')
     #print_top('stamp_overlap')
     #print_top('category_overlap')
     #print_top('proximity')
     
     # TODO: optimize this sorted loop to only retain the top n results?
     users  = sorted(potential_friends.iteritems(), key=self._get_potential_friend_score, reverse=True)
     users  = users[offset : offset + limit]
     
     return [ (kv[0], self._get_potential_friend_score(kv, explain=True, coords=coords)[1]) for kv in users ]
Ejemplo n.º 4
0
 def _get_stamp_info(self, user_id):
     """
         Processes a single user, returning aggregate statistics about their 
         stamp behavior, including all entity_id's that the user's stamped, 
         a histogram of the categories those stamps fall into, a description 
         of their geographical stamp clusters, and the user object itself.
     """
     
     stampIds    = self.collection_collection.getUserStampIds(user_id)
     stamps      = self.stamp_collection.getStamps(stampIds, limit=1000, sort='modified')
     user        = self.user_collection.getUser(user_id)
     
     categories  = defaultdict(int)
     num_stamps  = len(stamps)
     entity_ids  = frozenset(s.entity_id for s in stamps)
     
     for stamp in stamps:
         categories[stamp.entity.category] = categories[stamp.entity.category] + 1.0 / num_stamps
     
     earthRadius = 3959.0 # miles
     clusters    = [ ]
     trivial     = True
     
     # find stamp clusters
     for stamp in stamps:
         if stamp.lat is not None and stamp.lng is not None:
             found_cluster = False
             ll = [ stamp.lat, stamp.lng ]
             #print "%s) %s" % (stamp.title, ll)
             
             for cluster in clusters:
                 dist = earthRadius * utils.get_spherical_distance(ll, cluster['avg'])
                 #print "%s) %s vs %s => %s (%s)" % (stamp.title, ll, cluster['avg'], dist, cluster['data'])
                 
                 if dist < 10:
                     cluster['data'].append(stamp.title)
                     
                     len_cluster   = len(cluster['data'])
                     found_cluster = True
                     trivial       = False
                     
                     cluster['sum'][0] = cluster['sum'][0] + ll[0]
                     cluster['sum'][1] = cluster['sum'][1] + ll[1]
                     
                     cluster['avg'][0] = cluster['sum'][0] / len_cluster
                     cluster['avg'][1] = cluster['sum'][1] / len_cluster
                     
                     #print "%s) %d %s" % (stamp.title, len_cluster, cluster)
                     break
             
             if not found_cluster:
                 clusters.append({
                     'avg'  : [ ll[0], ll[1] ], 
                     'sum'  : [ ll[0], ll[1] ], 
                     'data' : [ stamp.title ], 
                 })
     
     clusters2 = []
     if trivial:
         clusters2 = clusters
     else:
         # attempt to remove trivial clusters as outliers
         for cluster in clusters:
             if len(cluster['data']) > 1:
                 clusters2.append(cluster)
     
     return entity_ids, categories, clusters2, user
Ejemplo n.º 5
0
 def get_clusters(self, entities, limit=None):
     """
         Groups the coordinates of the given entities into geographical 
         clusters and returns the cluster with the most members, or None 
         when no entity has coordinates.
         
         The returned cluster is a dict with the keys 'avg' (running mean 
         [lat, lng]), 'sum' (running [lat, lng] totals) and 'data' (list 
         of member (lat, lng) tuples). The limit argument is accepted but 
         not used by this implementation.
     """
     radius_miles = 3959.0
     threshold    = 10.0
     clusters     = [ ]
     merged_any   = False
     
     # greedily assign each entity to the first cluster whose center is 
     # closer than the threshold, or start a new cluster
     for entity in entities:
         coords = entity.coordinates
         
         if coords is None:
             continue
         
         # TODO: really should be retaining this for stamps overall instead of just subset here...
         
         point = [ coords.lat, coords.lng ]
         
         for cluster in clusters:
             if radius_miles * utils.get_spherical_distance(point, cluster['avg']) < threshold:
                 members = cluster['data']
                 members.append((point[0], point[1]))
                 size = len(members)
                 
                 totals = cluster['sum']
                 totals[0] += point[0]
                 totals[1] += point[1]
                 
                 # keep the cluster center at the mean of its members
                 center = cluster['avg']
                 center[0] = totals[0] / size
                 center[1] = totals[1] / size
                 
                 merged_any = True
                 break
         else:
             clusters.append({
                 'avg'  : [ point[0], point[1] ], 
                 'sum'  : [ point[0], point[1] ], 
                 'data' : [ (point[0], point[1]) ], 
             })
     
     if not clusters:
         return None
     
     if merged_any:
         # attempt to remove trivial clusters as outliers
         candidates = [ c for c in clusters if len(c['data']) > 1 ]
         
         if not candidates:
             candidates.append(clusters[0])
     else:
         candidates = clusters
     
     # largest cluster wins; ties go to the cluster that was created first
     return max(candidates, key=lambda c: len(c['data']))