Example #1
0
def top_friends_task(user_id):
    """
    Pulls all friends and all photo tags, combines them into a per-friend
    `top_friends_order` and replaces the user's stored `FacebookFriend`s.

    Steps:
      1. Runs `GetFriendsTask` and `TaggedWithMeTask` through `run_fql` as a
         group and blocks until both results are in.
      2. Scores every tagged user: each tag adds `1 / age`, where age is the
         number of calendar years since the tag was created, counting the
         current year as 1.
      3. Ranks the scored users in ascending score order, starting at 1, and
         copies the rank onto the matching friend records.
      4. Deletes the user's existing `FacebookFriend` documents and bulk
         inserts the freshly pulled ones.

    user_id :   id used to look up the `User` and passed to both child tasks

    Returns the merged child-task results (the code reads the keys
    `get_friends` and `tagged_with_me` from it). Note that the friend dicts
    inside it have had their `id` key popped by the save step.

    NOTE(review): `job_async.get()` waits synchronously for the child tasks.
    If this function itself runs inside a Celery worker, that pattern is
    discouraged by the Celery docs -- confirm how it is invoked.
    """
    # Run the child tasks as one group and wait for both results
    friends_task = run_fql.s(task_cls=GetFriendsTask, user_id=user_id)
    tagged_with_me_task = run_fql.s(task_cls=TaggedWithMeTask, user_id=user_id)
    job_async = group([friends_task, tagged_with_me_task]).apply_async()
    results = merge_dicts(*job_async.get())

    all_friends = results['get_friends']
    tagged_with_me = results['tagged_with_me']

    # Collapse the tags by user_id, discount by age
    current_year = datetime.date.today().year
    tag_score_by_user_id = defaultdict(float)
    for tag in tagged_with_me.fields:
        # Clamp to one year: a tag whose `created` year is later than the
        # local "today" (e.g. timezone skew around New Year) would otherwise
        # give an age of 0 and divide by zero.
        tag_age = max(current_year - tag['created'].year + 1.0, 1.0)
        tag_score_by_user_id[tag['subject']] += 1 / tag_age

    # Ascending sort: the lowest score gets order 1, the highest score gets
    # the largest order. 0 is never assigned (it is not a valid value).
    # NOTE(review): an earlier comment here spoke of "reversing" the list;
    # confirm that "larger order == closer friend" is the intended meaning.
    user_ids_in_score_order = sorted(tag_score_by_user_id.items(),
                                     key=lambda item: item[1])
    top_friends_order_by_id = {}
    for top_friends_order, (friend_id, _score) in enumerate(user_ids_in_score_order, 1):
        top_friends_order_by_id[friend_id] = top_friends_order

    # Copy `top_friends_order`s to `all_friends`
    for friend_id, top_friends_order in top_friends_order_by_id.items():
        try:
            all_friends.fields_by_id[friend_id]['top_friends_order'] = top_friends_order
        except KeyError:
            # Means you were tagged with someone you aren't friends with
            pass

    # Save
    user = User.objects.get(id=user_id)
    # Clear current friends
    FacebookFriend.objects(user=user).delete()
    # Build the new documents; `id` is stored as `uid`, every other field is
    # passed through unchanged (this pops `id` from the dicts in `results`).
    docs = []
    for friend in all_friends.fields:
        uid = friend.pop('id')
        docs.append(FacebookFriend(uid=uid, user=user, **friend))
    # Bulk insert the new ones; skipped for an empty list, since there is
    # nothing to insert and an empty bulk insert is expected to be rejected.
    if docs:
        FacebookFriend.objects.insert(docs)

    # Lazy logger arguments: the message is only formatted if it is emitted
    logger.info('Pulled %d friends, %d top friends for user %s',
                len(all_friends), len(top_friends_order_by_id), user.get_id_str())

    return results
Example #2
0
    def join_on_field(self, other_getter, map_fxn=None, new_field_name=None,
                      join_field='id', join_field_1=None, join_field_2=None, discard_orphans=True):
        """
        Joins this getter to another by indexing both on a field and
        combining the elements that share a value for it.

        other_getter    :   the getter to join against
        map_fxn         :   optional `f(own_element, other_element)`; when
                            given, each joined element holds only its result
                            and the join value
        new_field_name  :   key under which `map_fxn`'s result is stored;
                            should be supplied whenever `map_fxn` is
        join_field      :   field to index both getters on, and the key the
                            join value is stored under in mapped output
        join_field_1    :   overrides `join_field` for this getter
        join_field_2    :   overrides `join_field` for `other_getter`
        discard_orphans :   do we discard elements in one that
                            don't match the other?

        When indexing on a custom field, a later element with the same
        join value replaces an earlier one.

        Returns a new getter built with `self.from_fields` from the joined
        elements.
        """
        if join_field != 'id' or join_field_1 or join_field_2:
            # Custom join: build fresh indexes on the requested fields
            join_field_1 = join_field_1 or join_field
            join_field_2 = join_field_2 or join_field
            getter_by_join_field = {element[join_field_1]: element for element in self.fields}
            other_by_join_field = {element[join_field_2]: element for element in other_getter.fields}
        else:
            # Plain id join: reuse the indexes both getters already keep
            join_field_1 = join_field
            getter_by_join_field = self._fields_by_id
            other_by_join_field = other_getter._fields_by_id
        keys_in_both = set(getter_by_join_field) & set(other_by_join_field)

        if map_fxn:
            # Run the mapping function.
            # The join value is read through `join_field_1`, the field this
            # getter was actually indexed on; reading `join_field` would
            # fail (or pick up an unrelated value) whenever the two differ.
            joined = [{
                          new_field_name: map_fxn(getter_by_join_field[key], other_by_join_field[key]),
                          # join value is the same in both, by definition
                          join_field: getter_by_join_field[key][join_field_1]
                      } for key in keys_in_both]
        else:
            # If no mapping function, just take all existing fields
            joined = [merge_dicts(getter_by_join_field[key], other_by_join_field[key])
                      for key in keys_in_both]

        if not discard_orphans:
            # Append the orphans, first this getter's, then the other's.
            # Note that this may cause problems if the
            # mapping function introduced any new fields
            joined.extend(getter_by_join_field[key]
                          for key in set(getter_by_join_field) - keys_in_both)
            joined.extend(other_by_join_field[key]
                          for key in set(other_by_join_field) - keys_in_both)

        # Return a new getter
        return self.from_fields(joined)