Beispiel #1
    def join_on_field(self, other_getter, map_fxn=None, new_field_name=None,
                      join_field='id', join_field_1=None, join_field_2=None, discard_orphans=True):
        """
        Joins this getter to another by indexing on a field
        and applying a mapping function to generate new outputs

        other_getter    :   the getter to join against; must expose
                            `fields` and `_fields_by_id` like this one
        map_fxn         :   optional callable `(mine, theirs) -> value`
                            run on each matched pair of elements
        new_field_name  :   key under which the `map_fxn` result is stored
                            (only used when `map_fxn` is given)
        join_field      :   field to join on when both sides use the
                            same name
        join_field_1    :   join field on this getter, overrides `join_field`
        join_field_2    :   join field on `other_getter`, overrides `join_field`
        discard_orphans :   do we discard elements in one that
                            don't match the other?

        Returns a new getter built with `self.from_fields` from the joined
        elements (matched elements first, then any orphans).
        """
        if join_field != 'id' or join_field_1 or join_field_2:
            # Custom join field(s): index both sides by the requested key
            join_field_1 = join_field_1 or join_field
            join_field_2 = join_field_2 or join_field
            getter_by_join_field = {element[join_field_1]: element for element in self.fields}
            other_by_join_field = {element[join_field_2]: element for element in other_getter.fields}
        else:
            # Default join: reuse the indexes both getters already hold
            # (presumably keyed by 'id' -- confirm against the class)
            getter_by_join_field = self._fields_by_id
            other_by_join_field = other_getter._fields_by_id
        keys_in_both = set(getter_by_join_field) & set(other_by_join_field)

        if map_fxn:
            # Run the mapping function
            # NOTE(review): the lookup of `join_field` below reads this
            # getter's element, so it raises KeyError when only
            # `join_field_1` names a field the elements actually have.
            joined = [{
                new_field_name: map_fxn(getter_by_join_field[key], other_by_join_field[key]),
                # join field is same in both, by definition
                join_field: getter_by_join_field[key][join_field]
            } for key in keys_in_both]
        else:
            # If no mapping function, just take all existing fields
            joined = [merge_dicts(getter_by_join_field[key], other_by_join_field[key])
                      for key in keys_in_both]

        if not discard_orphans:
            # Append the orphans
            # Note that this may cause problems if the
            # mapping function introduced any new fields
            joined.extend(getter_by_join_field[key]
                          for key in set(getter_by_join_field) - keys_in_both)
            joined.extend(other_by_join_field[key]
                          for key in set(other_by_join_field) - keys_in_both)

        # Return a new getter
        return self.from_fields(joined)
Beispiel #2
def get_top_friends_and_groups(results, user):
    # Merges the incoming group results into one dict, launches a follow-up
    # async group, then assembles per-friend photo lists and group shots and
    # returns the merged `results` with those entries added.
    #
    # NOTE(review): this listing looks truncated -- the docstring delimiters,
    # several closing brackets, `else:` branches and loop bodies are missing,
    # so it does not parse as shown. Restore from the upstream source before
    # relying on it; the NOTE(review) comments below mark the damaged spots.
    This takes input from PhotosOfMe and
    TaggedWithMe, TaggedWithThisYear, and TopPostersFromYear

    For convenience, it fires off the group that depends on PhotosOfMe
    # Flatten the results returned from the group
    results = merge_dicts(*results)

    # NOTE(review): the chain below ends on a dangling `|` and `group([` is
    # never closed -- the remaining subtask(s) and the `])` appear to be missing.
    on_photos_of_me = group([
        get_photos_by_year.subtask((results,user,)) |
        get_top_albums.subtask((user,)) |
    on_photos_of_me_async = on_photos_of_me.apply_async()

    # Combine two per-friend tallies by summing their 'count' values.
    # NOTE(review): the other getter argument, any further keyword arguments
    # and the closing parenthesis of this call are missing.
    top_friends = results['most_tagged_recently'].join_on_field(
        map_fxn=lambda x, y: x['count'] + y['count'],

    # Get ids of gf/bf and immediate family
    family_ids = []
    # NOTE(review): the loop's iterable and the body of the `if` are missing;
    # presumably the member's id is appended to `family_ids` -- TODO confirm.
    for family_member in
        if family_member.relationship in IMMEDIATE_FAMILY:

    # For each top friend, pull the photos they are tagged in
    # Gf/bf and immediate family to the front, the rest in top friends order
    top_friend_photos = []
    pulled_gfbf = False
    pulled_gfbf_family = 0
    for friend in top_friends.order_by('count'):
        # Tags whose subject is this friend
        friend_tags = results['tagged_with_me']['tagged_with_me'].filter(lambda x: x['subject']==friend['id'])
        if len(friend_tags) > TOP_FRIEND_MIN_PHOTOS:
            # Perform a join on `photos_of_me` to get the photo scores,
            # and sort by year, then score
            # NOTE(review): the line below ends in a continuation backslash but
            # the continued expression (presumably the sort) is missing.
            friend_photos = friend_tags.join_on_field(results['photos_of_me'], join_field_1='object_id')\
            if len(friend_tags) != len(friend_photos):
                logger.warn('Received a top friend photo that wasn\'t in \'photos_of_me\'. Odd.')
            # Bring photos of gf/bf and immediate family to the front
            if user.profile.significant_other_id and friend['id'] == user.profile.significant_other_id:
                top_friend_photos.insert(0, friend_photos)
                pulled_gfbf_family += 1
                pulled_gfbf = True
            elif friend['id'] in family_ids and pulled_gfbf_family < NUM_GFBF_FAMILY_FIRST:
                # NOTE(review): an `else:` appears to be missing between the two
                # inserts below -- as shown, both would run when `pulled_gfbf`
                # is true. There is also no visible branch that appends photos
                # of ordinary (non-family) friends.
                if pulled_gfbf:
                    # Insert behind their gfbf
                    top_friend_photos.insert(1, friend_photos)
                    top_friend_photos.insert(0, friend_photos)
                pulled_gfbf_family += 1

    # For each group photo, grab its score from 'photos_of_me'
    # and filter to photos from this year
    group_photos = []
    for group_photo in results['tagged_with_me']['group_photos'].fields:
        group_photo_id = group_photo['id']
        # NOTE(review): as shown, the warning fires when the id IS present,
        # which contradicts its message; the lines that append to
        # `group_photos` and an `else:` before the warning seem to be missing.
        if group_photo_id in results['photos_of_me'].fields_by_id:
            logger.warn('Received a group photo %s that wasn\'t in \'photos_of_me\'. Odd.' % group_photo_id )

    # Sort by year, score
    group_photos_getter = ResultGetter.from_fields(group_photos)
    group_shots = group_photos_getter.get_in_decending_year_score_order()

    # Return the lists and the subtask
    results['top_friends'] = top_friend_photos
    results['group_shots'] = group_shots
    results['on_photos_of_me_async'] = on_photos_of_me_async
    return results
Beispiel #3
def run_book(user, results):
    # Builds and returns a Yearbook for `user`: runs a group of Facebook query
    # subtasks, merges their output into `results`, scores friends, photos,
    # albums and posts, stores the rankings on a PhotoRankings object, then
    # assigns photo indices to the Yearbook's fields.
    #
    # NOTE(review): this listing is visibly truncated (dangling `=`,
    # `'user_id':,`, unclosed brackets, empty loop/if bodies) and does not
    # parse as shown; the NOTE(review) comments below mark the spots. It also
    # uses `iteritems` / `iterkeys`, i.e. Python 2 dict APIs.
    runtime_start = time.time()
    # NOTE(review): the right-hand side of this assignment is missing.
    task_id =

    # NOTE(review): no use of `pdb` is visible in this function -- looks like
    # leftover debugging.
    import pdb

    # See if user has a yearbook
    #        try:
    #            yearbook = Yearbook(owner=user)
    #        except Yearbook.DoesNotExist:

    # Run the top friends task

    # Run separate, async tasks to facebook
    # "rt.s" == "run_task.subtask"
    # NOTE(review): every 'user_id' value below is missing, and the closing
    # `])` of the group is missing as well.
    fql_job = group([
        rt.subtask(kwargs={'task_cls': PhotosOfMeTask,           'end_time': UNIX_THIS_YEAR_END, 'user_id':, }),
        rt.subtask(kwargs={'task_cls': CommentsOnPhotosOfMeTask, 'end_time': UNIX_THIS_YEAR_END, 'user_id':, }),
        rt.subtask(kwargs={'task_cls': OwnerPostsFromYearTask,   'user_id':, }),
        rt.subtask(kwargs={'task_cls': OthersPostsFromYearTask,  'user_id':, }),
        rt.subtask(kwargs={'task_cls': FamilyTask,               'user_id':, }),
    # fql_job = group([
    #     rt.s(kwargs={'task_cls': PhotosOfMeTask,           'end_time': UNIX_THIS_YEAR_END, 'user_id':, 'parent_id': task_id},
    #          link=update_task_state.s(kwargs={'uuid': task_id, 'current_task': current_task})),
    #     rt.s(kwargs={'task_cls': CommentsOnPhotosOfMeTask, 'end_time': UNIX_THIS_YEAR_END, 'user_id':, 'parent_id': task_id}),
    #     rt.s(kwargs={'task_cls': OwnerPostsFromYearTask,   'user_id':, 'parent_id': task_id}),
    #     rt.s(kwargs={'task_cls': OthersPostsFromYearTask,  'user_id':, 'parent_id': task_id}),
    #     rt.s(kwargs={'task_cls': FamilyTask,               'user_id':, 'parent_id': task_id}),
    # ])
    # Launch the group and collect the subtasks' results
    job_async = fql_job.apply_async()
    job_results = job_async.get()

    # Fold the subtask results into the dict passed in by the caller
    results = merge_dicts(results, *job_results)

    ## Results contains
    #   'get_friends'               all friends     (already saved to db)
    #   'tagged_with_me'            `subject, object_id, created` from tags of photos I am in
    #   'comments_on_photos_of_me'
    #   'others_posts_from_year'
    #   'owner_posts_from_year'
    #   'photos_of_me'

    # Toss any results in 'tagged_with_me' that aren't in 'photos_of_me'
    # NOTE(review): the closing parenthesis of this `filter(` call is missing.
    results['tagged_with_me'] = results['tagged_with_me'].filter(
        lambda x: x['object_id'] in results['photos_of_me'].ids

    # Get number of people in each photo
    num_tags_by_photo_id = FreqDistResultGetter(results['tagged_with_me'], id_field='object_id')

    comments_by_photo_id = defaultdict(list)
    comments_score_by_user_id = defaultdict(lambda: 0)
    for comment in results['comments_on_photos_of_me']:
        # Get the comments in each photo
        # NOTE(review): the statement that fills `comments_by_photo_id`
        # appears to be missing here.

        # Get the number of comments by each user, discounted by year:
        # points are divided by (years since the comment + 1), floored at 1
        comments_score_by_user_id[comment['fromid']] += \
            TOP_FRIEND_POINTS_FOR_PHOTO_COMMENT / max((THIS_YEAR.year - comment['time'].year + 1.0), 1.0)

    # Save the photos to the database
    photos_of_me = []
    for photo in results['photos_of_me']:
        # NOTE(review): the `FacebookPhoto(` call is never closed and nothing
        # visible appends `photo_db` to `photos_of_me`.
        # `people_in_photo` is tag count + 1 when the photo has tags, else 0.
        photo_db = FacebookPhoto(
            facebook_id     = photo['id'],
            created         = photo['created'],
            people_in_photo = num_tags_by_photo_id.fields_by_id[photo['id']]['count'] + 1 \
                                if photo['id'] in num_tags_by_photo_id.ids else 0,
            height          = photo['height'],
            width           = photo['width'],
            fb_url          = photo['fb_url'],
            comments        = comments_by_photo_id[photo['id']],    # it's a defaultdict
            caption         = photo['caption']

    # Save photos, profile fields, and family to db
    save_to_db_async = save_to_db.delay(user, results['family'], photos_of_me)

    ## Calculate top friends

    # Combine the lists of posts
    # NOTE(review): the arguments to `itertools.chain(` and both closing
    # parentheses are missing.
    all_posts_this_year = ResultGetter.from_fields(itertools.chain(

    # Strip posts that have an attachment that is a photo?
#    .filter(lambda x: 'attachment' in x and 'fb_object_type' in x['attachment'] and x['attachment'])

    # Assign each friend points for each post they made
    posts_score_by_user_id = defaultdict(lambda: 0)
    for post in all_posts_this_year:
        # if 'score' not in post:
        #     post['score'] = 0
        # post['score'] += TOP_FRIEND_POINTS_FOR_POST
        posts_score_by_user_id[post['actor_id']] += TOP_FRIEND_POINTS_FOR_POST

    # Calculate photo score for each user, discounted by year
    tags_by_user_id = defaultdict(list)
    # NOTE(review): the loop body is missing; presumably it groups tags by
    # their 'subject' into `tags_by_user_id` -- TODO confirm.
    for tag in results['tagged_with_me']:

    photos_score_by_user_id = defaultdict(lambda: 0.0)
    for friend_id, tag_list in tags_by_user_id.iteritems():
        for tag in tag_list:
            photo_id = tag['object_id']
            peeps_in_photo = num_tags_by_photo_id.fields_by_id[photo_id]['count'] + 1   # num tags + me
            photo = results['photos_of_me'].fields_by_id[photo_id]
#            photo_age = 2012 - photo['created'].year + 1.0
            # NOTE(review): the left operand of the subtraction looks lost
            # (compare the commented-out line above and the comment discount
            # earlier); as written this is a unary minus and yields a
            # negative age.
            photo_age = - photo['created'].year + 1.0
            # Fewer people in the photo selects a different points constant
            if peeps_in_photo == 2:
                photos_score_by_user_id[friend_id] += TOP_FRIEND_POINTS_FOR_PHOTO_OF_2 / photo_age
            elif peeps_in_photo == 3:
                photos_score_by_user_id[friend_id] += TOP_FRIEND_POINTS_FOR_PHOTO_OF_3 / photo_age
            elif peeps_in_photo >= 4:
                photos_score_by_user_id[friend_id] += TOP_FRIEND_POINTS_FOR_PHOTO_OF_4 / photo_age

    # Add em up
    top_friend_ids = (set(comments_score_by_user_id) | set(posts_score_by_user_id)
        | set(photos_score_by_user_id))
    # NOTE(review): the sum below ends on a dangling `+` (presumably the photo
    # score term) and the closing `}` is missing.
    top_friend_score_by_id = {
        friend_id: comments_score_by_user_id[friend_id] + posts_score_by_user_id[friend_id] +
        for friend_id in top_friend_ids
    # Keep the 20 highest-scoring friends
    top_20_friends_score_by_id = dict(sorted(top_friend_score_by_id.iteritems(), key=lambda x: x[1], reverse=True)[:20])

    ## Calculate top photos

    # For each photo, get the number of top friends in the photo
    num_top_friends_by_photo_id = defaultdict(lambda: 0)
    for tag in results['tagged_with_me']:
        if tag['subject'] in top_friend_ids:
            points = 1
            # Double points if the user in top-20
            # TODO: does this make sense?
            if tag['subject'] in top_20_friends_score_by_id:
                points += 1
            num_top_friends_by_photo_id[tag['object_id']] += points

    # Photos of all time
    top_photo_score_by_id = {}
    for photo in results['photos_of_me']:
        # How many comments by friends of mine?
        comments_from_friends = 0
        for comment in comments_by_photo_id[photo['id']]:
            if comment['fromid'] in results['get_friends'].ids:
                comments_from_friends += 1

        # NOTE(review): the conditional expression binds looser than `/`, so
        # this evaluates as `(points / max(...)) if <has tags> else 1` -- a
        # photo without tags scores exactly 1 regardless of likes/comments.
        # Presumably only the divisor was meant to fall back to 1; confirm.
        score = ((TOP_PHOTO_POINTS_FOR_TOP_FRIENDS * num_top_friends_by_photo_id[photo['id']] +
                  TOP_PHOTO_POINTS_FOR_COMMENT * comments_from_friends +
                  TOP_PHOTO_POINTS_FOR_LIKE * photo['like_count']) /
                      max(num_tags_by_photo_id.fields_by_id[photo['id']]['count'] - 2.0, 1.0)
                          if photo['id'] in num_tags_by_photo_id.fields_by_id else 1)

        # Record the score both in the lookup dict and on the photo itself
        top_photo_score_by_id[photo['id']] = photo['score'] = score

    # Update list to have scores
    tags_by_user_id = defaultdict(list)
    # NOTE(review): the loop body is missing here as well.
    for tag in results['tagged_with_me']:

    ## Calculate top group photos
    # group_photos_this_year is only 1 for me
    group_photos = results['photos_of_me'] \
        .filter(lambda x: x['id'] in num_tags_by_photo_id.fields_by_id) \
        .filter(lambda x: num_tags_by_photo_id.fields_by_id[x['id']]['count'] >= GROUP_PHOTO_IS)\
        .filter(lambda x: x['created'] > GROUP_PHOTO_CUTOFF)

    group_photo_score_by_id = {}
    for photo in group_photos:
        score = GROUP_PHOTO_POINTS_FOR_TOP_FRIENDS * num_top_friends_by_photo_id[photo['id']] +\
                GROUP_PHOTO_POINTS_FOR_COMMENT * photo['comment_count'] +\
                GROUP_PHOTO_POINTS_FOR_LIKE * photo['like_count']
        group_photo_score_by_id[photo['id']] = {'score': score, 'created': photo['created']}

    ## Calculate top albums
    album_score_and_date_by_id = defaultdict(lambda: {'score': 0, 'created': None})
    for photo in results['photos_of_me']:
        album_score_and_date_by_id[photo['album_object_id']]['score'] += photo['score']
        # Also tag with the date
        # (overwritten per photo, so the album keeps the date of the last
        # photo iterated)
        album_score_and_date_by_id[photo['album_object_id']]['created'] = photo['created']

    ## Calculate top post
    for post in all_posts_this_year:
        top_friend_comments = 0
        for comment in post['comments']['comment_list']:
            if comment['fromid'] in top_friend_ids:
                top_friend_comments += 1
        post['score'] = \
            (COMMENT_POINTS_FOR_MADE_BY_ME * 1 if post['actor_id'] == user.profile.facebook_id else 0) +\
            COMMENT_POINTS_FOR_COMMENT * top_friend_comments + \
            COMMENT_POINTS_FOR_LIKE * post['like_count']

    ## Pull out birthday posts
    birthday_posts = []
    if user.profile.date_of_birth:
        birthday = user.profile.date_of_birth
        # NOTE(review): the day argument is missing (`,,`) and the year 2012
        # is hard-coded.
        birthday_this_year = datetime.datetime(2012, birthday.month,, 0, 0, 0, tzinfo=utc)
        # Window: from one day before the birthday to three days after
        start_time = birthday_this_year - datetime.timedelta(days=1)
        end_time = birthday_this_year + datetime.timedelta(days=3)
        # NOTE(review): the closing parenthesis of `filter(` is missing.
        birthday_posts = all_posts_this_year.filter(
            lambda x: start_time < x['created_time'] < end_time and x['message'] and x['actor_id'] in results['get_friends'].ids

    ## Save fields to the PhotoRankings class

    rankings = PhotoRankings(user=user)
#    rankings, created = PhotoRankings.objects.get_or_create(user=user)

    # NOTE(review): the line below ends in a continuation backslash but the
    # continued call is missing.
    top_photos_this_year = results['photos_of_me'].filter(lambda x: x['created'] > THIS_YEAR)\

    rankings.top_photos = top_photos_this_year
    # NOTE(review): the iterable passed to `sorted(`, any further arguments
    # and the closing `)` / `]` are missing.
    rankings.group_shots = [
        k for k, v in sorted(
            # Sort by year, score
            key=lambda x: (x[1]['created'].year, x[1]['score']),
    rankings.top_posts = all_posts_this_year.order_by('score')[:10]

    # Back in time
    max_year, photos_of_me_by_year = results['photos_of_me'].bucket_by_year()
    years = list(sorted(photos_of_me_by_year.iterkeys(), reverse=True))
    back_in_time = []
    # Skips the most recent year (years[0]) and takes up to NUM_PREV_YEARS
    for index, year in enumerate(years[1:NUM_PREV_YEARS + 1]):
        year_photo_ids = []
        # NOTE(review): the inner loop body and the append to `back_in_time`
        # are missing.
        for photo in photos_of_me_by_year[year].order_by('score'):
    rankings.back_in_time = back_in_time

    ## Assign photos to the Yearbook, avoiding duplicates
    #    try:
    #        old_yb = Yearbook.objects.get(rankings=rankings)
    #        old_yb.delete()
    #    except Yearbook.DoesNotExist: pass
    yb = Yearbook(rankings=rankings)
    yb.top_post = 0
    # NOTE(review): `birthday_posts` starts as a plain list above; `.fields`
    # only exists if the date-of-birth branch replaced it with a getter.
    yb.birthday_posts = list(birthday_posts.fields)

    yb.top_photo_1 = yb.get_first_unused_photo_landscape(rankings.top_photos)           # landscape
    yb.top_photo_2 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_3 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_4 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_5 = yb.get_first_unused_photo(rankings.top_photos)

    # `assign_group_photos` uses FacebookPhoto classes to determine portrait/landscape
    # make sure they finished saving to the db
    # print('save_to_db state: %s' % save_to_db_async.state)

    # Assign the group photos from different albums, if possible
    # Make one pass assigning from different albums,
    # then a second filling in the gaps
    assigned_group_photos = assign_group_photos(yb, rankings, results['photos_of_me'], do_unique_albums=True)
    if assigned_group_photos < NUM_GROUP_PHOTOS:
        assign_group_photos(yb, rankings, results['photos_of_me'], do_unique_albums=False)

    ## Top friends
    # Do this after we assign the top photos and top group photos,
    # so we can make sure there are enough unused photos of them

    # We need to make sure the user exists in the db
    # Users that came back from the db are still in results['get_friends']
    saved_friends_ids = results['get_friends'].ids

    # NOTE(review): the start of this expression is missing.
    family_ids ='facebook_id', flat=True)
    # Rebinds `top_friend_ids` from the earlier set to an ordered list
    top_friend_ids = []
    gfbf_added = False
    for user_id, score in sorted(top_friend_score_by_id.iteritems(), key=lambda x: x[1], reverse=True):
        if yb.num_unused_photos(tags_by_user_id[user_id]) >= TOP_FRIEND_MIN_UNUSED_PHOTOS and user_id in saved_friends_ids:
            # If user is family or gfbf, insert at front
            # NOTE(review): no branch is visible that adds ordinary friends;
            # presumably an `else:` with an append is missing.
            if user_id == user.profile.significant_other_id:
                top_friend_ids.insert(0, user_id)
                gfbf_added = True
            elif user_id in family_ids:
                top_friend_ids.insert(1 if gfbf_added else 0, user_id)

    # Need to build another list that combines tag and photo score
    rankings.top_friends_ids = top_friend_ids[:NUM_TOP_FRIENDS_STORED]
    top_friends_photos = []
    for friend_id in top_friend_ids:
        friend_tags = tags_by_user_id[friend_id]
        top_friend_photos = []
        for tag in friend_tags:
            tag_id = tag['object_id']
            photo = results['photos_of_me'].fields_by_id[tag_id]
            top_friend_photos.append({'id': tag_id, 'score': top_photo_score_by_id[tag_id],
                                      'width': photo['width'], 'height': photo['height']})
        # Highest-scoring photos first
        # NOTE(review): nothing visible appends this per-friend list to
        # `top_friends_photos`; that line appears to be missing.
        top_friend_photos = list(sorted(top_friend_photos, key=lambda x: x['score'], reverse=True))
    rankings.top_friends_photos = top_friends_photos

    ## Assign the top friends
#    used_albums = []
    # NOTE(review): indexes `top_friend_ids` for every value in
    # range(NUM_TOP_FRIENDS); looks like it raises IndexError when fewer
    # friends qualified -- confirm.
    for index in range(NUM_TOP_FRIENDS):
        # Index
        setattr(yb, 'top_friend_%d' % (index + 1), index)
        # Friend stat
        if top_friend_ids[index] == user.profile.significant_other_id:
            friend_stat = SIGNIFICANT_OTHER_STAT
        elif top_friend_ids[index] in family_ids:
            friend_stat = FAMILY_STAT
            # NOTE(review): an `else:` appears to be missing before the next
            # two lines; as shown they overwrite FAMILY_STAT.
            num_tags = len(rankings.top_friends_photos[index])
            friend_stat = 'Tagged in %d photo%s with you' % (num_tags, 's' if num_tags > 1 else '')
        setattr(yb, 'top_friend_%d_stat' % (index + 1), friend_stat)
        # Set photo
#        tf_photo_index = yb.get_first_unused_photo(rankings.top_friends_photos[index])
        tf_photo_index = yb.get_first_unused_photo_landscape(rankings.top_friends_photos[index])
        setattr(yb, 'top_friend_%d_photo_1' % (index + 1), tf_photo_index)
        # If photo was portrait, grab another one
#        tf_photo_id = rankings.top_friends_photos[index][tf_photo_index]['id']
#        tf_photo = results['photos_of_me'].fields_by_id[tf_photo_id]
#        if tf_photo['width'] / float(tf_photo['height']) < HIGHEST_SQUARE_ASPECT_RATIO:
#            tf_photo_index_2 = yb.get_first_unused_photo(rankings.top_friends_photos[index])
#            setattr(yb, 'top_friend_%d_photo_2' % (index + 1), tf_photo_index_2)

    ## Top albums

    # Start pulling album names, photos
    # Can't pickle defaultdict? so just call it here, wouldn't save us much time anyway
    #    pull_albums_async = pull_album_photos.delay(user, album_score_and_date_by_id)
    #    album_photos_by_score, albums_ranked = pull_albums_async.get()
    # NOTE(review): `album_photo_score` is not defined anywhere in the visible
    # part of this function.
    album_photos_by_score, albums_ranked = pull_album_photos(user, album_score_and_date_by_id, album_photo_score)
    rankings.top_albums_photos = album_photos_by_score
    rankings.top_albums_ranked = albums_ranked

    albums_assigned = 0
    # Work on a copy so popping does not mutate the rankings list
    all_top_albums = rankings.top_albums_photos[:]
    curr_album_index = -1
    while all_top_albums:
        curr_album = all_top_albums.pop(0)
        curr_album_index += 1
        photos_to_show = []
        no_more_pics_of_user = False
        # NOTE(review): the loop below is heavily truncated -- the
        # `get_next_unused_photo_of_user(` call is unclosed, and the `else:`
        # branches, the appends to `photos_to_show` and the `break` /
        # `continue` statements are missing.
        while True:
            if len(photos_to_show) < PICS_OF_USER_TO_PROMOTE and not no_more_pics_of_user:
                # Want a pic of the user, loop through album photos looking for one
                photo_of_user = get_next_unused_photo_of_user(
                if photo_of_user:
                    # No more pics of user, just take the next highest unused photo
                    no_more_pics_of_user = True
                next_photo = yb.get_first_unused_photo(curr_album, used_indices=photos_to_show)
                if next_photo is not None:
                    # No photos left, break
            if len(photos_to_show) >= ALBUM_PHOTOS_TO_SHOW:
        if len(photos_to_show) < ALBUM_MIN_PHOTOS:
            # Didn't have enough photos, try the next album

        # Save the fields
        album_str = 'top_album_%d' % (albums_assigned + 1)
        setattr(yb, album_str, curr_album_index)
        for field_num in range(len(photos_to_show)):
            setattr(yb, album_str + '_photo_%d' % (field_num + 1), photos_to_show[field_num])
        albums_assigned += 1
        # NOTE(review): the body of this `if` (presumably `break`) is missing.
        if albums_assigned >= NUM_TOP_ALBUMS:

    ## Throughout the year photos

    # Slots 6-9 are filled via `get_unused_if_portrait` from slots 2-5
    yb.year_photo_1 = yb.get_first_unused_photo_landscape(rankings.top_photos)
    yb.year_photo_2 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_6 = get_unused_if_portrait(yb.year_photo_2, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_3 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_7 = get_unused_if_portrait(yb.year_photo_3, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_4 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_8 = get_unused_if_portrait(yb.year_photo_4, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_5 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_9 = get_unused_if_portrait(yb.year_photo_5, rankings.top_photos, yb, results['photos_of_me'])

    ## Back in time photos

    years_to_show = []
    for year_index, year in enumerate(back_in_time):
        curr_year_unused = yb.get_first_unused_photo(year)
        # NOTE(review): the bodies of both `if` statements in this loop
        # (presumably `continue` and `break`) are missing.
        if curr_year_unused is None:
        years_to_show.append({'year_index': year_index, 'photo_index': curr_year_unused})
        if len(years_to_show) > NUM_PREV_YEARS:

    # Special case: if only found one year, pull an additional photo from that year
    if len(years_to_show) == 1:
        # NOTE(review): entries are appended above with the keys 'year_index'
        # and 'photo_index'; the 'index' key read here does not exist and
        # would raise KeyError.
        that_year_index = years_to_show[0]['index']
        unused_photo_2 = yb.get_first_unused_photo(back_in_time[that_year_index])
        if unused_photo_2 is not None:
            years_to_show.append({'year_index': that_year_index, 'photo_index': unused_photo_2})

    # Save
    for year_num in range(len(years_to_show)):
        field_str = 'back_in_time_%d' % (year_num + 1)
        setattr(yb, field_str, years_to_show[year_num]['year_index'])
        setattr(yb, field_str + '_photo_1', years_to_show[year_num]['photo_index'])

    # Save everything
    yb.rankings = rankings
    yb.run_time = time.time() - runtime_start

    # Log the yearbook run time to mixpanel
    # NOTE(review): the closing `})` of this call is missing.
    tracker.delay('Book Created', properties={
        'distinct_id': user.username,
        'mp_name_tag': user.username,
        'time': time.time(),
        'Book': 'Yearbook 2012',
        'Run Time (sec)': '%.1f' % yb.run_time

    # Initiate a task to start downloading user's yearbook photos?
    return yb