예제 #1
0
def top_friends_task(user_id):
    """
    Pull all friends and all photo tags, rank the tagged friends, and
    replace the user's stored `FacebookFriend` documents.

    Steps:
      1. Run `GetFriendsTask` and `TaggedWithMeTask` as a celery group and
         merge their results with `merge_dicts`.
      2. Sum an age-discounted score per tagged user (`1 / tag_age`, where
         `tag_age` is the number of calendar years since the tag, min 1).
      3. Rank users by ascending score and write the 1-based rank to each
         matching friend as `top_friends_order` (so a larger value means a
         higher score; friends who were never tagged get no value here).
      4. Delete the user's existing `FacebookFriend` docs and bulk insert
         the freshly pulled ones.

    :param user_id: id used to look up the `User` and passed to the
                    child FQL tasks.
    :return: the merged child-task results, keyed by `'get_friends'` and
             `'tagged_with_me'`. Note that the friend dicts inside it have
             had their `'id'` key popped by the save step.
    """
    # Run the child tasks as one group and block until both have finished
    child_tasks = [
        run_fql.s(task_cls=GetFriendsTask, user_id=user_id),
        run_fql.s(task_cls=TaggedWithMeTask, user_id=user_id),
    ]
    job_async = group(child_tasks).apply_async()

    results = job_async.get()
    results = merge_dicts(*results)

    all_friends = results['get_friends']
    tagged_with_me = results['tagged_with_me']

    # Collapse the tags by user_id, discount by age.
    # The current year is loop-invariant, so read it once.
    current_year = datetime.date.today().year
    tag_score_by_user_id = defaultdict(float)
    for tag in tagged_with_me.fields:
        # Clamp to 1.0: a tag whose `created` year is ahead of the local
        # year (e.g. timezone skew around New Year) would otherwise give an
        # age of 0 (ZeroDivisionError) or a negative weight.
        tag_age = max(current_year - tag['created'].year + 1.0, 1.0)
        tag_score_by_user_id[tag['subject']] += 1 / tag_age

    # Sort ascending by score: the highest scorer ends up last and therefore
    # receives the largest `top_friends_order`.
    user_ids_in_score_order = sorted(tag_score_by_user_id.items(), key=lambda x: x[1])

    # Invert the ranking into {user_id: order}; start at 1 because 0 is not
    # a valid value.
    top_friends_order_by_id = {}
    for top_friends_order, (friend_id, _score) in enumerate(user_ids_in_score_order, 1):
        top_friends_order_by_id[friend_id] = top_friends_order

    # Copy `top_friends_order`s to `all_friends`
    for friend_id, top_friends_order in top_friends_order_by_id.items():
        try:
            all_friends.fields_by_id[friend_id]['top_friends_order'] = top_friends_order
        except KeyError:
            # Means you were tagged with someone you aren't friends with
            continue

    # Save
    user = User.objects.get(id=user_id)
    # Clear current friends
    FacebookFriend.objects(user=user).delete()
    # Bulk insert the new ones.
    # NOTE: `pop` mutates the friend dicts that are also part of the
    # returned `results`.
    docs = []
    for friend in all_friends.fields:
        uid = friend.pop('id')
        docs.append(FacebookFriend(uid=uid, user=user, **friend))
    if docs:
        # An empty bulk insert has nothing to write; skip it rather than
        # hand the driver an empty document list.
        FacebookFriend.objects.insert(docs)

    # Lazy %-args: the message is only formatted if the record is emitted
    logger.info('Pulled %d friends, %d top friends for user %s',
                len(all_friends), len(top_friends_order_by_id), user.get_id_str())

    return results
예제 #2
0
def run_book(user):
    """
    Build a `Book` for `user`: pull data through FQL tasks, store photos and
    wall posts, compute scores, and assign photos/friends/albums to pages.

    The overall philosophy here is that ----
        - these functions annotate the data
        - the page classes read the annotations and "claim" photos

    :param user: the user the book is built for; `user.id`, `user.username`,
                 `user.family` and `user.profile.significant_other_id` are
                 read below.
    :return: the saved `book`, or the string `'NOT_ENOUGH_PHOTOS'` when the
             user has fewer than `MIN_TOP_PHOTOS_FOR_BOOK` photos for
             `YEARBOOK_YEAR`.

    NOTE(review): several names used in the second half of this function
    (`rankings`, `yb`, `save_to_db_async`, `top_friend_score_by_id`,
    `tags_by_user_id`, `top_photo_score_by_id`, `tagged_people_by_photo_id`)
    are never assigned inside it. Unless they exist at module scope, those
    lines raise NameError -- the function looks mid-refactor; confirm.
    """
    # Wall-clock start, used for `book.run_time` at the end
    runtime_start = time.time()

    # Create the book
    book = Book(user=user, book_type=BOOK.book_type)

    # Run separate, async tasks to facebook
    # NOTE(review): these signatures pass a single keyword named `kwargs`,
    # whereas the other `run_fql` calls below pass `task_cls=`/`user_id=`
    # directly -- confirm which form `run_fql` expects.
    fql_job = group([
        run_fql.s(kwargs={'task_cls': PhotosOfMeTask,           'user_id': user.id}),
        run_fql.s(kwargs={'task_cls': CommentsOnPhotosOfMeTask, 'user_id': user.id}),
        run_fql.s(kwargs={'task_cls': OwnerPostsFromYearTask,   'user_id': user.id}),
        run_fql.s(kwargs={'task_cls': OthersPostsFromYearTask,  'user_id': user.id}),
    ])
    job_async = fql_job.apply_async()

    # family_async = run_fql.s(task_cls=FamilyTask, user_id=user.id, commit=True).apply_async()
    family_async = run_fql.delay(task_cls=FamilyTask, user_id=user.id, commit=True)

    # While that's running, collect the other dependent tasks
    # NOTE(review): `.s()` only builds a signature; nothing here dispatches
    # it, yet `.get()` is called on these objects below -- presumably this
    # needs `.delay()`/`.apply_async()`; confirm.
    friends_task_async =        get_result_or_run_fql.s(task_cls=GetFriendsTask, user_id=user.id)
    tagged_with_me_task_async = get_result_or_run_fql.s(task_cls=TaggedWithMeTask, user_id=user.id)

    # NOTE(review): a celery group result's `.get()` normally yields a list
    # of per-task results; the string-key access below assumes a dict --
    # confirm whether a merge step is missing.
    results = job_async.get()
    results['get_friends'] = friends_task_async.get()
    results['tagged_with_me'] = tagged_with_me_task_async.get()

    ## Results contains
    # (`get_friends` is bound here but not read again in this function)
    get_friends              = results['get_friends']       # --> all friends     (already saved to db)
    tagged_with_me           = results['tagged_with_me']    # --> `subject, object_id, created` from tags of photos I am in
    comments_on_photos_of_me = results['comments_on_photos_of_me']
    others_posts_from_year   = results['others_posts_from_year']
    owner_posts_from_year    = results['owner_posts_from_year']
    photos_of_me             = results['photos_of_me']

    ## Save all the photos
    # NOTE: `results` is rebound here; the FQL results are only reachable
    # through the local names bound above from this point on.
    results = save_photos_and_tags(user, book, tagged_with_me, comments_on_photos_of_me, photos_of_me)

    db_photos_by_id = results['db_photos_by_id']
    comments_score_by_user_id = results['comments_score_by_user_id']

    ## If they don't have enough photos this year, bail out of the yearbook process

    if Photo.objects(book=book).get_year(YEARBOOK_YEAR).count() < MIN_TOP_PHOTOS_FOR_BOOK:
        celery.current_task.update_state(state='NOT_ENOUGH_PHOTOS')
        # This is a hack because celery overwrites the task state when you return
        # could also use an `after_return` handler, see http://bit.ly/16U6YKv
        return 'NOT_ENOUGH_PHOTOS'

    ## Calculate top friends
    # NOTE: `results` is rebound a second time here.
    results = calculate_top_friends(user, book,
                                    others_posts_from_year, owner_posts_from_year, db_photos_by_id,
                                    photos_of_me, comments_score_by_user_id)

    all_posts_this_year = results['all_posts_this_year']
    # (`db_friends_by_id` is bound here but not read again in this function)
    db_friends_by_id    = results['db_friends_by_id']

    ## Save all posts
    db_posts = []
    for post in all_posts_this_year:
        db_posts.append(WallPost(user=user, book=book, **post))
    WallPost.insert(db_posts)

    ## Score all the photos
    calculate_photo_scores(book)

    ## Calculate top group photos
    calculate_top_group_photo_scores(book)


    ## Calculate top albums
    album_score_and_date_by_id = calculate_top_albums(photos_of_me)

    ## Assign post scores
    assign_post_scores(book)

    ## Pull out birthday posts
    birthday_posts = get_birthday_posts(book)

    ## Go through and add tags to the photo objects
    ##    tags indicate membership in a category - group_photo
    ##    tags may also relate to a (new) score field - group_photo_score


    # We run the pages one by one. They run in order of preference (not necessarily book order)
    # Each page has an inclusion criteria, and an assignment function


    # Back in time
    # NOTE(review): `results` now holds the return value of
    # `calculate_top_friends`; `results['photos_of_me']` (here and further
    # down) and `results['get_friends']` only work if that function also
    # returns those keys -- otherwise the local `photos_of_me` was
    # presumably intended. Confirm.
    max_year, photos_of_me_by_year = results['photos_of_me'].bucket_by_year()
    # Newest year first; the slice below skips the first (newest) entry and
    # keeps at most NUM_PREV_YEARS earlier years.
    years = list(sorted(photos_of_me_by_year.keys(), reverse=True))
    back_in_time = []
    for index, year in enumerate(years[1:NUM_PREV_YEARS + 1]):
        year_photo_ids = []
        for photo in photos_of_me_by_year[year].order_by('score'):
            year_photo_ids.append(photo['id'])
        back_in_time.append(year_photo_ids)
    # NOTE(review): `rankings` is not assigned anywhere in this function.
    rankings.back_in_time = back_in_time

    ## Assign photos to the Yearbook, avoiding duplicates
    #    try:
    #        old_yb = Yearbook.objects.get(rankings=rankings)
    #        old_yb.delete()
    #    except Yearbook.DoesNotExist: pass

    # NOTE(review): `yb` is not assigned anywhere in this function.
    yb.top_post = 0
    yb.birthday_posts = birthday_posts.fields

    yb.top_photo_1 = yb.get_first_unused_photo_landscape(rankings.top_photos)           # landscape
    yb.top_photo_2 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_3 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_4 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_5 = yb.get_first_unused_photo(rankings.top_photos)

    # `assign_group_photos` uses FacebookPhoto classes to determine portrait/landscape
    # make sure they finished saving to the db
    # print 'save_to_db state: %s' % save_to_db_async.state
    # NOTE(review): `save_to_db_async` is not assigned anywhere in this function.
    save_to_db_async.get()

    # Assign the group photos from different albums, if possible
    # Make one pass assigning from different albums,
    # then a second filling in the gaps
    #    assigned_group_photos = assign_group_photos(yb, rankings, results['photos_of_me'], do_unique_albums=True)
    #    if assigned_group_photos < NUM_GROUP_PHOTOS:
    #        assign_group_photos(yb, rankings, results['photos_of_me'], do_unique_albums=False)

    ## Top friends
    # Do this after we assign the top photos and top group photos,
    # so we can make sure there are enough unused photos of them

    # We need to make sure the user exists in the db
    # Users that came back from the db are still in results['get_friends']
    saved_friends_ids = results['get_friends'].ids

    # Make sure the family task finished running
    family_async.get()
    # Need to re-sync `user`?
    family_ids = user.family.all().values_list('facebook_id', flat=True)
    top_friend_ids = []
    gfbf_added = False
    # Walk candidates from highest to lowest score; keep only saved friends
    # with enough unused photos.
    # NOTE(review): `top_friend_score_by_id` and `tags_by_user_id` are not
    # assigned anywhere in this function.
    for user_id, score in sorted(top_friend_score_by_id.items(), key=lambda x: x[1], reverse=True):
        if yb.num_unused_photos(tags_by_user_id[user_id]) >= TOP_FRIEND_MIN_UNUSED_PHOTOS and user_id in saved_friends_ids:
            # If user is family or gfbf, insert at front
            if user_id == user.profile.significant_other_id:
                top_friend_ids.insert(0, user_id)
                gfbf_added = True
            elif user_id in family_ids:
                # Family goes right after the significant other if one was
                # already placed, otherwise at the very front.
                top_friend_ids.insert(1 if gfbf_added else 0, user_id)
            else:
                top_friend_ids.append(user_id)

    # Need to build another list that combines tag and photo score
    rankings.top_friends_ids = top_friend_ids[:NUM_TOP_FRIENDS_STORED]
    # One list of photo dicts per friend, each sorted by descending score.
    # Iterates the full `top_friend_ids`, not the truncated list stored above.
    top_friends_photos = []
    for friend_id in top_friend_ids:
        friend_tags = tags_by_user_id[friend_id]
        top_friend_photos = []
        for tag in friend_tags:
            tag_id = tag['object_id']
            photo = results['photos_of_me'].fields_by_id[tag_id]
            # NOTE(review): `top_photo_score_by_id` is not assigned anywhere
            # in this function.
            top_friend_photos.append({'id': tag_id, 'score': top_photo_score_by_id[tag_id],
                                      'width': photo['width'], 'height': photo['height']})
        top_friend_photos.sort(key=lambda x: x['score'], reverse=True)
        top_friends_photos.append(top_friend_photos)
    rankings.top_friends_photos = top_friends_photos

    ## Assign the top friends
    #    used_albums = []
    for index in range(min(NUM_TOP_FRIENDS, len(top_friend_ids))):
        # Index
        setattr(yb, 'top_friend_%d' % (index + 1), index)
        # Friend stat
        if top_friend_ids[index] == user.profile.significant_other_id:
            friend_stat = SIGNIFICANT_OTHER_STAT
        elif top_friend_ids[index] in family_ids:
            friend_stat = FAMILY_STAT
        else:
            num_tags = len(rankings.top_friends_photos[index])
            # Plural "s" is added only when num_tags > 1
            friend_stat = u'Tagged in %d photo%s with you' % (num_tags, 's' if num_tags > 1 else '')
        setattr(yb, 'top_friend_%d_stat' % (index + 1), friend_stat)
        # Set photo
        #        tf_photo_index = yb.get_first_unused_photo(rankings.top_friends_photos[index])
        tf_photo_index = yb.get_first_unused_photo_landscape(rankings.top_friends_photos[index])
        setattr(yb, 'top_friend_%d_photo_1' % (index + 1), tf_photo_index)
        # If photo was portrait, grab another one
    #        tf_photo_id = rankings.top_friends_photos[index][tf_photo_index]['id']
    #        tf_photo = results['photos_of_me'].fields_by_id[tf_photo_id]
    #        if tf_photo['width'] / float(tf_photo['height']) < HIGHEST_SQUARE_ASPECT_RATIO:
    #            tf_photo_index_2 = yb.get_first_unused_photo(rankings.top_friends_photos[index])
    #            setattr(yb, 'top_friend_%d_photo_2' % (index + 1), tf_photo_index_2)

    ## Top albums

    # Start pulling album names, photos
    # Can't pickle defaultdict? so just call it here, wouldn't save us much time anyway
    #    pull_albums_async = pull_album_photos.delay(user, album_score_and_date_by_id)
    #    album_photos_by_score, albums_ranked = pull_albums_async.get()
    album_photos_by_score, albums_ranked = pull_album_photos(user, album_score_and_date_by_id)
    rankings.top_albums_photos = album_photos_by_score
    rankings.top_albums_ranked = albums_ranked

    albums_assigned = 0
    # Work on a shallow copy so popping does not alter `rankings.top_albums_photos`
    all_top_albums = rankings.top_albums_photos[:]
    # Tracks the popped album's position in the original (uncopied) list
    curr_album_index = -1
    while all_top_albums:
        curr_album = all_top_albums.pop(0)
        curr_album_index += 1
        photos_to_show = []
        no_more_pics_of_user = False
        # Fill `photos_to_show`: first up to PICS_OF_USER_TO_PROMOTE photos
        # of the user, then any unused photos, until ALBUM_PHOTOS_TO_SHOW
        # is reached or the album runs out.
        while True:
            if len(photos_to_show) < PICS_OF_USER_TO_PROMOTE and not no_more_pics_of_user:
                # Want a pic of the user, loop through album photos looking for one
                photo_of_user = get_next_unused_photo_of_user(
                    yb,
                    curr_album,
                    results['photos_of_me'],
                    used_indices=photos_to_show
                )
                # NOTE(review): truthiness test here, while the branch below
                # uses `is not None`; if the helper returns an index, a
                # result of 0 would be treated as "none found" -- confirm.
                if photo_of_user:
                    photos_to_show.append(photo_of_user)
                else:
                    # No more pics of user, just take the next highest unused photo
                    no_more_pics_of_user = True
            else:
                next_photo = yb.get_first_unused_photo(curr_album, used_indices=photos_to_show)
                if next_photo is not None:
                    photos_to_show.append(next_photo)
                else:
                    # No photos left, break
                    break
            if len(photos_to_show) >= ALBUM_PHOTOS_TO_SHOW:
                break
        if len(photos_to_show) < ALBUM_MIN_PHOTOS:
            # Didn't have enough photos, try the next album
            continue

        # Save the fields
        album_str = 'top_album_%d' % (albums_assigned + 1)
        setattr(yb, album_str, curr_album_index)
        for field_num in range(len(photos_to_show)):
            setattr(yb, album_str + '_photo_%d' % (field_num + 1), photos_to_show[field_num])
        albums_assigned += 1
        if albums_assigned >= NUM_TOP_ALBUMS:
            break

    ## Throughout the year photos

    # Slots 6-9 are filled via `get_unused_if_portrait`, each paired with
    # the slot assigned on the line just before it (2->6, 3->7, 4->8, 5->9).
    yb.year_photo_1 = yb.get_first_unused_photo_landscape(rankings.top_photos)
    yb.year_photo_2 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_6 = get_unused_if_portrait(yb.year_photo_2, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_3 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_7 = get_unused_if_portrait(yb.year_photo_3, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_4 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_8 = get_unused_if_portrait(yb.year_photo_4, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_5 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_9 = get_unused_if_portrait(yb.year_photo_5, rankings.top_photos, yb, results['photos_of_me'])

    ## Back in time photos

    years_to_show = []
    for year_index, year in enumerate(back_in_time):
        curr_year_unused = yb.get_first_unused_photo(year)
        if curr_year_unused is None:
            continue
        years_to_show.append({'year_index': year_index, 'photo_index': curr_year_unused})
        # NOTE(review): `back_in_time` was built from a slice of at most
        # NUM_PREV_YEARS entries, so this `>` comparison can never be true
        # and the break never fires.
        if len(years_to_show) > NUM_PREV_YEARS:
            break

    # Special case: if only found one year, pull an additional photo from that year
    if len(years_to_show) == 1:
        that_year_index = years_to_show[0]['year_index']
        unused_photo_2 = yb.get_first_unused_photo(back_in_time[that_year_index])
        if unused_photo_2 is not None:
            years_to_show.append({'year_index': that_year_index, 'photo_index': unused_photo_2})

    # Save
    for year_num in range(len(years_to_show)):
        field_str = 'back_in_time_%d' % (year_num + 1)
        setattr(yb, field_str, years_to_show[year_num]['year_index'])
        setattr(yb, field_str + '_photo_1', years_to_show[year_num]['photo_index'])

    # Tabulate the list of all friends tagged in the book and store
    all_photos = yb._get_all_used_ids()
    # NOTE(review): `tagged_people_by_photo_id` is not assigned anywhere in
    # this function.
    all_tagged_people = itertools.chain(*[
        tagged_people_by_photo_id[photo_id] for photo_id in all_photos
    ])
    tagged_people_count = Counter(all_tagged_people)
    # List of (person, count) pairs, most frequently tagged first
    yb.friends_in_book = tagged_people_count.most_common()

    # Update the photos
    # NOTE(review): `.items()` yields (id, photo) pairs rather than the
    # photo objects themselves -- confirm that is what `Photo.insert` expects.
    Photo.insert(db_photos_by_id.items())

    # Save the book
    book.run_time = time.time() - runtime_start
    book.save()

    # Log the yearbook run time to mixpanel
    # NOTE(review): the run time was stored on `book` above but is read from
    # `yb` here -- confirm which object is intended.
    tracker.delay('Book Created', properties={
        'distinct_id': user.username,
        'mp_name_tag': user.username,
        'time': time.time(),
        'Book': 'Yearbook 2012',
        'Run Time (sec)': '%.1f' % yb.run_time
    })

    # Initiate a task to start downloading user's yearbook photos?
    return book