def top_friends_task(user_id):
    """
    Pulls all friends and all photo tags, ranks the tagged users by an
    age-discounted tag score, and replaces the user's stored
    `FacebookFriend` documents with the freshly pulled friends.

    Each tag adds `1 / age` to the score of its `subject`, where `age` is the
    number of calendar years since the tag was created (a tag from the
    current year has age 1). The resulting rank is written onto each friend's
    field dict as `top_friends_order` (1-based; 0 is not a valid value).

    :param user_id: id of the `User`; also forwarded to the FQL child tasks
    :returns: the merged results of the child tasks, with `get_friends`
              and `tagged_with_me` entries
    """
    # Run the child tasks as a group and block until both have finished
    fql_group = group([
        run_fql.s(task_cls=GetFriendsTask, user_id=user_id),
        run_fql.s(task_cls=TaggedWithMeTask, user_id=user_id),
    ])
    results = merge_dicts(*fql_group.apply_async().get())
    all_friends = results['get_friends']
    tagged_with_me = results['tagged_with_me']

    # Collapse the tags by user_id, discount by age
    this_year = datetime.date.today().year
    tag_score_by_user_id = defaultdict(float)
    for tag in tagged_with_me.fields:
        # Clamp to 1: `today()` is the server's local date, so a tag stamped
        # in the next calendar year would otherwise give an age of 0 and a
        # ZeroDivisionError below.
        tag_age = max(this_year - tag['created'].year + 1.0, 1.0)
        tag_score_by_user_id[tag['subject']] += 1.0 / tag_age

    # Sort by score and number the users from 1 (0 is not a valid value).
    # NOTE(review): the sort is ascending, so the LOWEST score gets order 1.
    # The original comment talked about "reversing" -- confirm whether readers
    # of `top_friends_order` treat larger values as better before changing it.
    ranked = sorted(tag_score_by_user_id.items(), key=lambda item: item[1])
    top_friends_order_by_id = {}
    for order, (friend_id, _score) in enumerate(ranked, 1):
        top_friends_order_by_id[friend_id] = order

    # Copy `top_friends_order`s to `all_friends`
    for friend_id, top_friends_order in top_friends_order_by_id.items():
        try:
            all_friends.fields_by_id[friend_id]['top_friends_order'] = top_friends_order
        except KeyError:
            # Means you were tagged with someone you aren't friends with
            continue

    # Save
    user = User.objects.get(id=user_id)

    # Clear current friends
    FacebookFriend.objects(user=user).delete()

    # Bulk insert the new ones. `pop` removes 'id' from each field dict so
    # the remaining keys can be passed straight through as document fields.
    docs = []
    for friend in all_friends.fields:
        uid = friend.pop('id')
        docs.append(FacebookFriend(uid=uid, user=user, **friend))
    if docs:
        # A bulk insert of an empty list is rejected by the driver, so skip
        # it when the user has no friends.
        FacebookFriend.objects.insert(docs)

    logger.info(
        'Pulled %d friends, %d top friends for user %s',
        len(all_friends), len(top_friends_order_by_id), user.get_id_str()
    )
    return results
def run_book(user):
    """
    Build and save a `Book` for `user`: pull the user's Facebook data through
    FQL tasks, save and score photos and posts, then fill the page slots
    (top photos, top friends, top albums, year photos, back-in-time photos).

    The overall philosophy here is that
    ----
    - these functions annotate the data
    - the page classes read the annotations and "claim" photos

    Returns the string 'NOT_ENOUGH_PHOTOS' when the book has fewer than
    `MIN_TOP_PHOTOS_FOR_BOOK` photos for `YEARBOOK_YEAR`; otherwise returns
    the saved `book`.

    NOTE(review): `rankings`, `yb`, `save_to_db_async`,
    `top_friend_score_by_id`, `tags_by_user_id`, `top_photo_score_by_id` and
    `tagged_people_by_photo_id` are used below but never assigned in this
    function. Unless they exist at module scope this raises NameError --
    looks like a partially finished refactor; confirm before relying on it.
    """
    runtime_start = time.time()

    # Create the book
    book = Book(user=user, book_type=BOOK.book_type)

    # Run separate, async tasks to facebook
    # NOTE(review): the task arguments are wrapped in a single keyword named
    # `kwargs` here, while every other `run_fql` call in this file passes
    # `task_cls=` / `user_id=` directly -- confirm `run_fql` accepts this form.
    fql_job = group([
        run_fql.s(kwargs={'task_cls': PhotosOfMeTask, 'user_id': user.id}),
        run_fql.s(kwargs={'task_cls': CommentsOnPhotosOfMeTask, 'user_id': user.id}),
        run_fql.s(kwargs={'task_cls': OwnerPostsFromYearTask, 'user_id': user.id}),
        run_fql.s(kwargs={'task_cls': OthersPostsFromYearTask, 'user_id': user.id}),
    ])
    job_async = fql_job.apply_async()
    # family_async = run_fql.s(task_cls=FamilyTask, user_id=user.id, commit=True).apply_async()
    family_async = run_fql.delay(task_cls=FamilyTask, user_id=user.id, commit=True)

    # While that's running, collect the other dependent tasks
    # NOTE(review): `.s()` is used here where the family task above uses
    # `.delay()`. Presumably `.s()` only builds a signature and does not
    # dispatch anything, in which case the `.get()` calls below are not
    # waiting on a task result -- confirm.
    friends_task_async = get_result_or_run_fql.s(task_cls=GetFriendsTask, user_id=user.id)
    tagged_with_me_task_async = get_result_or_run_fql.s(task_cls=TaggedWithMeTask, user_id=user.id)

    # NOTE(review): `top_friends_task` passes the group result through
    # `merge_dicts(*results)` before indexing it by key; here the raw value of
    # `job_async.get()` is indexed by string -- confirm it is a dict.
    results = job_async.get()
    results['get_friends'] = friends_task_async.get()
    results['tagged_with_me'] = tagged_with_me_task_async.get()

    ## Results contains
    get_friends = results['get_friends']  # --> all friends (already saved to db); not read again below
    tagged_with_me = results['tagged_with_me']  # --> `subject, object_id, created` from tags of photos I am in
    comments_on_photos_of_me = results['comments_on_photos_of_me']
    others_posts_from_year = results['others_posts_from_year']
    owner_posts_from_year = results['owner_posts_from_year']
    photos_of_me = results['photos_of_me']

    ## Save all the photos
    # From here on `results` is rebound to the return value of the helper.
    results = save_photos_and_tags(user, book, tagged_with_me, comments_on_photos_of_me, photos_of_me)
    db_photos_by_id = results['db_photos_by_id']
    comments_score_by_user_id = results['comments_score_by_user_id']

    ## If they don't have enough photos this year, bail out of the yearbook process
    if Photo.objects(book=book).get_year(YEARBOOK_YEAR).count() < MIN_TOP_PHOTOS_FOR_BOOK:
        celery.current_task.update_state(state='NOT_ENOUGH_PHOTOS')
        # This is a hack because celery overwrites the task state when you return
        # could also use an `after_return` handler, see http://bit.ly/16U6YKv
        return 'NOT_ENOUGH_PHOTOS'

    ## Calculate top friends
    # `results` is rebound again; only the two keys read below are used here.
    results = calculate_top_friends(user, book, others_posts_from_year, owner_posts_from_year, db_photos_by_id, photos_of_me, comments_score_by_user_id)
    all_posts_this_year = results['all_posts_this_year']
    db_friends_by_id = results['db_friends_by_id']  # not read again below

    ## Save all posts
    db_posts = []
    for post in all_posts_this_year:
        db_posts.append(WallPost(user=user, book=book, **post))
    WallPost.insert(db_posts)

    ## Score all the photos
    calculate_photo_scores(book)

    ## Calculate top group photos
    calculate_top_group_photo_scores(book)

    ## Calculate top albums
    album_score_and_date_by_id = calculate_top_albums(photos_of_me)

    ## Assign post scores
    assign_post_scores(book)

    ## Pull out birthday posts
    birthday_posts = get_birthday_posts(book)

    ## Go through and add tags to the photo objects
    ## tags indicate membership in a category - group_photo
    ## tags may also relate to a (new) score field - group_photo_score

    # We run the pages one by one. They run in order of preference (not necessarily book order)
    # Each page has a inclusion criteria, and a assignment function

    # Back in time
    # NOTE(review): `results` now holds the return value of
    # `calculate_top_friends`, not the FQL results. Every
    # `results['photos_of_me']` / `results['get_friends']` lookup from here on
    # depends on that dict carrying those keys -- confirm, or use the
    # `photos_of_me` / `get_friends` locals captured earlier.
    max_year, photos_of_me_by_year = results['photos_of_me'].bucket_by_year()
    # Newest year first; the slice below skips the first (newest) entry and
    # keeps at most NUM_PREV_YEARS earlier years.
    years = list(sorted(photos_of_me_by_year.keys(), reverse=True))
    back_in_time = []
    for index, year in enumerate(years[1:NUM_PREV_YEARS + 1]):
        year_photo_ids = []
        for photo in photos_of_me_by_year[year].order_by('score'):
            year_photo_ids.append(photo['id'])
        back_in_time.append(year_photo_ids)
    rankings.back_in_time = back_in_time

    ## Assign photos to the Yearbook, avoiding duplicates
    # try:
    #     old_yb = Yearbook.objects.get(rankings=rankings)
    #     old_yb.delete()
    # except Yearbook.DoesNotExist: pass
    yb.top_post = 0
    yb.birthday_posts = birthday_posts.fields
    yb.top_photo_1 = yb.get_first_unused_photo_landscape(rankings.top_photos)  # landscape
    yb.top_photo_2 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_3 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_4 = yb.get_first_unused_photo(rankings.top_photos)
    yb.top_photo_5 = yb.get_first_unused_photo(rankings.top_photos)

    # `assign_group_photos` uses FacebookPhoto classes to determine portrait/landscape
    # make sure they finished saving to the db
    # print 'save_to_db state: %s' % save_to_db_async.state
    save_to_db_async.get()

    # Assign the group photos from different albums, if possible
    # Make one pass assigning from different albums,
    # then a second filling in the gaps
    # assigned_group_photos = assign_group_photos(yb, rankings, results['photos_of_me'], do_unique_albums=True)
    # if assigned_group_photos < NUM_GROUP_PHOTOS:
    #     assign_group_photos(yb, rankings, results['photos_of_me'], do_unique_albums=False)

    ## Top friends
    # Do this after we assign the top photos and top group photos,
    # so we can make sure there are enough unused photos of them

    # We need to make sure the user exists in the db
    # Users that came back from the db are still in results['get_friends']
    saved_friends_ids = results['get_friends'].ids

    # Make sure the family task finished running
    family_async.get()
    # Need to re-sync `user`?
    family_ids = user.family.all().values_list('facebook_id', flat=True)

    # Walk candidates from highest to lowest score, keeping those who are
    # saved friends and have enough unused photos.
    top_friend_ids = []
    gfbf_added = False
    for user_id, score in sorted(top_friend_score_by_id.items(), key=lambda x: x[1], reverse=True):
        if yb.num_unused_photos(tags_by_user_id[user_id]) >= TOP_FRIEND_MIN_UNUSED_PHOTOS and user_id in saved_friends_ids:
            # If user is family or gfbf, insert at front
            if user_id == user.profile.significant_other_id:
                top_friend_ids.insert(0, user_id)
                gfbf_added = True
            elif user_id in family_ids:
                # Family goes right behind the significant other when one has
                # already been placed, otherwise at the very front.
                top_friend_ids.insert(1 if gfbf_added else 0, user_id)
            else:
                top_friend_ids.append(user_id)

    # Need to build another list that combines tag and photo score
    rankings.top_friends_ids = top_friend_ids[:NUM_TOP_FRIENDS_STORED]
    # One list per entry of `top_friend_ids` (the unsliced list), each sorted
    # by photo score, highest first.
    top_friends_photos = []
    for friend_id in top_friend_ids:
        friend_tags = tags_by_user_id[friend_id]
        top_friend_photos = []
        for tag in friend_tags:
            tag_id = tag['object_id']
            photo = results['photos_of_me'].fields_by_id[tag_id]
            top_friend_photos.append({'id': tag_id, 'score': top_photo_score_by_id[tag_id], 'width': photo['width'], 'height': photo['height']})
        top_friend_photos.sort(key=lambda x: x['score'], reverse=True)
        top_friends_photos.append(top_friend_photos)
    rankings.top_friends_photos = top_friends_photos

    ## Assign the top friends
    # used_albums = []
    for index in range(min(NUM_TOP_FRIENDS, len(top_friend_ids))):
        # Index
        # The slot stores the position in `top_friend_ids`, not the friend id.
        setattr(yb, 'top_friend_%d' % (index + 1), index)

        # Friend stat
        if top_friend_ids[index] == user.profile.significant_other_id:
            friend_stat = SIGNIFICANT_OTHER_STAT
        elif top_friend_ids[index] in family_ids:
            friend_stat = FAMILY_STAT
        else:
            num_tags = len(rankings.top_friends_photos[index])
            friend_stat = u'Tagged in %d photo%s with you' % (num_tags, 's' if num_tags > 1 else '')
        setattr(yb, 'top_friend_%d_stat' % (index + 1), friend_stat)

        # Set photo
        # tf_photo_index = yb.get_first_unused_photo(rankings.top_friends_photos[index])
        tf_photo_index = yb.get_first_unused_photo_landscape(rankings.top_friends_photos[index])
        setattr(yb, 'top_friend_%d_photo_1' % (index + 1), tf_photo_index)

        # If photo was portrait, grab another one
        # tf_photo_id = rankings.top_friends_photos[index][tf_photo_index]['id']
        # tf_photo = results['photos_of_me'].fields_by_id[tf_photo_id]
        # if tf_photo['width'] / float(tf_photo['height']) < HIGHEST_SQUARE_ASPECT_RATIO:
        #     tf_photo_index_2 = yb.get_first_unused_photo(rankings.top_friends_photos[index])
        #     setattr(yb, 'top_friend_%d_photo_2' % (index + 1), tf_photo_index_2)

    ## Top albums
    # Start pulling album names, photos
    # Can't pickle defaultdict? so just call it here, wouldn't save us much time anyway
    # pull_albums_async = pull_album_photos.delay(user, album_score_and_date_by_id)
    # album_photos_by_score, albums_ranked = pull_albums_async.get()
    album_photos_by_score, albums_ranked = pull_album_photos(user, album_score_and_date_by_id)
    rankings.top_albums_photos = album_photos_by_score
    rankings.top_albums_ranked = albums_ranked

    albums_assigned = 0
    # Work on a copy so popping does not mutate `rankings.top_albums_photos`;
    # `curr_album_index` tracks the popped album's position in the original.
    all_top_albums = rankings.top_albums_photos[:]
    curr_album_index = -1
    while all_top_albums:
        curr_album = all_top_albums.pop(0)
        curr_album_index += 1
        photos_to_show = []
        no_more_pics_of_user = False
        # Fill `photos_to_show`: first up to PICS_OF_USER_TO_PROMOTE photos of
        # the user, then the next unused photos, until the album runs out or
        # ALBUM_PHOTOS_TO_SHOW is reached.
        while True:
            if len(photos_to_show) < PICS_OF_USER_TO_PROMOTE and not no_more_pics_of_user:
                # Want a pic of the user, loop through album photos looking for one
                photo_of_user = get_next_unused_photo_of_user(
                    yb, curr_album, results['photos_of_me'], used_indices=photos_to_show
                )
                # NOTE(review): this is a truthiness test, while `next_photo`
                # below is compared against None. If the helper returns an
                # index, index 0 would be treated as "not found" -- confirm.
                if photo_of_user:
                    photos_to_show.append(photo_of_user)
                else:
                    # No more pics of user, just take the next highest unused photo
                    no_more_pics_of_user = True
            else:
                next_photo = yb.get_first_unused_photo(curr_album, used_indices=photos_to_show)
                if next_photo is not None:
                    photos_to_show.append(next_photo)
                else:
                    # No photos left, break
                    break
            if len(photos_to_show) >= ALBUM_PHOTOS_TO_SHOW:
                break
        if len(photos_to_show) < ALBUM_MIN_PHOTOS:
            # Didn't have enough photos, try the next album
            continue

        # Save the fields
        album_str = 'top_album_%d' % (albums_assigned + 1)
        setattr(yb, album_str, curr_album_index)
        for field_num in range(len(photos_to_show)):
            setattr(yb, album_str + '_photo_%d' % (field_num + 1), photos_to_show[field_num])
        albums_assigned += 1
        if albums_assigned >= NUM_TOP_ALBUMS:
            break

    ## Throughout the year photos
    # Slots 6-9 are filled by `get_unused_if_portrait` from the slot just
    # assigned (2-5 respectively).
    yb.year_photo_1 = yb.get_first_unused_photo_landscape(rankings.top_photos)
    yb.year_photo_2 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_6 = get_unused_if_portrait(yb.year_photo_2, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_3 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_7 = get_unused_if_portrait(yb.year_photo_3, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_4 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_8 = get_unused_if_portrait(yb.year_photo_4, rankings.top_photos, yb, results['photos_of_me'])
    yb.year_photo_5 = yb.get_first_unused_photo(rankings.top_photos)
    yb.year_photo_9 = get_unused_if_portrait(yb.year_photo_5, rankings.top_photos, yb, results['photos_of_me'])

    ## Back in time photos
    years_to_show = []
    for year_index, year in enumerate(back_in_time):
        curr_year_unused = yb.get_first_unused_photo(year)
        if curr_year_unused is None:
            continue
        years_to_show.append({'year_index': year_index, 'photo_index': curr_year_unused})
        # `back_in_time` has at most NUM_PREV_YEARS entries (see the slice
        # that built it), so this `>` test cannot trigger inside the loop.
        if len(years_to_show) > NUM_PREV_YEARS:
            break

    # Special case: if only found one year, pull an additional photo from that year
    if len(years_to_show) == 1:
        that_year_index = years_to_show[0]['year_index']
        unused_photo_2 = yb.get_first_unused_photo(back_in_time[that_year_index])
        if unused_photo_2 is not None:
            years_to_show.append({'year_index': that_year_index, 'photo_index': unused_photo_2})

    # Save
    for year_num in range(len(years_to_show)):
        field_str = 'back_in_time_%d' % (year_num + 1)
        setattr(yb, field_str, years_to_show[year_num]['year_index'])
        setattr(yb, field_str + '_photo_1', years_to_show[year_num]['photo_index'])

    # Tabulate the list of all friends tagged in the book and store
    all_photos = yb._get_all_used_ids()
    all_tagged_people = itertools.chain(*[
        tagged_people_by_photo_id[photo_id] for photo_id in all_photos
    ])
    tagged_people_count = Counter(all_tagged_people)
    # List of (person, count) pairs, most frequently tagged first.
    yb.friends_in_book = tagged_people_count.most_common()

    # Update the photos
    # NOTE(review): `.items()` yields (id, photo) pairs rather than the photo
    # objects themselves -- confirm `Photo.insert` expects pairs and not
    # `.values()`.
    Photo.insert(db_photos_by_id.items())

    # Save the book
    book.run_time = time.time() - runtime_start
    book.save()

    # Log the yearbook run time to mixpanel
    # NOTE(review): the run time was stored on `book` just above but is read
    # from `yb` here -- confirm which object is intended.
    tracker.delay('Book Created', properties={
        'distinct_id': user.username,
        'mp_name_tag': user.username,
        'time': time.time(),
        'Book': 'Yearbook 2012',
        'Run Time (sec)': '%.1f' % yb.run_time
    })

    # Initiate a task to start downloading user's yearbook photos?
    return book