def top_friends_task(user_id):
    """
    Pulls all friends and all photo tags, ranks the tagged friends by an
    age-discounted tag score, and replaces the user's stored
    `FacebookFriend` documents with the freshly pulled friends.

    Each tag adds `1 / age` to the score of its `subject`, where `age` is
    the number of calendar years since the tag was created plus one (so a
    tag created this year counts as 1.0). The resulting rank is written to
    each matching friend as `top_friends_order` before saving.

    user_id : id passed to the child FQL tasks and used to load the `User`

    Returns the merged results of the child tasks. Note that the friend
    dicts under `get_friends` have had their `id` key popped by the time
    they are returned.
    """
    # Run the child tasks
    friends_task = run_fql.s(task_cls=GetFriendsTask, user_id=user_id)
    tagged_with_me_task = run_fql.s(task_cls=TaggedWithMeTask, user_id=user_id)
    job_async = group([friends_task, tagged_with_me_task]).apply_async()
    results = merge_dicts(*job_async.get())

    all_friends = results['get_friends']
    tagged_with_me = results['tagged_with_me']

    # Collapse the tags by user_id, discount by age
    current_year = datetime.date.today().year
    tag_score_by_user_id = defaultdict(float)
    for tag in tagged_with_me.fields:
        tag_age = current_year - tag['created'].year + 1.0
        tag_score_by_user_id[tag['subject']] += 1 / tag_age

    # Sort ascending by score: the lowest score gets order 1 and the
    # highest score gets the largest order value.
    # NOTE(review): the earlier comment here spoke of "reversing", but no
    # reverse was ever applied -- confirm which direction consumers of
    # `top_friends_order` expect before changing it.
    user_ids_in_score_order = sorted(tag_score_by_user_id.items(),
                                     key=lambda item: item[1])

    # Orders start at 1 because 0 is not a valid value
    top_friends_order_by_id = {
        friend_id: order
        for order, (friend_id, _score)
        in enumerate(user_ids_in_score_order, start=1)
    }

    # Copy `top_friends_order`s to `all_friends`
    for friend_id, order in top_friends_order_by_id.items():
        try:
            friend_fields = all_friends.fields_by_id[friend_id]
        except KeyError:
            # Means you were tagged with someone you aren't friends with
            continue
        friend_fields['top_friends_order'] = order

    # Save
    user = User.objects.get(id=user_id)

    # Clear current friends
    FacebookFriend.objects(user=user).delete()

    # Bulk insert the new ones
    docs = []
    for friend in all_friends.fields:
        # `pop` mutates the friend dict, so `id` is also gone from the
        # returned results
        uid = friend.pop('id')
        docs.append(FacebookFriend(uid=uid, user=user, **friend))
    # An empty bulk insert is rejected by the driver; a user with no
    # friends simply ends up with no stored friends
    if docs:
        FacebookFriend.objects.insert(docs)

    logger.info('Pulled %d friends, %d top friends for user %s',
                len(all_friends), len(top_friends_order_by_id),
                user.get_id_str())
    return results
def join_on_field(self, other_getter, map_fxn=None, new_field_name=None,
                  join_field='id', join_field_1=None, join_field_2=None,
                  discard_orphans=True):
    """
    Joins this getter to another by indexing on a field and applying a
    mapping function to generate new outputs

    other_getter    : the getter to join against
    map_fxn         : optional callable taking (own_element, other_element)
                      for each matched pair; its result is stored under
                      `new_field_name` next to the join value
    new_field_name  : output key for the `map_fxn` result
    join_field      : field to join on in both getters; also the output key
                      that carries the join value when `map_fxn` is given
    join_field_1    : overrides `join_field` for this getter's elements
    join_field_2    : overrides `join_field` for `other_getter`'s elements
    discard_orphans : do we discard elements in one that don't match the
                      other?

    Without `map_fxn`, matched elements are combined with `merge_dicts`.
    When an index is built from a custom join field, elements sharing the
    same join value collapse to the last one seen.

    Returns a new getter built with `self.from_fields`.
    """
    if join_field != 'id' or join_field_1 or join_field_2:
        join_field_1 = join_field_1 or join_field
        join_field_2 = join_field_2 or join_field
        getter_by_join_field = {element[join_field_1]: element
                                for element in self.fields}
        other_by_join_field = {element[join_field_2]: element
                               for element in other_getter.fields}
    else:
        getter_by_join_field = self._fields_by_id
        other_by_join_field = other_getter._fields_by_id

    keys_in_both = set(getter_by_join_field) & set(other_by_join_field)

    # Run the mapping function
    if map_fxn:
        # Read the join value from the field this getter was indexed on;
        # reading `join_field` directly breaks when `join_field_1`
        # overrides it
        own_join_field = join_field_1 or join_field
        joined = [{
            new_field_name: map_fxn(getter_by_join_field[key],
                                    other_by_join_field[key]),
            # join field is same in both, by definition
            join_field: getter_by_join_field[key][own_join_field],
        } for key in keys_in_both]
    # If no mapping function, just take all existing fields
    else:
        joined = [merge_dicts(getter_by_join_field[key],
                              other_by_join_field[key])
                  for key in keys_in_both]

    if not discard_orphans:
        # Append the orphans
        # Note that this may cause problems if the
        # mapping function introduced any new fields
        joined.extend(getter_by_join_field[key]
                      for key in set(getter_by_join_field) - keys_in_both)
        joined.extend(other_by_join_field[key]
                      for key in set(other_by_join_field) - keys_in_both)

    # Return a new getter
    return self.from_fields(joined)