def test_follow(self):
    """Run the shared follow scenario twice: once as-is, once with HBase forced on."""
    # First pass: the original author labels this the MySQL run.
    self._test_follow()

    # Second pass: reset cached state, route 100 percent of friendship
    # traffic to HBase, and repeat the identical scenario.
    self.clear_cache()
    GateKeeper.set_kv('switch_friendship_to_hbase', 'percent', 100)
    self._test_follow()
def clear_cache(self):
    """Flush the testing cache and Redis, then turn both HBase switches on."""
    testing_cache = caches['testing']
    testing_cache.clear()
    RedisClient.clear()

    # The storage backend can be toggled by hand here; both features are
    # switched to HBase.
    for switch_name in ('switch_newsfeed_to_hbase', 'switch_friendship_to_hbase'):
        GateKeeper.turn_on(switch_name)
def test_fanout_main_task(self):
    """Check fanout_newsfeeds_main_task output, stored feed counts and cached lists.

    Three tweets by ``linghu`` are fanned out while the follower count grows
    from 1 to 3 to 4. After each fanout the test checks the total number of
    stored newsfeeds, the task's summary message and the cached list length.
    """
    author = self.linghu

    def fanout_and_count(tweet):
        # Run the main task with the timestamp type that matches the active
        # backend, then return (task message, total stored newsfeeds).
        if GateKeeper.is_switch_on('switch_newsfeed_to_hbase'):
            message = fanout_newsfeeds_main_task(tweet.id, tweet.timestamp, author.id)
            return message, len(HBaseNewsFeed.filter(prefix=(None, None)))
        message = fanout_newsfeeds_main_task(tweet.id, tweet.created_at, author.id)
        return message, NewsFeed.objects.count()

    # Round 1: a single follower (dongxie).
    tweet = self.create_tweet(author, 'tweet 1')
    self.create_friendship(self.dongxie, author)
    msg, total = fanout_and_count(tweet)
    self.assertEqual(1 + 1, total)
    self.assertEqual(msg, '1 newsfeeds going to fanout, 1 batches created.')
    cached_list = NewsFeedService.get_cached_newsfeeds(author.id)
    self.assertEqual(len(cached_list), 1)

    # Round 2: two more followers, three in total.
    for i in range(2):
        user = self.create_user('user{}'.format(i))
        self.create_friendship(user, author)
    tweet = self.create_tweet(author, 'tweet 2')
    msg, total = fanout_and_count(tweet)
    self.assertEqual(4 + 2, total)
    self.assertEqual(msg, '3 newsfeeds going to fanout, 1 batches created.')
    cached_list = NewsFeedService.get_cached_newsfeeds(author.id)
    self.assertEqual(len(cached_list), 2)

    # Round 3: a fourth follower; the task now reports two batches.
    user = self.create_user('another user')
    self.create_friendship(user, author)
    tweet = self.create_tweet(author, 'tweet 3')
    msg, total = fanout_and_count(tweet)
    self.assertEqual(8 + 3, total)
    self.assertEqual(msg, '4 newsfeeds going to fanout, 2 batches created.')
    cached_list = NewsFeedService.get_cached_newsfeeds(author.id)
    self.assertEqual(len(cached_list), 3)

    # dongxie followed from the start, so all three tweets are cached for him.
    cached_list = NewsFeedService.get_cached_newsfeeds(self.dongxie.id)
    self.assertEqual(len(cached_list), 3)
def setUp(self) -> None:
    """Create two users with clients and give user2 two followers and three followings.

    The friendship switch is set to 100 percent HBase before any data is created.
    """
    super(FriendshipPaginationTest, self).setUp()
    GateKeeper.set_kv('switch_friendship_to_hbase', 'percent', 100)

    self.user1, self.user1_client = self.create_user_and_client(username='******')
    self.user2, self.user2_client = self.create_user_and_client(username='******')

    # Users who follow user2.
    for idx in range(2):
        fan = self.create_user('user2_follower{}'.format(idx))
        self.create_friendship(from_user=fan, to_user=self.user2)

    # Users whom user2 follows.
    for idx in range(3):
        followee = self.create_user('user2_following{}'.format(idx))
        self.create_friendship(from_user=self.user2, to_user=followee)
def get_following_user_id_set(cls, from_user_id):
    """Return the set of user ids followed by ``from_user_id``.

    A Redis set is consulted first. On a miss the ids are loaded from MySQL
    or HBase (depending on the GateKeeper switch) and written back to Redis.
    """
    redis_key = FOLLOWINGS_PATTERN.format(user_id=from_user_id)
    cached_members = RedisHelper.get_all_members_from_set(redis_key)

    if cached_members is not None:
        # Cache hit: members may come back as bytes (b'123'), so decode
        # those and convert everything to int.
        return {
            int(member.decode('utf-8') if isinstance(member, bytes) else member)
            for member in cached_members
        }

    # Cache miss: read from the active storage backend.
    if GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        rows = HBaseFollowing.filter(prefix=(from_user_id, None))
    else:
        rows = Friendship.objects.filter(from_user_id=from_user_id)
    following_ids = {row.to_user_id for row in rows}

    # Populate Redis for subsequent lookups.
    RedisHelper.add_id_to_set(redis_key, following_ids)
    return following_ids
def follow(cls, from_user_id, to_user_id):
    """Create a follow relation from ``from_user_id`` to ``to_user_id``.

    Returns None for a self-follow. Otherwise returns the created MySQL
    Friendship row, or the created HBaseFollowing record when the HBase
    switch is on.
    """
    if from_user_id == to_user_id:
        return None

    # NOTE(review): most GateKeeper call sites use `is_switch_on`; confirm
    # that `is_switched_on` is the intended method name here.
    if GateKeeper.is_switched_on('switch_friendship_to_hbase'):
        # Take the timestamp once so the two HBase records written below
        # cannot disagree on created_at.
        now = int(time.time() * 1000000)
        relation = {
            'from_user_id': from_user_id,
            'to_user_id': to_user_id,
            'created_at': now,
        }
        HBaseFollower.create(**relation)
        return HBaseFollowing.create(**relation)

    # Switch is off: store the relation in MySQL.
    return Friendship.objects.create(
        from_user_id=from_user_id,
        to_user_id=to_user_id,
    )
def batch_create(cls, batch_params):
    """Create many newsfeeds at once and push each one into the cache.

    ``batch_params`` is an iterable of keyword-argument dicts, one per
    newsfeed. Returns the list of created newsfeed objects.
    """
    # Wrong approach: never put a database write inside a for loop, it is
    # very inefficient.
    #   for follower in FriendshipService.get_followers(tweet.user):
    #       NewsFeed.objects.create(
    #           user=follower,
    #           tweet=tweet,
    #       )
    # Right approach: bulk_create merges the inserts into one statement.
    #   newsfeeds = [
    #       NewsFeed(user_id=follower_id, tweet_id=tweet_id)
    #       for follower_id in follower_ids
    #   ]
    #   NewsFeed.objects.bulk_create(newsfeeds)
    use_hbase = GateKeeper.is_switch_on('switch_newsfeed_to_hbase')
    if not use_hbase:
        newsfeeds = [NewsFeed(**params) for params in batch_params]
        NewsFeed.objects.bulk_create(newsfeeds)
    else:
        newsfeeds = HBaseNewsFeed.batch_create(batch_params)

    # bulk_create does not fire the post_save signal, so each feed has to
    # be pushed into the cache by hand.
    for feed in newsfeeds:
        NewsFeedService.push_newsfeed_to_cache(feed)
    return newsfeeds
def _lazy_load(limit):
    """Load at most ``limit`` newsfeeds for ``user_id`` from the active backend.

    ``user_id`` is not a parameter; it is taken from the enclosing scope.
    """
    if not GateKeeper.is_switch_on('switch_newsfeed_to_hbase'):
        # MySQL: newest first, cut to the requested size.
        user_feeds = NewsFeed.objects.filter(user_id=user_id)
        return user_feeds.order_by('-created_at')[:limit]
    # HBase: reverse scan over the user's row-key prefix.
    return HBaseNewsFeed.filter(prefix=(user_id, None), limit=limit, reverse=True)
def get_following_user_id_set(cls, from_user_id):
    """Return the set of user ids followed by ``from_user_id``, read from MySQL or HBase."""
    if GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        rows = HBaseFollowing.filter(prefix=(from_user_id, ))
    else:
        rows = Friendship.objects.filter(from_user_id=from_user_id)
    return {row.to_user_id for row in rows}
def unfollow(cls, from_user_id, to_user_id):
    """Remove the follow relation from ``from_user_id`` to ``to_user_id``.

    Returns the number of relations removed: 0 for a self-unfollow or a
    relation that does not exist, the MySQL delete count otherwise, or 1 on
    the HBase path.
    """
    if from_user_id == to_user_id:
        return 0

    if GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        instance = cls.get_follow_instance(from_user_id, to_user_id)
        if instance is None:
            return 0
        # The relation is stored in both HBase tables under the same
        # created_at, so both records are deleted.
        created_at = instance.created_at
        HBaseFollowing.delete(from_user_id=from_user_id, created_at=created_at)
        HBaseFollower.delete(to_user_id=to_user_id, created_at=created_at)
        return 1

    # A queryset delete() returns two values: the total number of rows
    # deleted and a per-model breakdown. Several models can appear because a
    # foreign key declared with on_delete=models.CASCADE cascades: if a
    # field of model A is a foreign key to model B, deleting a B row also
    # deletes the A rows that point to it. CASCADE is therefore dangerous
    # and best avoided; on_delete=models.SET_NULL at least prevents an
    # accidental delete from causing a domino effect.
    removed, _per_model = Friendship.objects.filter(
        from_user_id=from_user_id,
        to_user_id=to_user_id,
    ).delete()
    return removed
def count(cls, user_id=None):
    """Count stored newsfeeds, optionally for a single user (test use only)."""
    if GateKeeper.is_switch_on('switch_newsfeed_to_hbase'):
        matching_rows = HBaseNewsFeed.filter(prefix=(user_id, ))
        return len(matching_rows)
    if user_id is not None:
        return NewsFeed.objects.filter(user_id=user_id).count()
    return NewsFeed.objects.count()
def follow(cls, from_user_id, to_user_id):
    """Create a follow relation, updating Redis first and then the database.

    Returns None for a self-follow. Otherwise returns the created MySQL
    Friendship row, or the created HBaseFollowing record when the HBase
    switch is on.
    """
    if from_user_id == to_user_id:
        return None

    # Redis is updated before the persistent store.
    cls.add_following_id_in_redis(from_user_id, to_user_id)

    if GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        # One shared timestamp for both HBase records.
        now = int(time.time() * 1000000)
        relation = {
            'from_user_id': from_user_id,
            'to_user_id': to_user_id,
            'created_at': now,
        }
        HBaseFollower.create(**relation)
        return HBaseFollowing.create(**relation)

    # Switch is off: store the relation in MySQL.
    return Friendship.objects.create(
        from_user_id=from_user_id,
        to_user_id=to_user_id,
    )
def create(cls, **kwargs):
    """Create a single newsfeed in the active backend and return it."""
    if not GateKeeper.is_switch_on('switch_newsfeed_to_hbase'):
        return NewsFeed.objects.create(**kwargs)

    newsfeed = HBaseNewsFeed.create(**kwargs)
    # No listener watches HBase creates, so the cache update has to be
    # triggered by hand.
    cls.push_newsfeed_to_cache(newsfeed)
    return newsfeed
def create_newsfeed(self, user, tweet):
    """Create a newsfeed for ``user`` pointing at ``tweet`` through NewsFeedService."""
    use_hbase = GateKeeper.is_switch_on('switch_newsfeed_to_hbase')
    # HBase uses the tweet's timestamp attribute; MySQL uses its created_at.
    created_at = tweet.timestamp if use_hbase else tweet.created_at
    return NewsFeedService.create(user_id=user.id, tweet_id=tweet.id, created_at=created_at)
def followings(self, request, pk):
    """Return a paginated list of the users that user ``pk`` follows."""
    if not GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        queryset = Friendship.objects.filter(from_user_id=pk)
        page = self.paginate_queryset(queryset.order_by('-created_at'))
    else:
        page = self.paginator.paginate_hbase(HBaseFollowing, (pk,), request)

    serializer = FollowingSerializer(page, many=True, context={'request': request})
    return self.paginator.get_paginated_response(serializer.data)
def get_cached_newsfeeds(cls, user_id):
    """Load the cached newsfeed list for ``user_id`` through RedisHelper.

    The serializer class is chosen to match the active storage backend.
    """
    cache_key = USER_NEWSFEEDS_PATTERN.format(user_id=user_id)
    use_hbase = GateKeeper.is_switch_on('switch_newsfeed_to_hbase')
    serializer = HBaseModelSerializer if use_hbase else DjangoModelSerializer
    lazy_loader = lazy_load_newsfeeds(user_id)
    return RedisHelper.load_objects(cache_key, lazy_loader, serializer=serializer)
def get_following_user_id_set(cls, from_user_id):
    """Return the set of user ids followed by ``from_user_id``.

    TODO: cache the result in a Redis set.
    """
    # NOTE(review): most GateKeeper call sites use `is_switch_on`; confirm
    # that `is_switched_on` is the intended method name here.
    if GateKeeper.is_switched_on('switch_friendship_to_hbase'):
        rows = HBaseFollowing.filter(prefix=(from_user_id, None))
    else:
        rows = Friendship.objects.filter(from_user_id=from_user_id)
    # A set is used because membership checks on it are fast.
    return {row.to_user_id for row in rows}
def has_followed(cls, from_user_id, to_user_id):
    """Return True when ``from_user_id`` follows ``to_user_id``.

    A user compared with themselves always yields True.
    """
    if from_user_id == to_user_id:
        return True

    if GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        return cls.get_follow_instance(from_user_id, to_user_id) is not None

    matching = Friendship.objects.filter(
        from_user_id=from_user_id,
        to_user_id=to_user_id,
    )
    return matching.exists()
def batch_create(cls, batch_params):
    """Create many newsfeeds at once and push each one into the cache.

    ``batch_params`` is an iterable of keyword-argument dicts, one per
    newsfeed. Returns the list of created newsfeed objects.
    """
    use_hbase = GateKeeper.is_switch_on('switch_newsfeed_to_hbase')
    if not use_hbase:
        newsfeeds = [NewsFeed(**params) for params in batch_params]
        NewsFeed.objects.bulk_create(newsfeeds)
    else:
        newsfeeds = HBaseNewsFeed.batch_create(batch_params)

    # bulk_create does not fire the post_save signal, so each feed has to
    # be pushed into the cache by hand.
    for feed in newsfeeds:
        NewsFeedService.push_newsfeed_to_cache(feed)
    return newsfeeds
def followers(self, request, pk):
    """Return a paginated list of the users who follow user ``pk``.

    Example: GET /api/friendships/1/followers/ lists the users who follow
    the user with id 1.

    Raises:
        ValueError: if ``pk`` cannot be converted to int.
    """
    pk = int(pk)
    paginator = self.paginator
    if GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        page = paginator.paginate_hbase(HBaseFollower, (pk,), request)
    else:
        friendships = Friendship.objects.filter(to_user_id=pk).order_by('-created_at')
        # Calling the paginator directly (rather than the view's
        # paginate_queryset wrapper) requires passing the request explicitly.
        page = paginator.paginate_queryset(friendships, request)
    serializer = FollowerSerializer(page, many=True, context={'request': request})
    return paginator.get_paginated_response(serializer.data)
def create_newsfeed(self, user, tweet):
    """Create a newsfeed for ``user`` pointing at ``tweet`` through NewsFeedService.

    The default ORM created_at is in ISO format (2021-11-11 11:11:11.123456),
    while the HBase model uses a 16-digit integer timestamp. The value that
    matches the active backend is passed on, so both formats are supported.
    """
    if GateKeeper.is_switch_on('switch_newsfeed_to_hbase'):
        feed_time = tweet.timestamp
    else:
        feed_time = tweet.created_at
    return NewsFeedService.create(
        user_id=user.id,
        tweet_id=tweet.id,
        created_at=feed_time,
    )
def followers(self, request, pk):
    """Return a paginated list of the users who follow user ``pk``."""
    paginator = self.paginator
    if GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        page = paginator.paginate_hbase(HBaseFollower, (pk, ), request)
    else:
        friendships = Friendship.objects.filter(
            to_user_id=pk,
        ).order_by('-created_at')
        # Calling the paginator directly (rather than the view's
        # paginate_queryset wrapper) requires passing the request explicitly.
        page = paginator.paginate_queryset(friendships, request)
    serializer = FriendshipFollowerSerializer(
        page,
        many=True,
        context={'request': request},
    )
    return paginator.get_paginated_response(serializer.data)
def list(self, request):
    """List a user's followers or followings, selected by query parameters.

    Query parameters:
        type: either 'followers' or 'followings'.
        user_id: id of the user whose relations are listed.

    Returns:
        A paginated response, or HTTP 400 when ``type`` is missing or
        invalid, or when ``user_id`` is missing.
    """
    paginator = self.paginator

    # Use .get() so a missing parameter yields a 400 response instead of an
    # unhandled KeyError.
    query_type = request.query_params.get('type')
    if query_type not in ('followers', 'followings'):
        return Response(
            "Please check input. Query type need to be in ['followers', 'followings']",
            status=status.HTTP_400_BAD_REQUEST,
        )
    user_id = request.query_params.get('user_id')
    if user_id is None:
        return Response(
            "Please check input. Query parameter 'user_id' is required",
            status=status.HTTP_400_BAD_REQUEST,
        )

    # Choose the HBase model, MySQL filter field and serializer for this type.
    if query_type == 'followers':
        hbase_model = HBaseFollower
        filter_field = 'to_user_id'
        serializer_class = FriendshipFollowerSerializer
    else:
        hbase_model = HBaseFollowing
        filter_field = 'from_user_id'
        serializer_class = FriendshipFollowingSerializer

    if GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        page = paginator.paginate_hbase(hbase_model, (user_id, ), request)
    else:
        friendships = Friendship.objects.filter(**{filter_field: user_id})
        # Calling the paginator directly requires passing the request.
        page = paginator.paginate_queryset(friendships, request)

    serializer = serializer_class(
        page,
        many=True,
        context={'request': request},
    )
    return paginator.get_paginated_response(serializer.data)
def get_following_user_id_set(cls, from_user_id):
    """Return the set of user ids followed by ``from_user_id``, with caching.

    The set is read from ``cache`` when present. Otherwise it is built from
    HBase or MySQL and stored in the cache before being returned.
    """
    cache_key = FOLLOWING_PATTERN.format(user_id=from_user_id)
    cached_ids = cache.get(cache_key)
    if cached_ids is not None:
        # Cache hit.
        return cached_ids

    # Cache miss: rebuild the set from the active storage backend.
    if not GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        rows = Friendship.objects.filter(from_user_id=from_user_id)
    else:
        rows = HBaseFollowing.filter(prefix=(from_user_id, None))
    following_ids = {row.to_user_id for row in rows}
    cache.set(cache_key, following_ids)
    return following_ids
def followers(self, request, pk):
    """Return a paginated list of the users who follow user ``pk``.

    Raises:
        ValueError: if ``pk`` cannot be converted to int.
    """
    pk = int(pk)
    paginator = self.paginator
    if not GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        queryset = Friendship.objects.filter(to_user_id=pk)
        page = paginator.paginate_queryset(queryset.order_by('-created_at'), request)
    else:
        page = paginator.paginate_hbase(HBaseFollower, (pk, ), request)

    # many=True tells DRF that the page holds a list of items.
    serializer = FollowerSerializer(page, many=True, context={'request': request})
    return self.paginator.get_paginated_response(serializer.data)
def followers(self, request, pk):
    """Return a paginated list of the users who follow user ``pk``.

    Example requests:
        GET /api/friendships/1/followers/
        GET http://172.24.76.93:8000/api/friendships/3/followers/
    """
    if not GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        queryset = Friendship.objects.filter(to_user_id=pk)
        page = self.paginate_queryset(queryset.order_by('-created_at'))
    else:
        page = self.paginator.paginate_hbase(HBaseFollower, (pk, ), request)

    serializer = FollowerSerializer(page, many=True, context={'request': request})
    return self.paginator.get_paginated_response(serializer.data)
def fanout_to_followers(cls, tweet):
    """Queue an asynchronous fanout task for ``tweet``.

    ``.delay`` creates a fanout task in the message queue configured for
    celery. Any worker process listening on that queue may pick it up and
    run fanout_newsfeeds_main_task asynchronously. If the task takes 10
    seconds, that time is spent in the worker, not while the user is posting
    the tweet. The ``.delay`` call itself returns immediately because it
    only puts the task information on the queue and does not run the task.

    Arguments passed to ``.delay`` must be values celery can serialize. The
    worker is a separate process, possibly on another machine, and cannot
    see this web process's memory. So the tweet's id is passed rather than
    the tweet object, which celery does not know how to serialize.
    """
    use_hbase = GateKeeper.is_switch_on('switch_newsfeed_to_hbase')
    # HBase uses the tweet's timestamp attribute; MySQL uses its created_at.
    created_at = tweet.timestamp if use_hbase else tweet.created_at
    fanout_newsfeeds_main_task.delay(tweet.id, created_at, tweet.user_id)
def followers(self, request, pk):
    """Return a paginated list of the users who follow user ``pk``.

    Raises:
        ValueError: if ``pk`` cannot be converted to int.
    """
    pk = int(pk)
    if not GateKeeper.is_switch_on('switch_friendship_to_hbase'):
        queryset = Friendship.objects.filter(to_user_id=pk)
        page = self.paginate_queryset(queryset.order_by('-created_at'))
    else:
        # pk is used as the row-key prefix.
        page = self.paginator.paginate_hbase(HBaseFollower, (pk, ), request)

    serializer = FollowerSerializer(page, many=True, context={'request': request})
    # self.paginator is the instance of pagination_class.
    return self.paginator.get_paginated_response(serializer.data)
def list(self, request):
    """Return the requesting user's newsfeeds as a paginated response.

    The cached list is paginated first. If that yields None, the page is
    built from HBase or MySQL instead.
    """
    viewer = request.user
    cached_newsfeeds = NewsFeedService.get_cached_newsfeeds(viewer.id)
    page = self.paginator.paginate_cached_list(cached_newsfeeds, request)

    if page is None:
        # Fall back to the active storage backend.
        if not GateKeeper.is_switch_on('switch_newsfeed_to_hbase'):
            page = self.paginate_queryset(NewsFeed.objects.filter(user=viewer))
        else:
            page = self.paginator.paginate_hbase(HBaseNewsFeed, (viewer.id,), request)

    serializer = NewsFeedSerializer(
        page,
        context={'request': request},
        many=True,
    )
    return self.get_paginated_response(serializer.data)
def test_redis_list_limit(self):
    """Check that pagination still returns every feed when the cache is size-limited.

    More feeds are created than REDIS_LIST_LENGTH_LIMIT allows in the cache.
    Paginating through the API must still return all of them in order, both
    before and after a new feed is pushed, and after the cache is cleared.
    """
    list_limit = settings.REDIS_LIST_LENGTH_LIMIT
    page_size = 20
    total = list_limit + page_size
    users = [self.create_user('user{}'.format(i)) for i in range(5)]

    newsfeeds = []
    for i in range(total):
        tweet = self.create_tweet(user=users[i % 5], content='feed{}'.format(i))
        newsfeeds.append(self.create_newsfeed(self.linghu, tweet))
    # Newest first, matching the order the API is expected to return.
    newsfeeds.reverse()

    # Only list_limit objects are cached.
    cached_newsfeeds = NewsFeedService.get_cached_newsfeeds(self.linghu.id)
    self.assertEqual(len(cached_newsfeeds), list_limit)

    # All of them exist in the backing store.
    if GateKeeper.is_switch_on('switch_newsfeed_to_hbase'):
        count = len(HBaseNewsFeed.filter(prefix=(self.linghu.id, None)))
    else:
        count = NewsFeed.objects.filter(user=self.linghu).count()
    self.assertEqual(count, total)

    results = self._paginate_to_get_newsfeeds(self.linghu_client)
    self.assertEqual(len(results), total)
    for feed, row in zip(newsfeeds, results):
        self.assertEqual(feed.created_at, row['created_at'])

    # A followed user creates a new tweet.
    self.create_friendship(self.linghu, self.dongxie)
    new_tweet = self.create_tweet(self.dongxie, 'a new tweet')
    NewsFeedService.fanout_to_followers(new_tweet)

    def check_feeds_after_push():
        # The new feed comes first, followed by the earlier feeds in order.
        rows = self._paginate_to_get_newsfeeds(self.linghu_client)
        self.assertEqual(len(rows), total + 1)
        self.assertEqual(rows[0]['tweet']['id'], new_tweet.id)
        for position, feed in enumerate(newsfeeds):
            self.assertEqual(feed.created_at, rows[position + 1]['created_at'])

    check_feeds_after_push()

    # Same expectations once the cache has expired.
    self.clear_cache()
    check_feeds_after_push()