def test_chunks_queryset(self):
    """Check that chunkator yields every Book exactly once for several chunk sizes.

    Three chunk sizes are exercised: 1 (small step), 10 (larger chunks) and
    50 (larger than the queryset). The assertions expect 20 Book rows.
    """
    for chunk_size in (1, 10, 50):
        result = []
        for item in chunkator(Book.objects.all(), chunk_size):
            self.assertIsInstance(item, Book, "{}".format(item))
            result.append(item.pk)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(result), 20)
        self.assertEqual(len(result), len(set(result)))  # no duplicates
def test_chunks_numqueries(self):
    """Pin the number of queries chunkator runs for two different chunk sizes."""
    # With a chunk size of 12 exactly 2 queries are expected: one per slice.
    with self.assertNumQueries(2):
        for book in chunkator(Book.objects.all(), 12):
            self.assertTrue(isinstance(book, Book))

    # With a chunk size of 10 exactly 3 queries are expected:
    # one per slice, plus the trailing "empty" one.
    with self.assertNumQueries(3):
        for book in chunkator(Book.objects.all(), 10):
            self.assertTrue(isinstance(book, Book))
def test_chunk_uuid(self):
    """Check that chunkator yields each User instance once.

    The assertions expect exactly two User rows.
    """
    result = []
    for item in chunkator(User.objects.all(), 10):
        self.assertIsInstance(item, User)
        result.append(item.pk)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(len(result), 2)
    self.assertEqual(len(result), len(set(result)))  # no duplicates
def test_chunk_uuid(self):
    """Check that chunkator works on a ``values("pk", "name")`` queryset.

    Each yielded item must be a dict, and the "pk" values must be unique.
    The assertions expect exactly two User rows.
    """
    # NOTE(review): another test with this same name iterates plain User
    # instances; if both live in one class, the later definition shadows
    # the earlier one -- confirm and rename if so.
    result = []
    for item in chunkator(User.objects.all().values("pk", "name"), 10):
        self.assertIsInstance(item, dict)
        result.append(item['pk'])
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(len(result), 2)
    self.assertEqual(len(result), len(set(result)))  # no duplicates
def test_query_log(self):
    """Check what chunkator writes to the stream passed as its third argument.

    After iterating all users with a chunk size of 1, the captured text is
    expected to split on newlines into five pieces: four query entries and a
    trailing empty string.
    """
    query_log_output = cStringIO.StringIO()
    try:
        # Exhaust the generator only to force every query to execute.
        for _ in chunkator(User.objects.all(), 1, query_log_output):
            pass
        contents = query_log_output.getvalue()
    finally:
        # Release the buffer even when iteration fails.
        query_log_output.close()

    queries = contents.split('\n')
    self.assertEqual(len(queries), 5, queries)
    # The last element is an empty string, so only the first four are checked.
    for query in queries[:4]:
        # Should be 0 for the first query
        # Should occur once for other queries
        self.assertLessEqual(query.count('."uuid" >'), 1, query)
def run():
    """
    GitHub user update that runs every 30 minutes.

    Processes UpdateUserQueue entries whose status is READY or FAIL (per the
    original note: users that could not be updated because the GitHub API
    rate limit was hit and no further calls were possible).

    Returns early, without any Slack message, when the queue has no such
    entries or when the initial rate-limit check raises RateLimit.
    """
    start_time = timeit.default_timer()  # record the start time

    update_user_queue_qs = UpdateUserQueue.objects.filter(
        status__in=[UpdateUserQueue.READY, UpdateUserQueue.FAIL])
    # exists() runs a cheap existence query; truth-testing the queryset
    # would fetch every matching row just to discard it.
    if not update_user_queue_qs.exists():
        return

    try:
        # Check the rate limit before starting the script.
        rate_limit_check_service = GithubInformationService(
            is_insert_queue=False)
        rate_limit_check_service.get_rate_remaining()
    except RateLimit:
        return

    SlackService.slack_update_github_user(status='시작', message='')
    update_user_count = 0

    for user_queue in chunkator(update_user_queue_qs, 1000):
        try:
            github_information_service = GithubInformationService(
                user_queue.username, False)
            github_information_service.update()

            user_queue.status = UpdateUserQueue.SUCCESS
            user_queue.save(update_fields=['status'])
            update_user_count += 1
        except RateLimit:
            # Once rate limited, the remaining users cannot be updated
            # either, so stop here.
            SlackService.slack_notify_update_fail(
                message=f'Rate Limit 로 인해 업데이트가 실패되었습니다. '
                        f'{update_user_count}명만 업데이트 되었습니다.😭')
            break
        except Exception as e:
            # Report the failure and carry on with the next queued user.
            capture_exception(e)

    terminate_time = timeit.default_timer()  # record the end time
    SlackService.slack_update_github_user(
        status='완료',
        message=f'업데이트가 {terminate_time - start_time:.2f}초 걸렸습니다.',
        update_user=update_user_count)
def update_language_rank():
    """
    Rank users per language by their count value.

    For each language, the ten UserLanguage rows with the highest ``number``
    are written into UserRank rows keyed by ``lang-<type>`` and ranking
    position. If anything fails while one language's ranking is being
    updated, that language's changes are rolled back.
    """
    for language in chunkator(Language.objects.all(), 1000):
        top_user_languages = (
            UserLanguage.objects
            .filter(language_id=language.id)
            .order_by('-number')[:10]
        )

        # One transaction per language: an error part-way through the
        # ranking update restores the previous state.
        with transaction.atomic():
            for ranking, user_language in enumerate(top_user_languages, start=1):
                user_rank, _ = UserRank.objects.get_or_create(
                    type=f'lang-{language.type}', ranking=ranking)
                user_rank.github_user_id = user_language.github_user_id
                user_rank.score = user_language.number
                user_rank.save(update_fields=['github_user_id', 'score'])
def run():
    """
    Update the basic information of users that are due for a refresh
    (users updated within the last week are excluded).

    Returns early, without any Slack message, when the initial rate-limit
    check raises RateLimit or when no user is due.

    Each user is handed to ``update_github_basic_information`` on a thread
    pool. Worker exceptions only surface through ``Future.result()``, so they
    are handled there rather than around ``executor.submit`` (which never
    raises them). As a result ``update_user_count`` counts updates that
    finished without an exception.
    """
    # Local import: only needed to report unexpected worker errors.
    import logging

    try:
        # Check the rate limit before starting the script.
        rate_limit_check_service = GithubInformationService(
            is_insert_queue=False)
        rate_limit_check_service.get_rate_remaining()
    except RateLimit:
        return

    github_user_qs = GithubUser.objects.filter(
        updated__lte=datetime.now() - timedelta(7))
    # exists() runs a cheap existence query; truth-testing the queryset
    # would fetch every matching row just to discard it.
    if not github_user_qs.exists():
        return

    start_time = timeit.default_timer()
    SlackService.slack_update_basic_info(status='시작', message='')
    update_user_count = 0
    rate_limit_notified = False

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # max_workers default = min(32, os.cpu_count() + 4)
        futures = [
            executor.submit(update_github_basic_information, github_user)
            for github_user in chunkator(github_user_qs, 1000)
        ]

        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except RateLimit:
                # Many workers can hit the limit at once; notify only once.
                if not rate_limit_notified:
                    rate_limit_notified = True
                    SlackService.slack_notify_update_fail(
                        message=f'Rate Limit 로 인해 업데이트가 실패되었습니다. '
                                f'{update_user_count}명만 업데이트 되었습니다.😭')
            except GitHubUserDoesNotExist:
                continue
            except Exception:
                # Keep the batch best-effort, but leave a trace of the error.
                logging.getLogger(__name__).exception(
                    'Unexpected error while updating GitHub basic information')
            else:
                update_user_count += 1

    terminate_time = timeit.default_timer()
    SlackService.slack_update_basic_info(
        status='완료',
        message=f'업데이트가 {terminate_time - start_time:.2f}초 걸렸습니다. '
                f'🤖 API 호출 남은 횟수 : {rate_limit_check_service.get_rate_remaining()}',
        update_user=update_user_count)
def handle(self, *args, **options):
    """Run ``process_result_text()`` on cases without penalties, region by region.

    For every entry of ``region_choices`` the cases are selected with
    ``type=1``, ``stage=1``, a non-null ``result_text`` and no related
    penalties. Progress is printed every 100 cases.
    """
    print('init...')
    for region in region_choices:
        # CasePenalty.objects.filter(case__court__region=region[0]).delete()
        cases = Case.objects.filter(court__region=region[0],
                                    penalties__isnull=True,
                                    result_text__isnull=False,
                                    type=1, stage=1)
        for count, case in enumerate(chunkator(cases, 100), start=1):
            if count == 1:
                print('started...')
            # exists() answers "any penalties?" without counting every row.
            if not case.penalties.exists():
                case.process_result_text()
            if count % 100 == 0:
                print(count, region[1])
            # if count > 2000:  # cap on the number of cases to process
            #     break
        print(f'Ended {region[1]}')
def update_user_ranking():
    """
    Compute the rank of every user from their total score.

    The current rank is kept in ``previous_user_rank`` before the new rank
    and tier are stored. A GitHubUserDoesNotExist error skips the user; any
    other exception is passed to ``capture_exception`` and the loop continues.
    """
    for user in chunkator(GithubUser.objects.all(), 1000):
        try:
            # Remember the old rank before it is overwritten below.
            user.previous_user_rank = user.user_rank
            user.user_rank = GithubInformationService.update_user_ranking(
                user.total_score)
            user.tier = GithubInformationService.get_tier_statistics(
                user.user_rank)
            user.save(
                update_fields=['user_rank', 'previous_user_rank', 'tier'])
        except GitHubUserDoesNotExist:
            pass
        except Exception as exc:
            capture_exception(exc)
def handle(self, *args, **options):
    """Recompute and save ``gender`` for defendants that have a first name.

    Progress is printed every 100 records, and the loop stops once the
    counter exceeds 250.
    """
    print('init...')
    defendants = Defendant.objects.filter(first_name__isnull=False)
    for processed, defendant in enumerate(chunkator(defendants, 100), start=1):
        if processed == 1:
            print('started...')
        defendant.gender = defendant.get_gender()
        defendant.save()
        if not processed % 100:
            print(processed)
        # This is where the number of records to process is capped.
        if processed > 250:
            break
def handle(self, *args, **options):
    """Fill in ``result_type`` for cases that have penalties but no result type.

    First prints the total number of matching cases (``stage=1``, ``type=1``),
    then walks them region by region, saving each case with a fixed
    ``result_type`` value and printing progress every 100 cases.
    """
    print('init...')
    pending_total = Case.objects.filter(penalties__isnull=False,
                                        result_type__isnull=True,
                                        stage=1, type=1).count()
    print(pending_total)

    for region in region_choices:
        region_cases = Case.objects.filter(court__region=region[0],
                                           penalties__isnull=False,
                                           result_type__isnull=True,
                                           stage=1, type=1)
        for processed, case in enumerate(chunkator(region_cases, 100), start=1):
            case.result_type = 'Вынесено постановление о назначении административного наказания'
            case.save()
            if not processed % 100:
                print(processed)
        print(f'Ended {region[1]}')
def test_chunk_missing_pk(self):
    """Expect MissingPkFieldException when the values() queryset omits the pk."""
    with self.assertRaises(MissingPkFieldException):
        # Only "name" is selected; pulling the first item must raise.
        six.next(chunkator(User.objects.all().values("name"), 10))
def test_order_by_default(self):
    """Check the default ordering of chunkator's output.

    The first two Books yielded are expected to have pk 1 and pk 2.
    """
    items = list(chunkator(Book.objects.all(), 10))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(items[0].pk, 1)
    self.assertEqual(items[1].pk, 2)
def test_chunk_missing_pk(self):
    """Expect MissingPkFieldException when the values() queryset omits the pk."""
    with self.assertRaises(MissingPkFieldException):
        result = chunkator(User.objects.all().values("name"), 10)
        # Builtin next() works on Python 2.6+ and 3; ``result.next()`` is
        # Python 2 only and raises AttributeError on Python 3.
        next(result)