def queue_async_indicators():
    """Queue pending AsyncIndicators for processing, grouped by (domain, doc type).

    Fetches AsyncIndicators that
        1. were not queued till now or were last queued more than 4 hours ago
        2. have failed less than ASYNC_INDICATOR_MAX_RETRIES times
    This task quits after it has run for more than
    ASYNC_INDICATOR_QUEUE_TIME - 30 seconds i.e 4 minutes 30 seconds.
    While it runs, it clubs fetched AsyncIndicators by domain and doc type
    and queues them for processing.
    """
    start = datetime.utcnow()
    cutoff = start + ASYNC_INDICATOR_QUEUE_TIME - timedelta(seconds=30)
    retry_threshold = start - timedelta(hours=4)
    # don't requeue anything that has been retried more than ASYNC_INDICATOR_MAX_RETRIES times
    indicators = AsyncIndicator.objects.filter(
        unsuccessful_attempts__lt=ASYNC_INDICATOR_MAX_RETRIES
    )[:settings.ASYNC_INDICATORS_TO_QUEUE]
    indicators_by_domain_doc_type = defaultdict(list)
    # page so that envs can have arbitrarily large settings.ASYNC_INDICATORS_TO_QUEUE
    for indicator in paginated_queryset(indicators, 1000):
        # only requeue things that are not in queue or were last queued earlier than the threshold
        if not indicator.date_queued or indicator.date_queued < retry_threshold:
            indicators_by_domain_doc_type[(indicator.domain, indicator.doc_type)].append(indicator)
    # iterate values only -- the grouping key is not needed here -- and use a
    # name that doesn't shadow the `indicators` queryset above
    for grouped_indicators in indicators_by_domain_doc_type.values():
        _queue_indicators(grouped_indicators)
        # stop once the time budget is exhausted
        if datetime.utcnow() > cutoff:
            break
def get_all_rows(self):
    """Return all report rows.

    Rows come from warehouse ApplicationStatusFacts when ``self.warehouse``
    is set, otherwise from an elasticsearch user scroll. Also records the
    total row count on ``self._total_records``.
    """
    if not self.warehouse:
        # non-warehouse path: scroll users out of elasticsearch
        users = self.user_query(False).scroll()
        self._total_records = self.user_query(False).count()
        return self.process_rows(users, True)

    facts = ApplicationStatusFact.objects.filter(
        user_dim__user_id__in=self.get_user_ids(),
    ).select_related('user_dim', 'app_dim')
    if self.selected_app_id:
        facts = facts.filter(app_dim__application_id=self.selected_app_id)
    self._total_records = facts.count()
    return self.process_facts(paginated_queryset(facts, 10000))
def update_build_version_for_app(domain, app_id, check_only):
    """Backfill ``last_form_app_build_version`` on ApplicationStatusFacts for one app.

    For each fact missing a build version, look up the user's latest build id
    and resolve it to an application version via ApplicationDim.

    :param domain: domain to restrict the facts and dims to
    :param app_id: application whose facts should be updated
    :param check_only: if True, only print what would change; don't save
    """
    CHUNK_SIZE = 1000
    fact_chunks = chunked(
        paginated_queryset(
            ApplicationStatusFact.objects.order_by('-id').filter(
                domain=domain,
                last_form_app_build_version__isnull=True,
                app_dim__application_id=app_id).select_related(
                'user_dim').all(),
            CHUNK_SIZE),
        CHUNK_SIZE)

    # persistent cache of build id -> app version, shared across all chunks
    version_by_build_id = {}

    def memoized_get_versions(build_ids):
        # only hit the database for build ids we haven't resolved yet
        new = set(build_ids) - set(version_by_build_id.keys())
        if new:
            versions = ApplicationDim.objects.filter(
                domain=domain,
                application_id__in=new).values_list('application_id', 'version')
            for build_id, version in versions:
                version_by_build_id[build_id] = version
        return {
            k: version_by_build_id[k]
            for k in build_ids if k in version_by_build_id
        }

    for fact_chunk in fact_chunks:
        facts_by_user_ids = {f.user_dim.user_id: f for f in fact_chunk}
        build_ids_by_user_ids = get_latest_build_ids(
            domain, app_id, list(facts_by_user_ids.keys()))
        build_ids = list(build_ids_by_user_ids.values())
        # FIX: bind the chunk's lookup to a local name instead of rebinding
        # version_by_build_id -- the original reassignment replaced the memo
        # dict with the per-chunk filtered subset each iteration, discarding
        # cached versions between chunks. Lookup results are identical since
        # every build_id queried below comes from `build_ids`.
        chunk_versions = memoized_get_versions(build_ids)
        facts_to_update = []
        for user_id, fact in facts_by_user_ids.items():
            build_id = build_ids_by_user_ids.get(user_id, '')
            version = chunk_versions.get(build_id, '')
            if not fact.last_form_app_build_version and version:
                fact.last_form_app_build_version = version
                facts_to_update.append(fact)
        if check_only:
            for fact in facts_to_update:
                print("Fact ID {}, user {}, version {}".format(
                    fact.id, fact.user_dim.user_id,
                    fact.last_form_app_build_version))
        else:
            print("Updating {} facts for app {}".format(
                len(facts_to_update), app_id))
            bulk_update(facts_to_update)
def iter_all_changes(self, start_from=None):
    """Yield a change object for every instance of ``self.model_class``.

    Instances are read in pages of ``self.chunk_size`` to bound memory use.
    ``start_from`` is accepted for interface compatibility but is not
    referenced in this implementation.
    """
    all_models = self.model_class.objects.all()
    for instance in paginated_queryset(all_models, self.chunk_size):
        yield self.model_to_change_fn(instance)