def _get_case_case_counts_by_owner(domain, datespan, case_types, is_total=False, owner_ids=None):
    """Return per-owner case counts from a CaseES ``owner_id`` terms aggregation.

    The query is limited to ``domain``, to cases opened on or before
    ``datespan.enddate``, and excludes whatever matches
    ``closed_range_filter(lt=datespan.startdate)``.

    :param case_types: when truthy, only documents whose ``type.exact`` is in
        this collection are counted; otherwise everything except the
        ``'commcare-user'`` case type is counted.
    :param is_total: when falsy, an ``active_in_range`` restriction covering
        the datespan is added as well.
    :param owner_ids: when truthy, restricts the query to these owners.
    :return: ``counts_by_bucket()`` of the ``owner_id`` aggregation.
    """
    range_start = datespan.startdate
    range_end = datespan.enddate

    query = CaseES().domain(domain)
    query = query.opened_range(lte=range_end)
    query = query.NOT(closed_range_filter(lt=range_start))
    query = query.terms_aggregation('owner_id', 'owner_id').size(0)

    if case_types:
        type_filter = {"terms": {"type.exact": case_types}}
    else:
        type_filter = filters.NOT(case_type_filter('commcare-user'))
    query = query.filter(type_filter)

    if not is_total:
        query = query.active_in_range(gte=range_start, lte=range_end)
    if owner_ids:
        query = query.owner(owner_ids)

    owner_aggregation = query.run().aggregations.owner_id
    return owner_aggregation.counts_by_bucket()
def run_messaging_rule(domain, rule_id):
    """Queue the work needed to apply one messaging rule to its cases.

    Does nothing when ``_get_cached_rule`` returns a falsy value. Otherwise the
    progress helper is told how many cases of the rule's case type CaseES
    reports for the domain, and the work is queued either per shard (when
    ``should_use_sql_backend(domain)`` is truthy) or one case at a time.
    """
    rule = _get_cached_rule(domain, rule_id)
    if not rule:
        return

    progress_helper = MessagingRuleProgressHelper(rule_id)
    case_type_query = CaseES().domain(domain).case_type(rule.case_type)
    progress_helper.set_total_cases_to_be_processed(case_type_query.count())

    def _run_rule_sequentially():
        queued_since_check = 0
        progress_helper.set_initial_progress()
        for case_id in get_case_ids_for_messaging_rule(domain, rule.case_type):
            sync_case_for_messaging_rule.delay(domain, case_id, rule_id)
            queued_since_check += 1
            if queued_since_check < 1000:
                continue
            # Every 1000 queued cases: refresh the key expiry and check
            # whether the run has been canceled.
            queued_since_check = 0
            progress_helper.update_total_key_expiry()
            if progress_helper.is_canceled():
                break

        # By putting this task last in the queue, the rule should be marked
        # complete at about the time that the last tasks are finishing up.
        # This beats saving the task results in the database and using a
        # celery chord which would be more taxing on system resources.
        set_rule_complete.delay(rule_id)

    def _run_rule_on_multiple_shards():
        db_aliases = get_db_aliases_for_partitioned_query()
        progress_helper.set_initial_progress(shard_count=len(db_aliases))
        for db_alias in db_aliases:
            run_messaging_rule_for_shard.delay(domain, rule_id, db_alias)

    if should_use_sql_backend(domain):
        runner = _run_rule_on_multiple_shards
    else:
        runner = _run_rule_sequentially
    runner()
def _get_case_case_counts_by_owner(domain, datespan, case_types, is_total=False, owner_ids=None, export=False):
    """Return per-owner case counts from a CaseES ``owner_id`` terms aggregation.

    Date bounds are taken from ``datespan.startdate.date()`` and
    ``datespan.enddate.date()``. The query is limited to ``domain``, to cases
    opened on or before the end date, and excludes whatever matches
    ``closed_range_filter(lt=<start date>)``.

    :param case_types: when truthy, only documents whose ``type.exact`` is in
        this collection are counted; otherwise everything except the
        ``'commcare-user'`` case type is counted.
    :param is_total: when falsy, an ``active_in_range`` restriction covering
        the datespan is added as well.
    :param owner_ids: when truthy, restricts the query to these owners.
    :param export: selects ``ES_EXPORT_INSTANCE`` instead of
        ``ES_DEFAULT_INSTANCE`` as the ES instance alias.
    :return: ``counts_by_bucket()`` of the ``owner_id`` aggregation.
    """
    if export:
        es_instance = ES_EXPORT_INSTANCE
    else:
        es_instance = ES_DEFAULT_INSTANCE

    first_day = datespan.startdate.date()
    last_day = datespan.enddate.date()

    query = CaseES(es_instance_alias=es_instance).domain(domain)
    query = query.opened_range(lte=last_day)
    query = query.NOT(closed_range_filter(lt=first_day))
    query = query.terms_aggregation('owner_id', 'owner_id').size(0)

    if case_types:
        type_filter = {"terms": {"type.exact": case_types}}
    else:
        type_filter = filters.NOT(case_type_filter('commcare-user'))
    query = query.filter(type_filter)

    if not is_total:
        query = query.active_in_range(gte=first_day, lte=last_day)
    if owner_ids:
        query = query.owner(owner_ids)

    owner_aggregation = query.run().aggregations.owner_id
    return owner_aggregation.counts_by_bucket()
def _cases_created_per_user_per_month(self, case_type=None):
    """Print a per-month estimate of cases created per user.

    Cases opened in ``[self.date_start, self.date_end)`` are bucketed by
    ``owner_id`` (terms aggregation, ``size=100``) and then by month of
    ``opened_on``. For every month the printed figure is the floor-divided
    mean of the per-owner counts that have a bucket for that month.

    :param case_type: when truthy, restricts the query to this case type and
        adds it to the table header.
    """
    per_owner_by_month = TermsAggregation('cases_per_user', 'owner_id', size=100).aggregation(
        DateHistogram('cases_by_date', 'opened_on', interval='month')
    )
    query = (
        CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
        .domain(self.domain)
        .opened_range(gte=self.date_start, lt=self.date_end)
        .aggregation(per_owner_by_month)
    )
    if case_type:
        query = query.case_type(case_type)
    results = query.size(0).run()

    # month key -> list of per-owner case counts for that month
    counts_by_month = defaultdict(list)
    for owner_bucket in results.aggregations.cases_per_user.buckets_list:
        monthly_counts = {
            month_bucket['key_as_string']: month_bucket['doc_count']
            for month_bucket in owner_bucket.cases_by_date.normalized_buckets
        }
        for month_key, doc_count in monthly_counts.items():
            counts_by_month[month_key].append(doc_count)

    final_stats = [
        (month_key, sum(counts_by_month[month_key]) // len(counts_by_month[month_key]))
        for month_key in sorted(counts_by_month)
    ]

    suffix = '(case type: %s)' % case_type if case_type else ''
    self.stdout.write('Cases created per user (estimate)')
    self._print_table(['Month', 'Cases created per user %s' % suffix], final_stats)
def _get_es_cases_modified_on_date(self, date):
    """Count CaseES documents in the server-modified range ``[date, date + 1 month)``."""
    window_end = date + relativedelta(months=1)
    query = CaseES().server_modified_range(gte=date, lt=window_end)
    return query.count()
def _get_blocks(domain):
    """Return the keys of a terms aggregation on ``block.#value``.

    The aggregation runs against ``CaseES('report_cases')`` for ``domain``,
    limited to the ``pregnant_mother`` and ``baby`` case types, with no hits
    requested (``size(0)``).
    """
    case_types = ['pregnant_mother', 'baby']
    query = CaseES('report_cases').domain(domain).case_type(case_types).size(0)
    query = query.terms_aggregation('block.#value', 'block')
    block_aggregation = query.run().aggregations.block
    return block_aggregation.keys
def get_case_export_base_query(domain, case_type):
    """Build the CaseES query for ``case_type`` in ``domain``, sorted by ``opened_on``."""
    query = CaseES().domain(domain)
    query = query.case_type(case_type)
    return query.sort("opened_on")
def _ledgers_per_case(self):
    """Print ledger statistics for ``self.domain``.

    Samples up to 100 ``case_id`` groups of ``LedgerValue`` rows from the first
    partitioned DB alias, then prints:

    * the case types (from CaseES) of the sampled cases, and for each type the
      total case count, the average lifespan of closed cases in the sampled
      shard, and the cases-created-per-user table;
    * the average number of ledgers per sampled case;
    * a per-month table of the floor-divided mean number of ledger
      transactions per case.

    Writes "Domain has no ledgers" and returns early when the sample is empty.
    """
    shard_db = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
    results = (
        LedgerValue.objects.using(shard_db)
        .filter(domain=self.domain)
        .values('case_id')
        .annotate(ledger_count=Count('pk'))
    )[:100]

    case_ids = set()
    ledger_count = 0
    for result in results:
        case_ids.add(result['case_id'])
        ledger_count += result['ledger_count']

    if not case_ids:
        self.stdout.write("Domain has no ledgers")
        return

    avg_ledgers_per_case = ledger_count / len(case_ids)
    case_types_result = (
        CaseES(for_export=True)
        .domain(self.domain)
        .case_ids(case_ids)
        .aggregation(TermsAggregation('types', 'type.exact'))
        .size(0)
        .run()
    )
    case_types = case_types_result.aggregations.types.keys

    self.stdout.write('\nCase Types with Ledgers')
    for type_ in case_types:
        self._print_value(
            'case_type', type_,
            CaseES().domain(self.domain).case_type(type_).count())
        # aggregate() returns a single dict for the whole filtered set.
        # ``values(..., flat=True)`` is not valid (``flat`` is a
        # ``values_list`` option), and aggregate() also yields None rather
        # than an IndexError when the type has no closed cases.
        lifespan_stats = (
            CommCareCase.objects.using(shard_db)
            .filter(domain=self.domain, closed=True, type=type_)
            .annotate(lifespan=F('closed_on') - F('opened_on'))
            .aggregate(avg_lifespan=Avg('lifespan'))
        )
        self._print_value('Average lifespan for "%s" cases' % type_,
                          lifespan_stats['avg_lifespan'])
        self._cases_created_per_user_per_month(type_)

    self._print_value('Average ledgers per case', avg_ledgers_per_case)

    # month (first day) -> list of per-case transaction counts in that month
    stats = defaultdict(list)
    for db_name, case_ids_p in split_list_by_db_partition(case_ids):
        # Query each DB only for the ids paired with it by the splitter
        # (presumably the ids living on that partition - confirm against
        # split_list_by_db_partition) instead of the full id set.
        transactions_per_case_per_month = (
            LedgerTransaction.objects.using(db_name)
            .filter(case_id__in=case_ids_p)
            .annotate(m=Month('server_date'), y=Year('server_date'))
            .values('case_id', 'y', 'm')
            .annotate(count=Count('id'))
        )
        for row in transactions_per_case_per_month:
            month = date(row['y'], row['m'], 1)
            stats[month].append(row['count'])

    final_stats = [
        (month.isoformat(), sum(stats[month]) // len(stats[month]))
        for month in sorted(stats)
    ]

    self.stdout.write('Ledger updates per case')
    self._print_table(['Month', 'Ledgers updated per case'], final_stats)
def _assert_case_is_in_es(self, case, esquery=CaseES()):
def _doc_counts(self):
    """Print the domain's total case, open case and total form counts from ES."""
    domain = self.domain

    total_cases = CaseES().domain(domain).count()
    self._print_value('Total cases', total_cases)

    open_cases = CaseES().domain(domain).is_closed(False).count()
    self._print_value('Open cases', open_cases)

    total_forms = FormES().domain(domain).count()
    self._print_value('Total forms', total_forms)
def scroll_case_names(domain, case_ids):
    """Scroll over CaseES documents for ``case_ids`` in ``domain``.

    Only the ``name`` and ``_id`` source fields are requested, and the query
    size is set to ``CASE_SCROLL_SIZE``. Returns whatever ``scroll()`` returns.
    """
    source_fields = ['name', '_id']
    query = CaseES().domain(domain).case_ids(case_ids)
    query = query.source(source_fields).size(CASE_SCROLL_SIZE)
    return query.scroll()
def get_case_export_base_query(domain, case_type):
    """Build the export-instance CaseES query for ``case_type`` in ``domain``.

    The query uses ``ES_EXPORT_INSTANCE`` as its ES instance alias and is
    sorted by ``opened_on``.
    """
    query = CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
    query = query.domain(domain).case_type(case_type)
    return query.sort("opened_on")
def _get_case_ids(self):
    """Return ids of cases matching ``is_closed(False)`` with an empty ``name.exact``.

    The query runs on the export ES instance and is limited to
    ``self.domain`` and ``self.case_type``.
    """
    query = CaseES(es_instance_alias=ES_EXPORT_INSTANCE).domain(self.domain)
    query = query.case_type(self.case_type).is_closed(False)
    query = query.term('name.exact', '')
    return query.get_ids()
def get_number_of_cases_in_domain(domain):
    """Return the CaseES document count for ``domain``."""
    domain_query = CaseES().domain(domain)
    return domain_query.count()
def _get_es_modified_dates(domain, case_ids):
    """Return a dict built from ``(_id, server_modified_on)`` pairs in CaseES.

    The lookup runs on the export ES instance for the given ``case_ids`` in
    ``domain``.
    """
    query = CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
    query = query.domain(domain).case_ids(case_ids)
    id_date_pairs = query.values_list('_id', 'server_modified_on')
    return dict(id_date_pairs)
def get_case_ids_in_domain_since_date(domain, startdate):
    """
    Return the hits for cases in ``domain`` modified on or after ``startdate``.

    Can only search for cases modified since a date. The query requests only
    the ``_id`` source field and is built with ``CaseES(for_export=True)``.
    """
    query = CaseES(for_export=True).domain(domain)
    query = query.server_modified_range(gte=startdate)
    query = query.source(['_id'])
    return query.run().hits
def get_case_types_for_domain_es(domain):
    """Return the set of ``type.exact`` aggregation keys CaseES reports for ``domain``."""
    query = CaseES().domain(domain).size(0)
    query = query.terms_aggregation("type.exact", "case_types")
    type_keys = query.run().aggregations.case_types.keys
    return set(type_keys)
def get_number_of_cases_in_domain(domain):
    """Return the ``total`` of a zero-size CaseES query for ``domain``."""
    query = CaseES().domain(domain).size(0)
    result = query.run()
    return result.total
def _ledgers_per_case(self):
    """Print ledger statistics for ``self.domain``.

    Takes up to 100 ``case_id`` buckets from a LedgerES terms aggregation
    (export instance), then prints:

    * the case types (from CaseES) of those cases, and for each type the total
      case count, the average lifespan of closed cases in one shard DB (only
      when ``should_use_sql_backend`` is truthy for the domain), and the
      cases-created-per-user table;
    * the floor-divided average number of ledgers per sampled case;
    * a per-month table of the floor-divided mean number of ledger
      transactions per case, read from ``LedgerTransaction`` on the SQL
      backend and from ``StockTransaction`` otherwise.

    Writes "Domain has no ledgers" and returns early when there are no buckets.
    """
    results = (
        LedgerES(es_instance_alias=ES_EXPORT_INSTANCE)
        .domain(self.domain)
        .aggregation(TermsAggregation('by_case', 'case_id', size=100))
        .size(0)
        .run()
    )
    counts_by_case = results.aggregations.by_case.counts_by_bucket()
    case_ids = set(counts_by_case)
    ledger_counts = list(counts_by_case.values())

    if not case_ids:
        self.stdout.write("Domain has no ledgers")
        return

    avg_ledgers_per_case = sum(ledger_counts) // len(case_ids)
    case_types_result = (
        CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
        .domain(self.domain)
        .case_ids(case_ids)
        .aggregation(TermsAggregation('types', 'type'))
        .size(0)
        .run()
    )
    case_types = case_types_result.aggregations.types.keys

    self.stdout.write('\nCase Types with Ledgers')
    for type_ in case_types:
        self._print_value(
            'case_type', type_,
            CaseES().domain(self.domain).case_type(type_).count())
        if should_use_sql_backend(self.domain):
            db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
            # aggregate() returns a single dict for the whole filtered set.
            # ``values(..., flat=True)`` is not valid (``flat`` is a
            # ``values_list`` option), and aggregate() also yields None
            # rather than an IndexError when the type has no closed cases.
            lifespan_stats = (
                CommCareCaseSQL.objects.using(db_name)
                .filter(domain=self.domain, closed=True, type=type_)
                .annotate(lifespan=F('closed_on') - F('opened_on'))
                .aggregate(avg_lifespan=Avg('lifespan'))
            )
            self._print_value('Average lifespan for "%s" cases' % type_,
                              lifespan_stats['avg_lifespan'])

        self._cases_created_per_user_per_month(type_)

    self._print_value('Average ledgers per case', avg_ledgers_per_case)

    # month (first day) -> list of per-case transaction counts in that month
    stats = defaultdict(list)
    if should_use_sql_backend(self.domain):
        for db_name, case_ids_p in split_list_by_db_partition(case_ids):
            # Query each DB only for the ids paired with it by the splitter
            # (presumably the ids living on that partition - confirm against
            # split_list_by_db_partition) instead of the full id set.
            transactions_per_case_per_month = (
                LedgerTransaction.objects.using(db_name)
                .filter(case_id__in=case_ids_p)
                .annotate(m=Month('server_date'), y=Year('server_date'))
                .values('case_id', 'y', 'm')
                .annotate(count=Count('id'))
            )
            for row in transactions_per_case_per_month:
                month = date(row['y'], row['m'], 1)
                stats[month].append(row['count'])
    else:
        transactions_per_case_per_month = (
            StockTransaction.objects
            .filter(case_id__in=case_ids)
            .annotate(m=Month('report__date'), y=Year('report__date'))
            .values('case_id', 'y', 'm')
            .annotate(count=Count('id'))
        )
        for row in transactions_per_case_per_month:
            month = date(row['y'], row['m'], 1)
            stats[month].append(row['count'])

    final_stats = [
        (month.isoformat(), sum(stats[month]) // len(stats[month]))
        for month in sorted(stats)
    ]

    self._print_table(['Month', 'Ledgers updated per case'], final_stats)
def _get_closed_hh_cases(self, owners):
    """Scroll ``household`` cases in ``icds-cas`` owned by ``owners``.

    The query runs on the ``'export'`` ES instance alias, applies
    ``is_closed()`` with its default argument, requests only the ``case_id``,
    ``closed_on`` and ``name`` source fields, and uses a size of 100.
    """
    source_fields = ['case_id', 'closed_on', 'name']
    query = CaseES(es_instance_alias='export').is_closed()
    query = query.domain('icds-cas').case_type('household')
    query = query.owner(owners).source(source_fields).size(100)
    return query.scroll()
def test_unsupported_domain(self):
    """A case synced for ``'unsupported-domain'`` leaves ``report_cases`` with zero results."""
    self._create_case_and_sync_to_es('unsupported-domain')
    report_case_total = CaseES('report_cases').run().total
    self.assertEqual(0, report_case_total)
def _get_child_cases(self, household_ids):
    """Run a query for ``person`` cases in ``icds-cas`` that reference ``household_ids``.

    The query runs on the ``'export'`` ES instance alias, applies
    ``is_closed(False)``, requests ``SOURCE_FIELDS`` only, and filters on the
    ``indices.referenced_id`` term. Returns the result of ``run()``.
    """
    references_household = filters.term("indices.referenced_id", household_ids)
    query = CaseES(es_instance_alias='export').domain('icds-cas')
    query = query.case_type('person').is_closed(False)
    query = query.source(SOURCE_FIELDS).filter(references_household)
    return query.run()
def _assert_index_empty(self, esquery=CaseES()): results = esquery.run() self.assertEqual(0, results.total)
def get_case_export_base_query(domain, case_type):
    """Build the ``for_export`` CaseES query for ``case_type`` in ``domain``.

    The query is sorted by ``opened_on``, then ``inserted_at`` is added as a
    further sort with ``reset_sort=False``.
    """
    query = CaseES(for_export=True).domain(domain).case_type(case_type)
    query = query.sort("opened_on")
    return query.sort('inserted_at', reset_sort=False)