Example #1
def _get_case_case_counts_by_owner(domain, datespan, case_types, is_total=False, owner_ids=None):
    case_query = (CaseES()
         .domain(domain)
         .opened_range(lte=datespan.enddate)
         .NOT(closed_range_filter(lt=datespan.startdate))
         .terms_aggregation('owner_id', 'owner_id')
         .size(0))

    if case_types:
        case_query = case_query.filter({"terms": {"type.exact": case_types}})
    else:
        case_query = case_query.filter(filters.NOT(case_type_filter('commcare-user')))

    if not is_total:
        case_query = case_query.active_in_range(
            gte=datespan.startdate,
            lte=datespan.enddate
        )

    if owner_ids:
        case_query = case_query.owner(owner_ids)

    return case_query.run().aggregations.owner_id.counts_by_bucket()
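A minimal usage sketch for the helper above (the domain, case-type list, and datespan values are hypothetical; the datespan object is only assumed to expose the startdate/enddate attributes the query reads, and counts_by_bucket() is assumed to return a dict keyed by owner_id):

from datetime import datetime
from types import SimpleNamespace

# Hypothetical usage sketch: fake a datespan with just the attributes the helper reads.
datespan = SimpleNamespace(
    startdate=datetime(2023, 1, 1),
    enddate=datetime(2023, 1, 31),
)
owner_counts = _get_case_case_counts_by_owner('example-domain', datespan, ['pregnancy'])
for owner_id, case_count in sorted(owner_counts.items()):
    print(owner_id, case_count)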
Example #2
def run_messaging_rule(domain, rule_id):
    rule = _get_cached_rule(domain, rule_id)
    if not rule:
        return
    progress_helper = MessagingRuleProgressHelper(rule_id)
    total_cases_count = CaseES().domain(domain).case_type(
        rule.case_type).count()
    progress_helper.set_total_cases_to_be_processed(total_cases_count)

    def _run_rule_sequentially():
        incr = 0
        progress_helper.set_initial_progress()
        for case_id in get_case_ids_for_messaging_rule(domain, rule.case_type):
            sync_case_for_messaging_rule.delay(domain, case_id, rule_id)
            incr += 1
            if incr >= 1000:
                incr = 0
                progress_helper.update_total_key_expiry()
                if progress_helper.is_canceled():
                    break

        # By putting this task last in the queue, the rule should be marked
        # complete at about the time that the last tasks are finishing up.
        # This beats saving the task results in the database and using a
        # celery chord which would be more taxing on system resources.
        set_rule_complete.delay(rule_id)

    def _run_rule_on_multiple_shards():
        db_aliases = get_db_aliases_for_partitioned_query()
        progress_helper.set_initial_progress(shard_count=len(db_aliases))
        for db_alias in db_aliases:
            run_messaging_rule_for_shard.delay(domain, rule_id, db_alias)

    if should_use_sql_backend(domain):
        _run_rule_on_multiple_shards()
    else:
        _run_rule_sequentially()
Example #3
def _get_case_case_counts_by_owner(domain, datespan, case_types, is_total=False, owner_ids=None, export=False):
    es_instance = ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE
    case_query = (CaseES(es_instance_alias=es_instance)
         .domain(domain)
         .opened_range(lte=datespan.enddate.date())
         .NOT(closed_range_filter(lt=datespan.startdate.date()))
         .terms_aggregation('owner_id', 'owner_id')
         .size(0))

    if case_types:
        case_query = case_query.filter({"terms": {"type.exact": case_types}})
    else:
        case_query = case_query.filter(filters.NOT(case_type_filter('commcare-user')))

    if not is_total:
        case_query = case_query.active_in_range(
            gte=datespan.startdate.date(),
            lte=datespan.enddate.date(),
        )

    if owner_ids:
        case_query = case_query.owner(owner_ids)

    return case_query.run().aggregations.owner_id.counts_by_bucket()
Example #4
    def _cases_created_per_user_per_month(self, case_type=None):
        query = (
            CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
            .domain(self.domain)
            .opened_range(gte=self.date_start, lt=self.date_end)
            .aggregation(
                TermsAggregation('cases_per_user', 'owner_id', size=100)
                .aggregation(DateHistogram('cases_by_date', 'opened_on', interval='month'))))
        if case_type:
            query = query.case_type(case_type)

        results = query.size(0).run()

        stats = defaultdict(list)
        cases_per_user = results.aggregations.cases_per_user
        for bucket in cases_per_user.buckets_list:
            counts_by_date = {
                b['key_as_string']: b['doc_count']
                for b in bucket.cases_by_date.normalized_buckets
            }
            for key, count in counts_by_date.items():
                stats[key].append(count)

        final_stats = []
        for month, case_count_list in sorted(list(stats.items()),
                                             key=lambda r: r[0]):
            final_stats.append(
                (month, sum(case_count_list) // len(case_count_list)))

        suffix = ''
        if case_type:
            suffix = '(case type: %s)' % case_type
        self.stdout.write('Cases created per user (estimate)')
        self._print_table(
            ['Month', 'Cases created per user %s' % suffix], final_stats)
Example #5
 def _get_es_cases_modified_on_date(self, date):
     return CaseES().server_modified_range(
         gte=date,
         lt=date + relativedelta(months=1),
     ).count()
Example #6
def _get_blocks(domain):
    query = (CaseES('report_cases')
             .domain(domain)
             .case_type(['pregnant_mother', 'baby'])
             .size(0)
             .terms_aggregation('block.#value', 'block'))
    return query.run().aggregations.block.keys
Example #7
def get_case_export_base_query(domain, case_type):
    return (CaseES().domain(domain).case_type(case_type).sort("opened_on"))
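A hedged usage sketch (the domain and case type are made up; .count() and .scroll() are the same CaseES methods that appear in other examples in this list):

# Hypothetical usage sketch of the export base query.
query = get_case_export_base_query('example-domain', 'household')
print(query.count())              # number of matching cases
for case_doc in query.scroll():   # stream the case docs, sorted by opened_on
    pass                          # process each doc here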
Example #8
    def _ledgers_per_case(self):
        db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
        results = (LedgerValue.objects.using(db_name)
                   .filter(domain=self.domain)
                   .values('case_id')
                   .annotate(ledger_count=Count('pk')))[:100]

        case_ids = set()
        ledger_count = 0
        for result in results:
            case_ids.add(result['case_id'])
            ledger_count += result['ledger_count']

        if not case_ids:
            self.stdout.write("Domain has no ledgers")
            return

        avg_ledgers_per_case = ledger_count / len(case_ids)
        case_types_result = CaseES(for_export=True)\
            .domain(self.domain).case_ids(case_ids)\
            .aggregation(TermsAggregation('types', 'type.exact'))\
            .size(0).run()

        case_types = case_types_result.aggregations.types.keys

        self.stdout.write('\nCase Types with Ledgers')
        for type_ in case_types:
            self._print_value(
                'case_type', type_,
                CaseES().domain(self.domain).case_type(type_).count())
            db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
            results = (CommCareCase.objects.using(db_name)
                       .filter(domain=self.domain, closed=True, type=type_)
                       .annotate(lifespan=F('closed_on') - F('opened_on'))
                       .aggregate(avg_lifespan=Avg('lifespan')))
            self._print_value('Average lifespan for "%s" cases' % type_,
                              results['avg_lifespan'])

            self._cases_created_per_user_per_month(type_)

        self._print_value('Average ledgers per case', avg_ledgers_per_case)

        stats = defaultdict(list)
        for db_name, case_ids_p in split_list_by_db_partition(case_ids):
            transactions_per_case_per_month = (
                LedgerTransaction.objects.using(db_name)
                .filter(case_id__in=case_ids_p)
                .annotate(m=Month('server_date'), y=Year('server_date'))
                .values('case_id', 'y', 'm')
                .annotate(count=Count('id')))
            for row in transactions_per_case_per_month:
                month = date(row['y'], row['m'], 1)
                stats[month].append(row['count'])

        final_stats = []
        for month, transaction_count_list in sorted(list(stats.items()),
                                                    key=lambda r: r[0]):
            final_stats.append(
                (month.isoformat(),
                 sum(transaction_count_list) // len(transaction_count_list)))

        self.stdout.write('Ledger updates per case')
        self._print_table(['Month', 'Ledgers updated per case'], final_stats)
Example #9
 def _assert_case_is_in_es(self, case, esquery=CaseES()):
Example #10
 def _doc_counts(self):
     self._print_value('Total cases', CaseES().domain(self.domain).count())
     self._print_value(
         'Open cases',
         CaseES().domain(self.domain).is_closed(False).count())
     self._print_value('Total forms', FormES().domain(self.domain).count())
Example #11
def scroll_case_names(domain, case_ids):
    query = (CaseES()
             .domain(domain)
             .case_ids(case_ids)
             .source(['name', '_id'])
             .size(CASE_SCROLL_SIZE))
    return query.scroll()
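A short consumption sketch (the case ids are placeholders; each scrolled hit is assumed to be a dict carrying the 'name' and '_id' fields requested via .source()):

# Hypothetical usage sketch: build a case_id -> name lookup from the scroll.
case_names = {}
for doc in scroll_case_names('example-domain', ['case-id-1', 'case-id-2']):
    case_names[doc['_id']] = doc.get('name')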
Example #12
def get_case_export_base_query(domain, case_type):
    return (CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
            .domain(domain)
            .case_type(case_type)
            .sort("opened_on"))
Example #13
 def _get_case_ids(self):
     return (CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
             .domain(self.domain)
             .case_type(self.case_type)
             .is_closed(False)
             .term('name.exact', '')
             .get_ids())
Example #14
def get_number_of_cases_in_domain(domain):
    return CaseES().domain(domain).count()
Example #15
def _get_es_modified_dates(domain, case_ids):
    results = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
               .domain(domain)
               .case_ids(case_ids)
               .values_list('_id', 'server_modified_on'))
    return dict(results)
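A hedged usage sketch (case ids are placeholders; values_list('_id', 'server_modified_on') is assumed to yield (id, timestamp) pairs, so dict(results) maps each case id to its last ES modification time):

# Hypothetical usage sketch: find cases that never made it into ES.
case_ids = ['case-id-1', 'case-id-2']
es_dates = _get_es_modified_dates('example-domain', case_ids)
missing_from_es = [case_id for case_id in case_ids if case_id not in es_dates]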
Example #16
def get_case_ids_in_domain_since_date(domain, startdate):
    """
    Can only search for cases modified since a date
    """
    return CaseES(for_export=True).domain(domain).server_modified_range(gte=startdate)\
        .source(['_id']).run().hits
Example #17
def get_case_types_for_domain_es(domain):
    query = (CaseES()
             .domain(domain)
             .size(0)
             .terms_aggregation("type.exact", "case_types"))
    return set(query.run().aggregations.case_types.keys)
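A quick usage sketch (the domain is hypothetical; the function returns a plain set of case-type strings):

# Hypothetical usage sketch of get_case_types_for_domain_es.
if 'household' in get_case_types_for_domain_es('example-domain'):
    print('domain has household cases')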
Example #18
def get_number_of_cases_in_domain(domain):
    return CaseES().domain(domain).size(0).run().total
Example #19
    def _ledgers_per_case(self):
        results = (LedgerES(es_instance_alias=ES_EXPORT_INSTANCE)
                   .domain(self.domain)
                   .aggregation(TermsAggregation('by_case', 'case_id', size=100))
                   .size(0)
                   .run())

        ledgers_per_case = results.aggregations.by_case
        case_ids = set()
        ledger_counts = []
        for case_id, ledger_count in ledgers_per_case.counts_by_bucket().items():
            case_ids.add(case_id)
            ledger_counts.append(ledger_count)

        if not case_ids:
            self.stdout.write("Domain has no ledgers")
            return

        avg_ledgers_per_case = sum(ledger_counts) // len(case_ids)
        case_types_result = CaseES(es_instance_alias=ES_EXPORT_INSTANCE)\
            .domain(self.domain).case_ids(case_ids)\
            .aggregation(TermsAggregation('types', 'type'))\
            .size(0).run()

        case_types = case_types_result.aggregations.types.keys

        self.stdout.write('\nCase Types with Ledgers')
        for type_ in case_types:
            self._print_value(
                'case_type', type_,
                CaseES().domain(self.domain).case_type(type_).count())
            if should_use_sql_backend(self.domain):
                db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
                results = (CommCareCaseSQL.objects.using(db_name)
                           .filter(domain=self.domain, closed=True, type=type_)
                           .annotate(lifespan=F('closed_on') - F('opened_on'))
                           .aggregate(avg_lifespan=Avg('lifespan')))
                self._print_value('Average lifespan for "%s" cases' % type_,
                                  results['avg_lifespan'])

            self._cases_created_per_user_per_month(type_)

        self._print_value('Average ledgers per case', avg_ledgers_per_case)

        if should_use_sql_backend(self.domain):
            stats = defaultdict(list)
            for db_name, case_ids_p in split_list_by_db_partition(case_ids):
                transactions_per_case_per_month = (
                    LedgerTransaction.objects.using(db_name)
                    .filter(case_id__in=case_ids_p)
                    .annotate(m=Month('server_date'), y=Year('server_date'))
                    .values('case_id', 'y', 'm')
                    .annotate(count=Count('id')))
                for row in transactions_per_case_per_month:
                    month = date(row['y'], row['m'], 1)
                    stats[month].append(row['count'])
        else:
            transactions_per_case_per_month = (
                StockTransaction.objects.filter(case_id__in=case_ids)
                .annotate(m=Month('report__date'), y=Year('report__date'))
                .values('case_id', 'y', 'm')
                .annotate(count=Count('id')))

            stats = defaultdict(list)
            for row in transactions_per_case_per_month:
                month = date(row['y'], row['m'], 1)
                stats[month].append(row['count'])

        final_stats = []
        for month, transaction_count_list in sorted(list(stats.items()),
                                                    key=lambda r: r[0]):
            final_stats.append(
                (month.isoformat(),
                 sum(transaction_count_list) // len(transaction_count_list)))

        self._print_table(['Month', 'Ledgers updated per case'], final_stats)
Example #20
 def _get_closed_hh_cases(self, owners):
     query = (CaseES(es_instance_alias='export')
              .is_closed()
              .domain('icds-cas')
              .case_type('household')
              .owner(owners)
              .source(['case_id', 'closed_on', 'name'])
              .size(100))
     return query.scroll()
Example #21
    def test_unsupported_domain(self):
        self._create_case_and_sync_to_es('unsupported-domain')

        results = CaseES('report_cases').run()
        self.assertEqual(0, results.total)
Example #22
 def _get_child_cases(self, household_ids):
     query = (CaseES(es_instance_alias='export')
              .domain('icds-cas')
              .case_type('person')
              .is_closed(False)
              .source(SOURCE_FIELDS)
              .filter(filters.term("indices.referenced_id", household_ids)))
     return query.run()
Example #23
 def _assert_index_empty(self, esquery=CaseES()):
     results = esquery.run()
     self.assertEqual(0, results.total)
Example #24
def get_case_export_base_query(domain, case_type):
    return (CaseES(for_export=True)
            .domain(domain)
            .case_type(case_type)
            .sort("opened_on")
            .sort('inserted_at', reset_sort=False))