def test_missing_aggregation(self):
     json_output = {
         "query": {
             "filtered": {
                 "filter": {
                     "and": [{
                         "match_all": {}
                     }]
                 },
                 "query": {
                     "match_all": {}
                 }
             }
         },
         "aggs": {
             "missing_user_id": {
                 "missing": {
                     "field": "user_id"
                 }
             },
         },
         "size": SIZE_LIMIT
     }
     query = HQESQuery('cases').aggregation(
         MissingAggregation(
             'missing_user_id',
             'user_id',
         ))
     self.checkQuery(query, json_output)
예제 #2
0
 def aggregation(self):
     if self.expand_value is None:
         return MissingAggregation(
             self.es_alias, self.data_source_field
         )
     return FilterAggregation(
         self.es_alias, filters.term(self.data_source_field, self.expand_value)
     )
예제 #3
0
def get_form_duration_stats_by_user(
        domain,
        app_id,
        xmlns,
        user_ids,
        startdate,
        enddate,
        by_submission_time=True):
    """Gets stats on the duration of a selected form grouped by users"""
    date_filter_fn = submitted_filter if by_submission_time else completed_filter

    missing_users = None in user_ids

    query = (
        FormES()
        .domain(domain)
        .user_ids_handle_unknown(user_ids)
        .remove_default_filter('has_user')
        .xmlns(xmlns)
        .filter(date_filter_fn(gte=startdate, lt=enddate))
        .aggregation(
            TermsAggregation('user_id', 'form.meta.userID').aggregation(
                ExtendedStatsAggregation(
                    'duration_stats',
                    'form.meta.timeStart',
                    script="doc['form.meta.timeEnd'].value - doc['form.meta.timeStart'].value",
                )
            )
        )
        .size(0)
    )

    if app_id:
        query = query.app(app_id)

    if missing_users:
        query = query.aggregation(
            MissingAggregation('missing_user_id', 'form.meta.userID').aggregation(
                ExtendedStatsAggregation(
                    'duration_stats',
                    'form.meta.timeStart',
                    script="doc['form.meta.timeEnd'].value - doc['form.meta.timeStart'].value",
                )
            )
        )

    result = {}
    aggregations = query.run().aggregations

    if missing_users:
        result[MISSING_KEY] = aggregations.missing_user_id.bucket.duration_stats.result

    buckets_dict = aggregations.user_id.buckets_dict
    for user_id, bucket in buckets_dict.iteritems():
        result[user_id] = bucket.duration_stats.result
    return result
예제 #4
0
 def distinct_values(self, column, size):
     # missing aggregation can be removed on upgrade to ES 2.0
     missing_agg_name = column + '_missing'
     query = self.es.terms_aggregation(column, column, size=size, sort_field="_term").size(0)
     query = query.aggregation(MissingAggregation(missing_agg_name, column))
     results = query.run()
     missing_result = getattr(results.aggregations, missing_agg_name).result
     result = getattr(results.aggregations, column).keys
     if missing_result['doc_count'] > 0:
         result.append(None)
     return result
예제 #5
0
def get_form_counts_by_user_xmlns(domain, startdate, enddate, user_ids=None,
                                  xmlnss=None, by_submission_time=True):

    missing_users = False

    date_filter_fn = submitted_filter if by_submission_time else completed_filter
    query = (
        FormES()
        .domain(domain)
        .filter(date_filter_fn(gte=startdate, lt=enddate))
        .aggregation(
            TermsAggregation('user_id', 'form.meta.userID').aggregation(
                TermsAggregation('app_id', 'app_id').aggregation(
                    TermsAggregation('xmlns', 'xmlns')
                )
            )
        )
        .size(0)
    )

    if user_ids:
        query = (query
            .user_ids_handle_unknown(user_ids)
            .remove_default_filter('has_user'))
        missing_users = None in user_ids
        if missing_users:
            query = query.aggregation(
                MissingAggregation('missing_user_id', 'form.meta.userID').aggregation(
                    TermsAggregation('app_id', 'app_id').aggregation(
                        TermsAggregation('xmlns', 'xmlns')
                    )
                )
            )

    if xmlnss:
        query = query.xmlns(xmlnss)

    counts = defaultdict(lambda: 0)
    aggregations = query.run().aggregations
    user_buckets = aggregations.user_id.buckets_list
    if missing_users:
        user_buckets.append(aggregations.missing_user_id.bucket)

    for user_bucket in user_buckets:
        app_buckets = user_bucket.app_id.buckets_list
        for app_bucket in app_buckets:
            xmlns_buckets = app_bucket.xmlns.buckets_list
            for xmlns_bucket in xmlns_buckets:
                key = (user_bucket.key, app_bucket.key, xmlns_bucket.key)
                counts[key] = xmlns_bucket.doc_count

    return counts
예제 #6
0
def get_last_form_submissions_by_user(domain, user_ids, app_id=None, xmlns=None):

    missing_users = None in user_ids

    query = (
        FormES()
        .domain(domain)
        .user_ids_handle_unknown(user_ids)
        .remove_default_filter('has_user')
        .aggregation(
            TermsAggregation('user_id', 'form.meta.userID').aggregation(
                TopHitsAggregation(
                    'top_hits_last_form_submissions',
                    'received_on',
                    is_ascending=False,
                )
            )
        )
        .size(0)
    )

    if app_id:
        query = query.app(app_id)

    if xmlns:
        query = query.xmlns(xmlns)

    result = {}
    if missing_users:
        query = query.aggregation(
            MissingAggregation('missing_user_id', 'form.meta.userID').aggregation(
                TopHitsAggregation(
                    'top_hits_last_form_submissions',
                    'received_on',
                    is_ascending=False,
                )
            )
        )

    aggregations = query.run().aggregations

    if missing_users:
        result[MISSING_KEY] = aggregations.missing_user_id.bucket.top_hits_last_form_submissions.hits

    buckets_dict = aggregations.user_id.buckets_dict
    for user_id, bucket in six.iteritems(buckets_dict):
        result[user_id] = bucket.top_hits_last_form_submissions.hits

    return result