Ejemplo n.º 1
0
 def test_query_by_domain(self):
     """Forms shipped under two different domains are counted per domain."""
     first_domain = 'test1-{}'.format(self.test_id)
     second_domain = 'test2-{}'.format(self.test_id)

     # Two forms for the first domain, one for the second.
     shipped = [TestFormMetadata(domain=first_domain)] * 2
     shipped = shipped + [TestFormMetadata(domain=second_domain)]
     self._ship_forms_to_es(shipped)

     for expected_total, domain in ((2, first_domain), (1, second_domain)):
         self.assertEqual(expected_total, FormES().domain(domain).run().total)
Ejemplo n.º 2
0
def get_all_user_ids_submitted(domain, app_ids=None):
    """Return the keys of the ``form.meta.userID`` terms buckets for ``domain``.

    When ``app_ids`` is truthy the query is additionally restricted with
    ``.app(app_ids)``. No hits are requested; only the aggregation is read.
    """
    user_id_agg = TermsAggregation("user_id", "form.meta.userID")
    es_query = FormES().domain(domain).aggregation(user_id_agg).size(0)
    if app_ids:
        es_query = es_query.app(app_ids)

    user_buckets = es_query.run().aggregations.user_id.buckets_dict
    return user_buckets.keys()
Ejemplo n.º 3
0
def get_last_submission_time_for_user(domain, user_id, datespan):
    """Return the ``timeEnd`` date of the user's latest completed form, or None.

    The search is limited to forms completed between the date parts of
    ``datespan.startdate`` and ``datespan.enddate`` (passed as gte/lte), sorted
    by ``form.meta.timeEnd`` descending, and only the first hit is fetched.
    """
    window_start = datespan.startdate.date()
    window_end = datespan.enddate.date()
    latest_form_query = (
        FormES()
        .domain(domain)
        .user_id([user_id])
        .completed(gte=window_start, lte=window_end)
        .sort("form.meta.timeEnd", desc=True)
        .size(1)
    )

    hits = latest_form_query.run().hits
    if not hits:
        return None

    time_end = hits[0]['form']['meta']['timeEnd']
    if not time_end:
        return None
    return string_to_datetime(time_end).date()
Ejemplo n.º 4
0
    def test_form_soft_deletion(self):
        """A soft-deleted form is no longer returned by a default FormES query."""
        form, _ = self._create_form_and_sync_to_es()

        # Sanity check: the freshly synced form is the only result.
        self.assertEqual(1, FormES().run().total)

        # Soft delete inside the change-processing context, then refresh the
        # form index before searching again.
        with self.process_form_changes:
            accessor = FormAccessors(self.domain)
            accessor.soft_delete_forms([form.form_id])
        self.elasticsearch.indices.refresh(XFORM_INDEX_INFO.index)

        # The form must be gone from the results.
        self.assertEqual(0, FormES().run().total)
Ejemplo n.º 5
0
def _get_form_counts_by_user(domain, datespan, is_submission_time):
    """Return per-user form counts for ``domain`` within ``datespan``.

    :param domain: domain the query is restricted to
    :param datespan: object exposing ``startdate`` and ``enddate`` datetimes;
        only their ``.date()`` parts are used, passed as ``gte`` / ``lte``
    :param is_submission_time: when truthy the range is applied with
        ``submitted``, otherwise with ``completed``
    :return: result of ``counts_by_bucket()`` on the ``user`` aggregation
    """
    date_range = {
        'gte': datespan.startdate.date(),
        'lte': datespan.enddate.date(),
    }

    form_query = FormES().domain(domain)
    if is_submission_time:
        form_query = form_query.submitted(**date_range)
    else:
        form_query = form_query.completed(**date_range)

    # Only the aggregation is read below, so request no hits at all instead
    # of fetching a document that is immediately thrown away.
    form_query = form_query.user_aggregation().size(0)
    return form_query.run().aggregations.user.counts_by_bucket()
Ejemplo n.º 6
0
def get_users_with_forms(domain, user_ids):
    """Return the set of ``user_ids`` whose form count in ``domain`` is truthy.

    One count query is issued per user id.
    """
    return {
        user_id
        for user_id in user_ids
        if FormES().domain(domain).user_id(user_id).count()
    }
Ejemplo n.º 7
0
def _get_form_counts_by_date(domain, user_ids, datespan, timezone,
                             is_submission_time):
    """Return ``{iso_date_string: form_count}`` built from a date histogram.

    Forms whose xmlns is a key of ``SYSTEM_FORM_XMLNS_MAP`` are excluded. The
    date range and histogram are based on submission time when
    ``is_submission_time`` is truthy, otherwise on completion time.
    ``timezone.zone`` is handed to the histogram and ``timezone`` itself is
    used to convert each bucket key into a date.
    """
    base_query = FormES().domain(domain).user_id(user_ids)
    for system_xmlns in SYSTEM_FORM_XMLNS_MAP:
        base_query = base_query.filter(filters.NOT(xmlns_filter(system_xmlns)))

    range_start = datespan.startdate.date()
    range_end = datespan.enddate.date()
    if is_submission_time:
        ranged_query = base_query.submitted(gte=range_start, lte=range_end)
        histogram_query = ranged_query.submitted_histogram(timezone.zone)
    else:
        ranged_query = base_query.completed(gte=range_start, lte=range_end)
        histogram_query = ranged_query.completed_histogram(timezone.zone)

    response = histogram_query.size(0).run()
    buckets = response.aggregations.date_histogram.buckets_list

    counts_by_day = {}
    for bucket in buckets:
        # Bucket keys are in milliseconds while fromtimestamp() takes seconds;
        # the timezone is passed so the resulting datetime is aware.
        bucket_day = datetime.fromtimestamp(bucket.key // 1000, timezone).date()
        counts_by_day[bucket_day.isoformat()] = bucket.doc_count
    return counts_by_day
Ejemplo n.º 8
0
    def test_form_soft_deletion(self):
        """A soft-deleted form is no longer returned by a default FormES query."""
        form, _ = self._create_form_and_sync_to_es()

        # Sanity check: the freshly synced form is the only result.
        self.assertEqual(1, FormES().run().total)

        # Soft delete with both pillow contexts active (outer: xform pillow
        # with UCR skipped, inner: default change feed pillow).
        with process_pillow_changes('xform-pillow', {'skip_ucr': True}), \
                process_pillow_changes('DefaultChangeFeedPillow'):
            accessor = FormAccessors(self.domain)
            accessor.soft_delete_forms([form.form_id])
        self.elasticsearch.indices.refresh(XFORM_INDEX_INFO.index)

        # The form must be gone from the results.
        self.assertEqual(0, FormES().run().total)
Ejemplo n.º 9
0
def resave_es_forms_with_unknown_user_type(user_id):
    """Re-save each form of ``user_id`` that FormES lists with the unknown user type.

    The ``(domain, _id)`` pairs are read with ``scroll=True`` and every form
    is loaded through ``FormAccessors`` before being passed to ``resave_form``.
    """
    unknown_type_forms = FormES().user_type(UNKNOWN_USER_TYPE).user_id(user_id)
    rows = unknown_type_forms.values_list('domain', '_id', scroll=True)
    for form_domain, form_id in rows:
        loaded_form = FormAccessors(form_domain).get_form(form_id)
        resave_form(form_domain, loaded_form)
Ejemplo n.º 10
0
def _received_on_query(domain, desc=False):
    """Build a FormES query for ``domain`` that selects and sorts by ``received_on``.

    ``desc`` is forwarded to ``sort``. The query is returned unexecuted.
    """
    query = FormES().fields(['received_on'])
    query = query.domain(domain)
    return query.sort('received_on', desc=desc)
Ejemplo n.º 11
0
def get_last_forms_by_app(user_id):
    """
    Get the last form submission for each app for a given user id.

    :param user_id: id of a couch user
    :return: list holding, for every ``app_id`` bucket, the first top hit
        when sorted by ``received_on`` descending
    """
    last_submission_agg = TopHitsAggregation(
        'top_hits_last_form_submissions',
        'received_on',
        is_ascending=False,
    )
    per_app_agg = TermsAggregation('app_id', 'app_id').aggregation(last_submission_agg)
    query = FormES().user_id(user_id).aggregation(per_app_agg).size(0)

    app_buckets = query.run().aggregations.app_id.buckets_dict
    return [
        bucket.top_hits_last_form_submissions.hits[0]
        for _app_id, bucket in app_buckets.items()
    ]
Ejemplo n.º 12
0
 def _get_es_modified_dates_for_forms(form_ids):
     """Map each form id returned by ES to ``(received_on, doc_type, domain)``.

     The query uses ``FormES(for_export=True)`` with its default filters
     removed; ``received_on`` is converted with ``iso_string_to_datetime``.
     """
     query = FormES(for_export=True).remove_default_filters().form_ids(form_ids)
     rows = query.values_list('_id', 'received_on', 'doc_type', 'domain')

     modified_dates = {}
     for form_id, received_on, doc_type, domain in rows:
         received_at = iso_string_to_datetime(received_on)
         modified_dates[form_id] = (received_at, doc_type, domain)
     return modified_dates
Ejemplo n.º 13
0
def get_paged_forms_by_type(domain, doc_types, start=0, size=10):
    """Return one page of forms in ``domain`` having any of ``doc_types``.

    The ``is_xform_instance`` and ``has_user`` default filters are removed,
    doc types are lower-cased before filtering, and results are sorted by
    ``received_on`` descending. ``start`` and ``size`` select the page.

    :return: ``PagedResult`` carrying the total count and the page's hits
    """
    lowered_types = [doc_type.lower() for doc_type in doc_types]
    query = (
        FormES()
        .domain(domain)
        .remove_default_filter('is_xform_instance')
        .remove_default_filter('has_user')
        .doc_type(lowered_types)
        .sort("received_on", desc=True)
        .start(start)
        .size(size)
    )
    page = query.run()
    return PagedResult(total=page.total, hits=page.hits)
Ejemplo n.º 14
0
def get_form_counts_for_domains(domains):
    """Return ``counts_by_bucket()`` of a domain aggregation over ``domains``.

    Forms are filtered with a ``term`` filter on ``domain``; no hits are
    requested.
    """
    domain_filter = filters.term('domain', domains)
    query = FormES().filter(domain_filter).domain_aggregation().size(0)
    return query.run().aggregations.domain.counts_by_bucket()
Ejemplo n.º 15
0
    def dehydrate(self, bundle):
        """Add form-count ``extras`` to ``bundle.data`` when requested.

        When ``_safe_bool(bundle, 'extras')`` is truthy, four totals are
        computed for the bundle's user in the request's domain: forms
        submitted / completed in the last 30 days and between the first day
        of last month and the first day of this month. The parent class's
        ``dehydrate`` is always called and its result returned.
        """
        if _safe_bool(bundle, 'extras'):
            now = datetime.datetime.utcnow()
            thirty_days_ago = now - datetime.timedelta(days=30)
            this_month_start = datetime.datetime(now.year, now.month, 1)
            # Step back one day into the previous month, then snap to its first day.
            last_month_start = (this_month_start - datetime.timedelta(days=1)).replace(day=1)

            user_forms = FormES().domain(bundle.request.domain).user_id([bundle.obj._id])

            def total_of(ranged_query):
                # Only the total is needed, so no hits are requested.
                return ranged_query.size(0).run().total

            bundle.data['extras'] = {
                'submitted_last_30': total_of(
                    user_forms.submitted(gte=thirty_days_ago, lte=now)),
                'completed_last_30': total_of(
                    user_forms.completed(gte=thirty_days_ago, lte=now)),
                'submitted_last_month': total_of(
                    user_forms.submitted(gte=last_month_start, lte=this_month_start)),
                'completed_last_month': total_of(
                    user_forms.completed(gte=last_month_start, lte=this_month_start)),
            }
        return super(UserResource, self).dehydrate(bundle)
Ejemplo n.º 16
0
def get_form_duration_stats_for_users(domain,
                                      app_id,
                                      xmlns,
                                      user_ids,
                                      startdate,
                                      enddate,
                                      by_submission_time=True):
    """Get the form duration stats for a group of users.

    Duration is computed by an ES script as ``timeEnd - timeStart``. The date
    range (``gte=startdate``, ``lt=enddate``) is applied with
    ``submitted_filter`` when ``by_submission_time`` is truthy, otherwise with
    ``completed_filter``. A truthy ``app_id`` further restricts the query.

    :return: the ``result`` of the ``duration_stats`` aggregation
    """
    if by_submission_time:
        date_filter = submitted_filter(gte=startdate, lt=enddate)
    else:
        date_filter = completed_filter(gte=startdate, lt=enddate)

    duration_agg = ExtendedStatsAggregation(
        'duration_stats',
        'form.meta.timeStart',
        script="doc['form.meta.timeEnd'].value - doc['form.meta.timeStart'].value",
    )

    query = (
        FormES()
        .domain(domain)
        .user_ids_handle_unknown(user_ids)
        .remove_default_filter('has_user')
        .xmlns(xmlns)
        .filter(date_filter)
        .aggregation(duration_agg)
        .size(0)
    )
    if app_id:
        query = query.app(app_id)

    return query.run().aggregations.duration_stats.result
Ejemplo n.º 17
0
 def _assert_form_is_in_es(self, form):
     """Assert ES holds exactly one form and that it matches ``form``.

     The single hit must carry ``self.domain``, the form's xmlns and the
     ``XFormInstance`` doc type.
     """
     search = FormES().run()
     self.assertEqual(1, search.total)

     indexed_doc = search.hits[0]
     expectations = (
         (self.domain, 'domain'),
         (form.xmlns, 'xmlns'),
         ('XFormInstance', 'doc_type'),
     )
     for expected_value, key in expectations:
         self.assertEqual(expected_value, indexed_doc[key])
Ejemplo n.º 18
0
def _get_form_counts_by_date(domain, user_ids, datespan, timezone,
                             is_submission_time):
    """Return ``{iso_date_string: form_count}`` built from a date histogram.

    Forms whose xmlns is a key of ``SYSTEM_FORM_XMLNS_MAP`` are excluded.

    :param domain: domain the query is restricted to
    :param user_ids: user ids the query is restricted to
    :param datespan: object exposing ``startdate`` / ``enddate`` datetimes;
        their ``.date()`` parts are passed as ``gte`` / ``lte``
    :param timezone: tzinfo object exposing ``.zone``; the zone name is given
        to the histogram and the object itself is used to convert bucket keys
        (assumed to be a pytz-style timezone - TODO confirm against callers)
    :param is_submission_time: when truthy use submission time for the range
        and histogram, otherwise completion time
    :return: dict mapping ISO-formatted dates to the bucket's ``doc_count``
    """
    form_query = FormES().domain(domain).user_id(user_ids)
    for xmlns in SYSTEM_FORM_XMLNS_MAP:
        form_query = form_query.filter(filters.NOT(xmlns_filter(xmlns)))

    start = datespan.startdate.date()
    end = datespan.enddate.date()
    if is_submission_time:
        form_query = form_query.submitted(gte=start, lte=end)
        form_query = form_query.submitted_histogram(timezone.zone)
    else:
        form_query = form_query.completed(gte=start, lte=end)
        form_query = form_query.completed_histogram(timezone.zone)

    buckets = form_query.size(0).run().aggregations.date_histogram.buckets_list

    counts_by_day = {}
    for bucket in buckets:
        # Elasticsearch keys are in milliseconds, fromtimestamp() takes
        # seconds. The timezone must be passed explicitly: without it
        # fromtimestamp() returns a naive datetime in the server's local
        # timezone, which can put a bucket on the wrong calendar day.
        bucket_day = datetime.fromtimestamp(bucket.key // 1000, timezone).date()
        counts_by_day[bucket_day.isoformat()] = bucket.doc_count
    return counts_by_day
Ejemplo n.º 19
0
def get_last_submission_time_for_users(domain,
                                       user_ids,
                                       datespan,
                                       es_instance_alias=ES_DEFAULT_INSTANCE):
    """Return ``{user_id: date}`` of each user's latest completed form.

    Forms are limited to those completed between the date parts of
    ``datespan.startdate`` and ``datespan.enddate`` (gte/lte). For each
    ``form.meta.userID`` bucket the top hit by ``form.meta.timeEnd``
    descending is taken; a falsy ``timeEnd`` maps to None.
    """
    last_completed_agg = TopHitsAggregation(
        'top_hits_last_form_submissions',
        'form.meta.timeEnd',
        is_ascending=False,
        include='form.meta.timeEnd',
    )
    per_user_agg = TermsAggregation('user_id', 'form.meta.userID').aggregation(last_completed_agg)

    query = (
        FormES(es_instance_alias=es_instance_alias)
        .domain(domain)
        .user_id(user_ids)
        .completed(gte=datespan.startdate.date(), lte=datespan.enddate.date())
        .aggregation(per_user_agg)
        .size(0)
    )
    user_buckets = query.run().aggregations.user_id.buckets_dict

    last_dates = {}
    for user_id, bucket in user_buckets.items():
        time_end = bucket.top_hits_last_form_submissions.hits[0]['form']['meta']['timeEnd']
        last_dates[user_id] = string_to_datetime(time_end).date() if time_end else None
    return last_dates
Ejemplo n.º 20
0
def get_last_form_submission_for_xmlns(domain, xmlns):
    """Return the most recently received form hit for ``xmlns`` in ``domain``.

    :param domain: domain the query is restricted to
    :param xmlns: form xmlns the query is restricted to
    :return: the first hit when sorted by ``received_on`` descending, or None
        when the query has no hits
    """
    query = (
        FormES()
        .domain(domain)
        .xmlns(xmlns)
        .sort('received_on', desc=True)
        .size(1)
    )

    # Execute once and reuse the hits: running the query a second time costs
    # another round trip and its result may differ from the first run (an
    # empty second result would raise IndexError).
    hits = query.run().hits
    return hits[0] if hits else None
Ejemplo n.º 21
0
def get_username_in_last_form_user_id_submitted(domain, user_id):
    """Return ``form.meta.username`` of the user's most recently received form.

    Returns None when the user has no forms in ``domain`` or when the latest
    form's meta block has no ``username`` key.
    """
    latest_form_query = (
        FormES()
        .domain(domain)
        .user_id(user_id)
        .sort('received_on', desc=True)
        .source(['form.meta.username'])
        .size(1)
    )

    hits = latest_form_query.run().hits
    if not hits:
        return None
    return hits[0]['form']['meta'].get('username')
Ejemplo n.º 22
0
def get_form_ids_having_multimedia(domain,
                                   app_id,
                                   xmlns,
                                   startdate,
                                   enddate,
                                   user_types=None,
                                   group=None):
    """Return ids of matching forms with at least one non-XML attachment.

    :param domain: domain the query is restricted to
    :param app_id: app the query is restricted to
    :param xmlns: form xmlns the query is restricted to
    :param startdate: lower bound (``gte``) on submission time
    :param enddate: upper bound (``lte``) on submission time
    :param user_types: optional; when truthy, restrict by user type
    :param group: optional group id; when truthy, restrict to the users
        listed on that group
    :return: set of form ids; empty when ``group`` is given but no such group
        is found in ``domain``
    """
    query = (
        FormES()
        .domain(domain)
        .app(app_id)
        .xmlns(xmlns)
        .submitted(gte=startdate, lte=enddate)
        .remove_default_filter("has_user")
        .source(['_id', 'external_blobs'])
    )

    if user_types:
        query = query.user_type(user_types)

    if group:
        results = (GroupES().domain(domain).group_ids([group]).source(
            ['users'])).run().hits
        assert len(results) <= 1
        if not results:
            # The group was not found, so there are no users to match and
            # therefore no forms; previously this raised IndexError.
            return set()
        query = query.user_id(results[0]['users'])

    form_ids = set()
    for form in query.scroll():
        try:
            for attachment in _get_attachment_dicts_from_form(form):
                if attachment['content_type'] != "text/xml":
                    form_ids.add(form['_id'])
                    # One non-XML attachment is enough; stop scanning this form.
                    break
        except AttributeError:
            # Best-effort, kept from the original: a form whose attachment
            # data triggers AttributeError is skipped.
            # NOTE(review): presumably raised for forms without attachment
            # data - confirm against _get_attachment_dicts_from_form.
            pass
    return form_ids
Ejemplo n.º 23
0
def send_unknown_user_type_stats():
    """Report two gauges about the unknown user type.

    The first carries the approximate count of user ids with the unknown user
    type, the second the number of forms FormES counts for that user type.
    Both use ``MPM_MAX`` as multiprocess mode.
    """
    unknown_user_count = _get_unknown_user_type_user_ids_approx_count()
    metrics_gauge(
        'commcare.fix_user_types.unknown_user_count',
        unknown_user_count,
        multiprocess_mode=MPM_MAX,
    )

    unknown_user_form_count = FormES().user_type(UNKNOWN_USER_TYPE).count()
    metrics_gauge(
        'commcare.fix_user_types.unknown_user_form_count',
        unknown_user_form_count,
        multiprocess_mode=MPM_MAX,
    )
Ejemplo n.º 24
0
def get_last_submission_time_for_users(domain,
                                       user_ids,
                                       datespan,
                                       for_export=False):
    """Return ``{user_id: date}`` of each user's most recently received form.

    Forms are limited to those submitted between the date parts of
    ``datespan.startdate`` and ``datespan.enddate`` (gte/lte). For each
    ``form.meta.userID`` bucket the top hit by ``received_on`` descending is
    taken; a falsy ``received_on`` maps to None. ``for_export`` is forwarded
    to ``FormES``.
    """
    def _to_date(raw):
        if not raw:
            return None
        return string_to_datetime(raw).date()

    last_received_agg = TopHitsAggregation(
        'top_hits_last_form_submissions',
        'received_on',
        is_ascending=False,
        include='received_on',
    )
    per_user_agg = TermsAggregation('user_id', 'form.meta.userID').aggregation(last_received_agg)

    query = (
        FormES(for_export=for_export)
        .domain(domain)
        .user_id(user_ids)
        .submitted(gte=datespan.startdate.date(), lte=datespan.enddate.date())
        .aggregation(per_user_agg)
        .size(0)
    )
    user_buckets = query.run().aggregations.user_id.buckets_dict

    return {
        user_id: _to_date(bucket.top_hits_last_form_submissions.hits[0]['received_on'])
        for user_id, bucket in user_buckets.items()
    }
Ejemplo n.º 25
0
def resave_es_forms_with_unknown_user_type(user_id):
    """Re-save each form of ``user_id`` that FormES lists with the unknown user type.

    The ``(domain, _id)`` pairs are read with ``scroll=True`` and every form
    is loaded via ``XFormInstance.objects.get_form`` before being passed to
    ``resave_form``.
    """
    unknown_type_forms = FormES().user_type(UNKNOWN_USER_TYPE).user_id(user_id)
    for form_domain, form_id in unknown_type_forms.values_list('domain', '_id', scroll=True):
        loaded_form = XFormInstance.objects.get_form(form_id, form_domain)
        resave_form(form_domain, loaded_form)
Ejemplo n.º 26
0
def get_form_ids_missing_from_elasticsearch(all_form_ids):
    """Return, as a list, the ids in ``all_form_ids`` that FormES does not return.

    Ids are looked up in chunks of 500.
    """
    missing = set()
    for chunk in chunked(all_form_ids, 500):
        requested = set(chunk)
        found = set(FormES().doc_id(requested).get_ids())
        missing |= requested - found
        # ES must not hand back ids that were not asked for.
        assert found - requested == set()
    return list(missing)
Ejemplo n.º 27
0
def get_all_user_ids_submitted(domain, app_ids=None):
    """Return a list of the ``form.meta.userID`` terms bucket keys for ``domain``.

    When ``app_ids`` is truthy the query is additionally restricted with
    ``.app(app_ids)``. No hits are requested; only the aggregation is read.
    """
    user_id_agg = TermsAggregation('user_id', 'form.meta.userID')
    es_query = FormES().domain(domain).aggregation(user_id_agg).size(0)
    if app_ids:
        es_query = es_query.app(app_ids)

    user_buckets = es_query.run().aggregations.user_id.buckets_dict
    return list(user_buckets)
Ejemplo n.º 28
0
 def _get_es_modified_dates_for_forms(form_ids):
     """Map each form id returned by ES to ``(received_on, doc_type, domain)``.

     The query runs against the ``ES_EXPORT_INSTANCE`` alias with default
     filters removed; ``received_on`` is converted with
     ``iso_string_to_datetime``.
     """
     query = FormES(es_instance_alias=ES_EXPORT_INSTANCE)
     query = query.remove_default_filters().form_ids(form_ids)

     modified_dates = {}
     for row in query.values_list('_id', 'received_on', 'doc_type', 'domain'):
         form_id, received_on, doc_type, domain = row
         modified_dates[form_id] = (iso_string_to_datetime(received_on), doc_type, domain)
     return modified_dates
Ejemplo n.º 29
0
def get_form_name_from_last_submission_for_xmlns(domain, xmlns):
    """Return ``form.@name`` of the most recently received form for ``xmlns``.

    Only forms with a non-null ``form.@name`` are considered. Returns None
    when no such form exists in ``domain``.
    """
    latest_named_form_query = (
        FormES()
        .domain(domain)
        .xmlns(xmlns)
        .sort('received_on', desc=True)
        .source(['form.@name'])
        .size(1)
        .non_null('form.@name')
    )

    hits = latest_named_form_query.run().hits
    return hits[0]['form']['@name'] if hits else None
Ejemplo n.º 30
0
def get_form_ids_having_multimedia(domain, app_id, xmlns, datespan,
                                   user_types):
    """Return the set of ids of the forms yielded by ``_forms_with_attachments``.

    The underlying query is restricted to ``domain``, ``app_id`` and ``xmlns``
    and to forms submitted from ``datespan.startdate`` up to one day past
    ``datespan.enddate``; the ``has_user`` default filter is removed. A truthy
    ``user_types`` further restricts the query by user type.
    """
    upper_bound = datespan.enddate + timedelta(days=1)
    query = (
        FormES()
        .domain(domain)
        .app(app_id)
        .xmlns(xmlns)
        .submitted(gte=datespan.startdate, lte=upper_bound)
        .remove_default_filter("has_user")
    )
    if user_types:
        query = query.user_type(user_types)

    form_ids = set()
    for form in _forms_with_attachments(query):
        form_ids.add(form['_id'])
    return form_ids
Ejemplo n.º 31
0
def get_form_duration_stats_by_user(
        domain,
        app_id,
        xmlns,
        user_ids,
        startdate,
        enddate,
        by_submission_time=True):
    """Gets stats on the duration of a selected form grouped by users.

    Duration is computed by an ES script as ``timeEnd - timeStart``.

    :param domain: domain the query is restricted to
    :param app_id: optional; when truthy, restrict the query to this app
    :param xmlns: form xmlns the query is restricted to
    :param user_ids: user ids to include; when it contains ``None`` an extra
        aggregation over forms missing ``form.meta.userID`` is added
    :param startdate: lower bound (``gte``) of the date filter
    :param enddate: upper bound (``lt``) of the date filter
    :param by_submission_time: when truthy filter with ``submitted_filter``,
        otherwise with ``completed_filter``
    :return: dict mapping each user id bucket key to its ``duration_stats``
        result, plus ``MISSING_KEY`` when ``None`` was in ``user_ids``
    """
    date_filter_fn = submitted_filter if by_submission_time else completed_filter
    missing_users = None in user_ids

    def duration_stats_agg():
        # Build a fresh aggregation per parent so the terms and missing
        # aggregations never share one child object.
        return ExtendedStatsAggregation(
            'duration_stats',
            'form.meta.timeStart',
            script="doc['form.meta.timeEnd'].value - doc['form.meta.timeStart'].value",
        )

    query = (
        FormES()
        .domain(domain)
        .user_ids_handle_unknown(user_ids)
        .remove_default_filter('has_user')
        .xmlns(xmlns)
        .filter(date_filter_fn(gte=startdate, lt=enddate))
        .aggregation(
            TermsAggregation('user_id', 'form.meta.userID').aggregation(duration_stats_agg())
        )
        .size(0)
    )

    if app_id:
        query = query.app(app_id)

    if missing_users:
        query = query.aggregation(
            MissingAggregation('missing_user_id', 'form.meta.userID').aggregation(duration_stats_agg())
        )

    result = {}
    aggregations = query.run().aggregations

    if missing_users:
        result[MISSING_KEY] = aggregations.missing_user_id.bucket.duration_stats.result

    # .items() works on both Python 2 and 3; .iteritems() was removed in 3.
    for user_id, bucket in aggregations.user_id.buckets_dict.items():
        result[user_id] = bucket.duration_stats.result
    return result
Ejemplo n.º 32
0
 def _resolve_from_template(self, template, query_context):
     """Count forms submitted by the context user for the template's form.

     The template must be of type ``'form'``. The date range comes from
     ``get_daterange_start_end_dates(template.time_range)`` and the xmlns from
     the form identified by ``template.source_id``.
     """
     # todo: support other types and options
     assert template.type == 'form'
     range_start, range_end = get_daterange_start_end_dates(template.time_range)
     form_xmlns = Form.get_form(template.source_id).xmlns

     query = (
         FormES()
         .user_id(query_context.user._id)
         .xmlns([form_xmlns])
         .submitted(gte=range_start, lte=range_end)
         .size(0)
     )
     return query.count()
Ejemplo n.º 33
0
def _get_form_counts_by_user(domain, datespan, is_submission_time, user_ids=None):
    """Return per-user form counts for ``domain`` within ``datespan``.

    Forms with the ``SYSTEM_FORM_XMLNS`` xmlns are excluded. The date range
    (date parts of ``datespan.startdate`` / ``enddate`` as gte/lte) is applied
    to submission time when ``is_submission_time`` is truthy, otherwise to
    completion time. A truthy ``user_ids`` restricts the query to those users.
    """
    not_system_form = filters.NOT(xmlns_filter(SYSTEM_FORM_XMLNS))
    query = FormES().domain(domain).filter(not_system_form)

    range_start = datespan.startdate.date()
    range_end = datespan.enddate.date()
    if is_submission_time:
        query = query.submitted(gte=range_start, lte=range_end)
    else:
        query = query.completed(gte=range_start, lte=range_end)

    if user_ids:
        query = query.user_id(user_ids)

    response = query.user_aggregation().size(0).run()
    return response.aggregations.user.counts_by_bucket()
Ejemplo n.º 34
0
def _get_form_counts_by_user(domain, datespan, is_submission_time, user_ids=None):
    """Return ``counts_by_bucket()`` of the user aggregation for ``domain``.

    Forms with the ``SYSTEM_FORM_XMLNS`` xmlns are excluded. ``datespan``
    bounds (date parts, gte/lte) are applied with ``submitted`` when
    ``is_submission_time`` is truthy and with ``completed`` otherwise. A
    truthy ``user_ids`` restricts the query to those users.
    """
    base_query = FormES().domain(domain).filter(filters.NOT(xmlns_filter(SYSTEM_FORM_XMLNS)))

    # Pick the range method first, then apply the same bounds either way.
    apply_date_range = base_query.submitted if is_submission_time else base_query.completed
    form_query = apply_date_range(
        gte=datespan.startdate.date(),
        lte=datespan.enddate.date(),
    )

    if user_ids:
        form_query = form_query.user_id(user_ids)

    form_query = form_query.user_aggregation().size(0)
    return form_query.run().aggregations.user.counts_by_bucket()
Ejemplo n.º 35
0
def _get_form_counts_by_user(domain, datespan, is_submission_time, user_ids=None, export=False):
    """Return per-user form counts for ``domain`` within ``datespan``.

    The query runs against ``ES_EXPORT_INSTANCE`` when ``export`` is truthy,
    otherwise against ``ES_DEFAULT_INSTANCE``. Forms whose xmlns is a key of
    ``SYSTEM_FORM_XMLNS_MAP`` are excluded. The date range (date parts of
    ``datespan.startdate`` / ``enddate`` as gte/lte) is applied to submission
    time when ``is_submission_time`` is truthy, otherwise to completion time.
    A truthy ``user_ids`` restricts the query to those users.
    """
    if export:
        es_instance = ES_EXPORT_INSTANCE
    else:
        es_instance = ES_DEFAULT_INSTANCE

    query = FormES(es_instance_alias=es_instance).domain(domain)
    for system_xmlns in SYSTEM_FORM_XMLNS_MAP.keys():
        query = query.filter(filters.NOT(xmlns_filter(system_xmlns)))

    range_start = datespan.startdate.date()
    range_end = datespan.enddate.date()
    if is_submission_time:
        query = query.submitted(gte=range_start, lte=range_end)
    else:
        query = query.completed(gte=range_start, lte=range_end)

    if user_ids:
        query = query.user_id(user_ids)

    response = query.user_aggregation().size(0).run()
    return response.aggregations.user.counts_by_bucket()
Ejemplo n.º 36
0
def _get_form_counts_by_date(domain, user_ids, datespan, timezone, is_submission_time):
    """Return ``{iso_date_string: form_count}`` built from a date histogram.

    Forms with the ``SYSTEM_FORM_XMLNS`` xmlns are excluded.

    :param domain: domain the query is restricted to
    :param user_ids: user ids the query is restricted to
    :param datespan: object exposing ``startdate`` / ``enddate`` datetimes;
        their ``.date()`` parts are passed as ``gte`` / ``lte``
    :param timezone: tzinfo object exposing ``.zone``; the zone name is given
        to the histogram and the object itself is used to convert bucket keys
        (assumed to be a pytz-style timezone - TODO confirm against callers)
    :param is_submission_time: when truthy use submission time for the range
        and histogram, otherwise completion time
    :return: dict mapping ISO-formatted dates to the bucket's ``doc_count``
    """
    form_query = FormES().domain(domain).user_id(user_ids).filter(filters.NOT(xmlns_filter(SYSTEM_FORM_XMLNS)))

    start = datespan.startdate.date()
    end = datespan.enddate.date()
    if is_submission_time:
        form_query = form_query.submitted(gte=start, lte=end).submitted_histogram(timezone.zone)
    else:
        form_query = form_query.completed(gte=start, lte=end).completed_histogram(timezone.zone)

    buckets = form_query.size(0).run().aggregations.date_histogram.buckets_list

    counts_by_day = {}
    for bucket in buckets:
        # Elasticsearch keys are in milliseconds, fromtimestamp() takes
        # seconds. The timezone must be passed explicitly: without it
        # fromtimestamp() returns a naive datetime in the server's local
        # timezone, which can put a bucket on the wrong calendar day.
        bucket_day = datetime.fromtimestamp(bucket.key // 1000, timezone).date()
        counts_by_day[bucket_day.isoformat()] = bucket.doc_count
    return counts_by_day
Ejemplo n.º 37
0
def get_last_form_submission_for_xmlns(domain, xmlns):
    """Return the most recently received form hit for ``xmlns`` in ``domain``.

    :param domain: domain the query is restricted to
    :param xmlns: form xmlns the query is restricted to
    :return: the first hit when sorted by ``received_on`` descending, or None
        when the query has no hits
    """
    query = FormES().domain(domain).xmlns(xmlns).sort("received_on", desc=True).size(1)

    # Execute once and reuse the hits: running the query a second time costs
    # another round trip and its result may differ from the first run (an
    # empty second result would raise IndexError).
    hits = query.run().hits
    if not hits:
        return None
    return hits[0]
Ejemplo n.º 38
0
 def test_query_completed_date(self):
     """Check ``completed()`` range bounds against forms with known end times.

     Two forms ending at ``early`` and one ending at ``later`` are shipped to
     ES under a test-specific domain; each assertion compares the expected
     total for one combination of gt/gte/lt/lte bounds.
     """
     domain = 'test-completed-{}'.format(self.test_id)
     early = datetime.datetime(2015, 12, 5)
     later = datetime.datetime(2015, 12, 8)
     # Two forms completed at `early`, one at `later`.
     self._ship_forms_to_es(
         2 * [TestFormMetadata(domain=domain, time_end=early)] +
         1 * [TestFormMetadata(domain=domain, time_end=later)]
     )
     # Every check below starts again from this same base query.
     base_qs = FormES().domain(domain)
     self.assertEqual(3, base_qs.run().total)
     # test gt/gte
     self.assertEqual(3, base_qs.completed(gt=early - datetime.timedelta(days=1)).run().total)
     self.assertEqual(3, base_qs.completed(gte=early).run().total)
     self.assertEqual(1, base_qs.completed(gt=early).run().total)
     self.assertEqual(1, base_qs.completed(gte=later).run().total)
     self.assertEqual(0, base_qs.completed(gt=later).run().total)
     # test lt/lte
     self.assertEqual(3, base_qs.completed(lt=later + datetime.timedelta(days=1)).run().total)
     self.assertEqual(3, base_qs.completed(lte=later).run().total)
     self.assertEqual(2, base_qs.completed(lt=later).run().total)
     self.assertEqual(2, base_qs.completed(lte=early).run().total)
     self.assertEqual(0, base_qs.completed(lt=early).run().total)
     # test both
     # Strictly between `early` and `later` there are no forms.
     self.assertEqual(0, base_qs.completed(gt=early, lt=later).run().total)