def get_forms(domain, startdate, enddate, user_ids=None, app_ids=None,
              xmlnss=None, by_submission_time=True):
    """Query forms in ``domain`` within a date window, capped at 5000 hits.

    :param domain: value passed to the ``FormES`` domain filter
    :param startdate: lower bound of the date filter (``gte``)
    :param enddate: upper bound of the date filter (``lte``)
    :param user_ids: when truthy, the query is restricted with
        ``user_ids_handle_unknown`` and the ``has_user`` default filter
        is removed
    :param app_ids: passed to the query's ``app`` filter
    :param xmlnss: passed to the query's ``xmlns`` filter
    :param by_submission_time: use ``submitted_filter`` when true,
        ``completed_filter`` otherwise
    :return: ``PagedResult`` built from the query result's total and hits
    """
    if by_submission_time:
        date_filter_fn = submitted_filter
    else:
        date_filter_fn = completed_filter

    query = FormES().domain(domain)
    query = query.filter(date_filter_fn(gte=startdate, lte=enddate))
    query = query.app(app_ids).xmlns(xmlnss).size(5000)

    if user_ids:
        query = query.user_ids_handle_unknown(user_ids)
        query = query.remove_default_filter('has_user')

    es_result = query.run()
    return PagedResult(total=es_result.total, hits=es_result.hits)
def _get_form_counts_by_user(domain, datespan, is_submission_time, user_ids=None):
    """Return per-user form counts for ``domain`` over ``datespan``.

    Forms whose xmlns matches ``SYSTEM_FORM_XMLNS`` are excluded.

    :param domain: value passed to the ``FormES`` domain filter
    :param datespan: object exposing ``startdate`` and ``enddate``; only
        the ``.date()`` part of each is used (both bounds inclusive)
    :param is_submission_time: filter on ``submitted`` when true,
        otherwise on ``completed``
    :param user_ids: when truthy, restrict the query to these users
    :return: ``counts_by_bucket()`` of the user aggregation
    """
    query = FormES().domain(domain)
    query = query.filter(filters.NOT(xmlns_filter(SYSTEM_FORM_XMLNS)))

    # Pick the date filter method first, then apply the same bounds to it.
    apply_date_range = query.submitted if is_submission_time else query.completed
    query = apply_date_range(
        gte=datespan.startdate.date(),
        lte=datespan.enddate.date(),
    )

    if user_ids:
        query = query.user_id(user_ids)

    query = query.user_aggregation().size(0)
    return query.run().aggregations.user.counts_by_bucket()
def get_form_ids_having_multimedia(domain, app_id, xmlns, startdate, enddate):
    """Return the ids of forms that have at least one non-XML attachment.

    :param domain: value passed to the ``FormES`` domain filter
    :param app_id: passed to the query's ``app`` filter
    :param xmlns: passed to the query's ``xmlns`` filter
    :param startdate: lower bound of the ``submitted`` filter (``gte``)
    :param enddate: upper bound of the ``submitted`` filter (``lte``)
    :return: set of ``_id`` values of the matching forms
    """
    # TODO: Remove references to _attachments once all forms have been migrated to Riak
    query = (
        FormES()
        .domain(domain)
        .app(app_id)
        .xmlns(xmlns)
        .submitted(gte=startdate, lte=enddate)
        .remove_default_filter("has_user")
        .source(['_attachments', '_id', 'external_blobs'])
    )

    form_ids = set()
    for form in query.scroll():
        try:
            # Stop at the first non-XML attachment: one is enough to
            # qualify the form, so the rest need not be inspected.
            has_multimedia = any(
                attachment['content_type'] != "text/xml"
                for attachment in _get_attachment_dicts_from_form(form)
            )
        except AttributeError:
            # Best effort: forms whose attachment data cannot be read
            # are skipped rather than failing the whole scan.
            continue
        if has_multimedia:
            form_ids.add(form['_id'])
    return form_ids
def get_paged_forms_by_type(domain, doc_types, sort_col=None, desc=True,
                            start=0, size=10):
    """Return one page of forms in ``domain`` having the given doc types.

    The ``is_xform_instance`` and ``has_user`` default filters are removed
    before filtering on the lower-cased ``doc_types``.

    :param domain: value passed to the ``FormES`` domain filter
    :param doc_types: iterable of doc type names; each is lower-cased
    :param sort_col: field to sort on; falls back to ``"received_on"``
        when falsy
    :param desc: sort descending when true
    :param start: offset of the first hit to return
    :param size: maximum number of hits to return
    :return: ``PagedResult`` built from the query result's total and hits
    """
    if not sort_col:
        sort_col = "received_on"

    query = FormES().domain(domain)
    for default_filter in ('is_xform_instance', 'has_user'):
        query = query.remove_default_filter(default_filter)

    lowered_doc_types = [doc_type.lower() for doc_type in doc_types]
    query = query.doc_type(lowered_doc_types)
    query = query.sort(sort_col, desc=desc).start(start).size(size)

    page = query.run()
    return PagedResult(total=page.total, hits=page.hits)
def dehydrate(self, bundle):
    """Add optional form-count ``extras`` to the bundle, then dehydrate.

    When ``_safe_bool(bundle, 'extras')`` is truthy, ``bundle.data['extras']``
    is populated with four counts for the user ``bundle.obj._id`` in
    ``bundle.request.domain``:

    * ``submitted_last_30`` / ``completed_last_30``: the last 30 days up to
      and including now (UTC).
    * ``submitted_last_month`` / ``completed_last_month``: the previous
      calendar month, from its first day (inclusive) to the first day of
      the current month (exclusive).

    :param bundle: the resource bundle being dehydrated
    :return: the result of the parent class's ``dehydrate``
    """
    show_extras = _safe_bool(bundle, 'extras')
    if show_extras:
        now = datetime.datetime.utcnow()
        thirty_days_ago = now - datetime.timedelta(days=30)

        # Subtracting a day from the 1st lands in the previous month;
        # replace(day=1) then rewinds to that month's first day.
        first_of_this_month = datetime.datetime(now.year, now.month, 1)
        first_of_last_month = (
            first_of_this_month - datetime.timedelta(days=1)
        ).replace(day=1)

        form_es_base = (
            FormES()
            .domain(bundle.request.domain)
            .user_id([bundle.obj._id])
        )

        extras = {}
        extras['submitted_last_30'] = form_es_base.submitted(
            gte=thirty_days_ago, lte=now).run().total
        extras['completed_last_30'] = form_es_base.completed(
            gte=thirty_days_ago, lte=now).run().total
        # Upper bound is exclusive so that a form stamped exactly at
        # midnight on the 1st is not counted as part of last month.
        extras['submitted_last_month'] = form_es_base.submitted(
            gte=first_of_last_month, lt=first_of_this_month).run().total
        extras['completed_last_month'] = form_es_base.completed(
            gte=first_of_last_month, lt=first_of_this_month).run().total
        bundle.data['extras'] = extras
    return super(UserResource, self).dehydrate(bundle)
def test_query_by_user(self):
    """Check ``FormES.user_id`` totals with and without a domain filter.

    Two forms are shipped for the first user and one for the second.
    """
    domain = 'test-by-user-{}'.format(self.test_id)
    user1 = 'user1-{}'.format(self.test_id)
    user2 = 'user2-{}'.format(self.test_id)

    forms_for_user1 = 2 * [TestFormMetadata(domain=domain, user_id=user1)]
    forms_for_user2 = 1 * [TestFormMetadata(domain=domain, user_id=user2)]
    self._ship_forms_to_es(forms_for_user1 + forms_for_user2)

    expected_totals = [
        (2, [user1]),
        (1, [user2]),
        (3, [user1, user2]),
    ]

    for expected, user_ids in expected_totals:
        self.assertEqual(expected, FormES().user_id(user_ids).run().total)

    # also test with domain filter
    self.assertEqual(3, FormES().domain(domain).run().total)
    for expected, user_ids in expected_totals:
        self.assertEqual(
            expected,
            FormES().domain(domain).user_id(user_ids).run().total)
def _forms_with_attachments(domain, app_id, xmlns, datespan, user_types):
    """Yield each form that has at least one non-XML external blob.

    Every qualifying form is yielded exactly once, however many non-XML
    attachments it has.

    :param domain: value passed to the ``FormES`` domain filter
    :param app_id: passed to the query's ``app`` filter
    :param xmlns: passed to the query's ``xmlns`` filter
    :param datespan: object exposing ``startdate`` and ``enddate``; the
        upper bound used is ``enddate`` plus one day (``lte``)
    :param user_types: when truthy, restrict the query with ``user_type``
    :return: generator of form hits (source limited to ``_id`` and
        ``external_blobs``)
    """
    enddate = datespan.enddate + timedelta(days=1)
    query = (
        FormES()
        .domain(domain)
        .app(app_id)
        .xmlns(xmlns)
        .submitted(gte=datespan.startdate, lte=enddate)
        .remove_default_filter("has_user")
        .source(['_id', 'external_blobs'])
    )

    if user_types:
        query = query.user_type(user_types)

    for form in query.scroll():
        try:
            # Short-circuit on the first non-XML blob so a form with
            # several multimedia attachments is not yielded repeatedly.
            has_multimedia = any(
                attachment['content_type'] != "text/xml"
                for attachment in form.get('external_blobs', {}).values()
            )
        except AttributeError:
            # Best effort: skip forms whose blob data is not dict-like.
            continue
        if has_multimedia:
            yield form
def get_all_xmlns_app_id_pairs_submitted_to_in_domain(domain):
    """Collect the (XMLNS, app_id) pairs seen on submitted forms.

    The results get combined with data from all current app versions, so
    this is only needed for (XMLNS, app_id) combos from forms submitted in
    the past that no longer have a corresponding form in the app (e.g. the
    form was deleted).

    Given that, the result can be cached for a long period of time, under
    the assumption that a user isn't going to submit a form and then delete
    it from their app immediately.

    :param domain: value passed to the ``FormES`` domain filter
    :return: set of ``(xmlns, app_id)`` tuples
    """
    query = FormES().domain(domain)

    # Nested terms aggregation: app_id buckets, each split by exact xmlns.
    xmlns_by_app = TermsAggregation("app_id", "app_id").aggregation(
        TermsAggregation("xmlns", "xmlns.exact"))
    query = query.aggregation(xmlns_by_app)
    query = query.remove_default_filter("has_xmlns")
    query = query.remove_default_filter("has_user")
    query = query.size(0)

    app_buckets = query.run().aggregations.app_id.buckets_dict
    return {
        (xmlns_bucket.key, app_id)
        for app_id, app_bucket in app_buckets.items()
        for xmlns_bucket in app_bucket.xmlns.buckets_list
    }
def _get_form_counts_by_user(domain, datespan, is_submission_time,
                             user_ids=None, export=False):
    """Return per-user form counts for ``domain`` over ``datespan``.

    Forms whose xmlns is a key of ``SYSTEM_FORM_XMLNS_MAP`` are excluded.

    :param domain: value passed to the ``FormES`` domain filter
    :param datespan: object exposing ``startdate`` and ``enddate``; only
        the ``.date()`` part of each is used (both bounds inclusive)
    :param is_submission_time: filter on ``submitted`` when true,
        otherwise on ``completed``
    :param user_ids: when truthy, restrict the query to these users
    :param export: query ``ES_EXPORT_INSTANCE`` when true, otherwise
        ``ES_DEFAULT_INSTANCE``
    :return: ``counts_by_bucket()`` of the user aggregation
    """
    if export:
        es_instance = ES_EXPORT_INSTANCE
    else:
        es_instance = ES_DEFAULT_INSTANCE

    query = FormES(es_instance_alias=es_instance).domain(domain)
    for system_xmlns in SYSTEM_FORM_XMLNS_MAP.keys():
        query = query.filter(filters.NOT(xmlns_filter(system_xmlns)))

    # Pick the date filter method first, then apply the same bounds to it.
    apply_date_range = query.submitted if is_submission_time else query.completed
    query = apply_date_range(
        gte=datespan.startdate.date(),
        lte=datespan.enddate.date(),
    )

    if user_ids:
        query = query.user_id(user_ids)

    query = query.user_aggregation().size(0)
    return query.run().aggregations.user.counts_by_bucket()
def get_last_forms_by_app(user_id):
    """
    Get the most recent form submission per app for a given user id.

    :param user_id: id of a couch user
    :return: list holding, for every app the user has submitted to, the
        first hit of a top-hits aggregation ordered by ``received_on``
        descending
    """
    latest_submission = TopHitsAggregation(
        'top_hits_last_form_submissions',
        'received_on',
        is_ascending=False,
    )
    per_app = TermsAggregation('app_id', 'app_id').aggregation(latest_submission)
    query = FormES().user_id(user_id).aggregation(per_app).size(0)

    app_buckets = query.run().aggregations.app_id.buckets_dict
    return [
        app_bucket.top_hits_last_form_submissions.hits[0]
        for _app_id, app_bucket in app_buckets.items()
    ]
def get_last_form_submissions_by_user(domain, user_ids, app_id=None):
    """Return the latest form submission hits for each user.

    :param domain: value passed to the ``FormES`` domain filter
    :param user_ids: user ids to look up; if ``None`` is among them, forms
        with no ``form.meta.userID`` are also aggregated
    :param app_id: when truthy, restrict the query to this app
    :return: dict mapping each user id bucket key to the hits of its
        top-hits aggregation (ordered by ``received_on`` descending), plus
        a ``MISSING_KEY`` entry when ``None`` was in ``user_ids``
    """
    def _latest_submission_hits():
        # A fresh aggregation per use, as in the two separate call sites.
        return TopHitsAggregation(
            'top_hits_last_form_submissions',
            'received_on',
            is_ascending=False,
        )

    missing_users = None in user_ids

    query = (
        FormES()
        .domain(domain)
        .user_ids_handle_unknown(user_ids)
        .remove_default_filter('has_user')
        .aggregation(
            TermsAggregation('user_id', 'form.meta.userID')
            .aggregation(_latest_submission_hits())
        )
        .size(0)
    )

    if app_id:
        query = query.app(app_id)

    if missing_users:
        query = query.aggregation(
            MissingAggregation('missing_user_id', 'form.meta.userID')
            .aggregation(_latest_submission_hits())
        )

    aggregations = query.run().aggregations

    result = {}
    if missing_users:
        missing_bucket = aggregations.missing_user_id.bucket
        result[MISSING_KEY] = missing_bucket.top_hits_last_form_submissions.hits

    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is
    # Python 2 only and raises AttributeError on Python 3.
    for user_id, bucket in aggregations.user_id.buckets_dict.items():
        result[user_id] = bucket.top_hits_last_form_submissions.hits
    return result
def get_form_duration_stats_for_users(domain, app_id, xmlns, user_ids,
                                      startdate, enddate,
                                      by_submission_time=True):
    """Get the form duration stats for a group of users.

    :param domain: value passed to the ``FormES`` domain filter
    :param app_id: when truthy, restrict the query to this app
    :param xmlns: passed to the query's ``xmlns`` filter
    :param user_ids: passed to ``user_ids_handle_unknown``
    :param startdate: lower bound of the date filter (``gte``)
    :param enddate: upper bound of the date filter (``lt``, exclusive)
    :param by_submission_time: use ``submitted_filter`` when true,
        ``completed_filter`` otherwise
    :return: ``result`` of the ``duration_stats`` extended stats aggregation
    """
    if by_submission_time:
        date_filter_fn = submitted_filter
    else:
        date_filter_fn = completed_filter

    query = FormES().domain(domain).user_ids_handle_unknown(user_ids)
    query = query.remove_default_filter('has_user').xmlns(xmlns)
    query = query.filter(date_filter_fn(gte=startdate, lt=enddate))

    stats_aggregation = ExtendedStatsAggregation(
        'duration_stats',
        'form.meta.timeStart',
        script=_duration_script(),
    )
    query = query.aggregation(stats_aggregation).size(0)

    if app_id:
        query = query.app(app_id)

    aggregations = query.run().aggregations
    return aggregations.duration_stats.result
def get_number_of_forms_in_domain(domain):
    """Return the ``FormES`` count for ``domain``."""
    domain_forms = FormES().domain(domain)
    return domain_forms.count()
def _get_form_query(domain):
    """Build the base form query for ``domain`` on the export ES instance.

    The ``has_user`` default filter is removed.
    """
    export_query = FormES(es_instance_alias=ES_EXPORT_INSTANCE)
    export_query = export_query.domain(domain)
    return export_query.remove_default_filter('has_user')
def _get_unknown_user_type_user_ids_approx_count():
    """Return the cardinality of ``form.meta.userID`` over unknown-user-type forms.

    The value comes from a ``CardinalityAggregation`` run on forms filtered
    by ``UNKNOWN_USER_TYPE``.
    """
    users_count = CardinalityAggregation('users_count', 'form.meta.userID')
    query = FormES().user_type(UNKNOWN_USER_TYPE).aggregation(users_count)
    aggregations = query.run().aggregations
    return aggregations.users_count.value
def _get_unknown_user_type_user_ids():
    """Return the user aggregation keys for forms of ``UNKNOWN_USER_TYPE``."""
    query = FormES().user_type(UNKNOWN_USER_TYPE).user_aggregation()
    aggregations = query.run().aggregations
    return aggregations.user.keys
def _get_forms_in_es(form_ids):
    """Return the ``_id`` values found on the export ES instance for ``form_ids``.

    All default filters are removed before filtering on the ids.
    """
    query = FormES(es_instance_alias=ES_EXPORT_INSTANCE)
    query = query.remove_default_filters()
    query = query.form_ids(form_ids)
    return query.values_list('_id', flat=True)
def get_number_of_submissions(domain, year, month):
    """Count forms submitted in ``domain`` during the given month.

    The window comes from ``get_start_and_end_dates_of_month(year, month)``
    and is applied as ``gte`` start, ``lt`` end.
    """
    date_start, date_end = get_start_and_end_dates_of_month(year, month)
    query = FormES().fields(['received_on'])
    query = query.domain(domain)
    query = query.submitted(gte=date_start, lt=date_end)
    return query.count()
def _get_es_modified_dates_for_forms(domain, form_ids):
    """Map each form ``_id`` to its ``received_on`` value from the export ES instance.

    All default filters are removed; the query is limited to ``domain`` and
    ``form_ids``.
    """
    query = FormES(es_instance_alias=ES_EXPORT_INSTANCE)
    query = query.remove_default_filters()
    query = query.domain(domain)
    query = query.form_ids(form_ids)
    id_date_pairs = query.values_list('_id', 'received_on')
    return dict(id_date_pairs)
def _doc_counts(self):
    """Print total case, open case and total form counts for ``self.domain``."""
    total_cases = CaseES().domain(self.domain).count()
    self._print_value('Total cases', total_cases)

    open_cases = CaseES().domain(self.domain).is_closed(False).count()
    self._print_value('Open cases', open_cases)

    total_forms = FormES().domain(self.domain).count()
    self._print_value('Total forms', total_forms)
def send_unknown_user_type_stats():
    """Send two datadog gauges describing forms of ``UNKNOWN_USER_TYPE``.

    One gauge carries the approximate count of distinct users, the other
    the number of forms.
    """
    approx_user_count = _get_unknown_user_type_user_ids_approx_count()
    datadog_gauge(
        'commcare.fix_user_types.unknown_user_count',
        approx_user_count,
    )

    unknown_form_count = FormES().user_type(UNKNOWN_USER_TYPE).count()
    datadog_gauge(
        'commcare.fix_user_types.unknown_user_form_count',
        unknown_form_count,
    )
def test_unsupported_domain(self):
    """A form created for 'unsupported-domain' yields no ``report_xforms`` results."""
    _form, _metadata = self._create_form_and_sync_to_es('unsupported-domain')

    # expect the FormES("report_xforms") query to come back empty
    total = FormES("report_xforms").run().total
    self.assertEqual(0, total)
def _get_es_forms_received_on_date(self, date):
    """Count forms submitted from ``date`` (inclusive) to one month later (exclusive)."""
    one_month_later = date + relativedelta(months=1)
    query = FormES().submitted(gte=date, lt=one_month_later)
    return query.count()
def _received_on_query(domain, desc=False):
    """Build a ``FormES`` query for ``domain`` sorted by ``received_on``.

    Only the ``received_on`` field is requested.

    :param domain: value passed to the ``FormES`` domain filter
    :param desc: sort descending when true
    :return: the query, not yet run
    """
    query = FormES().fields(['received_on'])
    query = query.domain(domain)
    return query.sort('received_on', desc=desc)
def get_form_ids_in_domain_between_dates(domain, startdate, enddate):
    """Return hits for forms in ``domain`` modified on the server within a window.

    Both bounds on ``server_modified_on`` are inclusive, and only ``_id`` is
    requested as source. The query uses ``FormES(for_export=True)``.

    :return: the ``hits`` of the query result
    """
    query = FormES(for_export=True).domain(domain)
    query = query.date_range('server_modified_on', gte=startdate, lte=enddate)
    query = query.source(['_id'])
    return query.run().hits
def form_has_submissions(domain, app_id, xmlns):
    """Return whether any form in ``domain`` matches ``app_id`` and ``xmlns``."""
    query = FormES().domain(domain)
    query = query.app([app_id]).xmlns([xmlns])
    submission_count = query.count()
    return submission_count != 0
def _get_form_query(domain):
    """Build the base export form query for ``domain``.

    Uses ``FormES(for_export=True)`` with the ``has_user`` default filter
    removed.
    """
    export_query = FormES(for_export=True)
    export_query = export_query.domain(domain)
    return export_query.remove_default_filter('has_user')