def get_total_clients_data(domains, datespan, interval, datefield='opened_on'):
    """
    Return cases that have used SMS, bucketed by the date the case was opened.
    """
    # Ids of cases that appear as SMS recipients.
    sms_query = (SMSES()
                 .to_commcare_case()
                 .domain(domains)
                 .terms_aggregation('couch_recipient', 'cases')
                 .size(0))
    sms_case_ids = sms_query.run().aggregations.cases.keys

    # Histogram of those cases opened within the reporting window.
    in_range_query = (CaseES()
                      .domain(domains)
                      .filter({"ids": {"values": sms_case_ids}})
                      .opened_range(gte=datespan.startdate, lte=datespan.enddate)
                      .date_histogram('date', datefield, interval)
                      .size(0))
    histo_data = in_range_query.run().aggregations.date.as_facet_result()

    # Cases opened before the window provide the starting offset.
    opened_before = (CaseES()
                     .domain(domains)
                     .filter({"ids": {"values": sms_case_ids}})
                     .opened_range(lt=datespan.startdate)
                     .size(0)).run().total

    return format_return_data(histo_data, opened_before, datespan)
def get_total_clients_data(domains, datespan, interval, datefield='opened_on'):
    """
    Return cases that have used SMS, bucketed by the date the case was opened.
    """
    # Ids of cases that appear as SMS recipients.
    sms_query = (SMSES()
                 .to_commcare_case()
                 .domain(domains)
                 .terms_facet('couch_recipient', 'cases', size=LARGE_ES_NUMBER)
                 .size(0))
    sms_case_ids = [facet['term'] for facet in sms_query.run().facet('cases', 'terms')]

    # Histogram of those cases opened within the reporting window.
    in_range_query = (CaseES()
                      .domain(domains)
                      .filter({"ids": {"values": sms_case_ids}})
                      .opened_range(gte=datespan.startdate, lte=datespan.enddate)
                      .date_histogram('date', datefield, interval)
                      .size(0))
    histo_data = in_range_query.run().facet('date', 'entries')

    # Cases opened before the window provide the starting offset.
    opened_before = (CaseES()
                     .domain(domains)
                     .filter({"ids": {"values": sms_case_ids}})
                     .opened_range(lt=datespan.startdate)
                     .size(0)).run().total

    return format_return_data(histo_data, opened_before, datespan)
def cases_in_last(domain, days, case_type=None):
    """
    Return the number of open cases modified in the last ``days`` days,
    optionally restricted to a single case type.
    """
    utc_now = datetime.utcnow()
    window_start = json_format_datetime(utc_now - timedelta(days=int(days)))
    window_end = json_format_datetime(utc_now)
    query = (CaseES()
             .domain(domain)
             .modified_range(gte=window_start, lte=window_end)
             .is_closed(False))
    if case_type:
        query = query.case_type(case_type)
    return query.count()
def get_patient_display_cache(case_ids):
    """
    For a given set of case_ids, return name and pact_ids keyed by case id.
    """
    if not case_ids:
        return {}
    case_es = ReportCaseESView(PACT_DOMAIN)
    query = (CaseES()
             .remove_default_filters()
             .domain(PACT_DOMAIN)
             .source(["_id", "name"])
             .size(len(case_ids)))
    query = query.add_query({"ids": {"values": case_ids}})
    # Script fields compute extra values off the raw case document.
    query["script_fields"] = {
        "case_id": {"script": "_source._id"},
        "pactid": get_report_script_field("pactid"),
        "first_name": get_report_script_field("first_name"),
        "last_name": get_report_script_field("last_name"),
    }
    res = case_es.run_query(query.raw_query)

    from pact.reports.patient import PactPatientInfoReport

    base_url = PactPatientInfoReport.get_url(*['pact'])
    patient_cache = {}
    for hit in res['hits']['hits']:
        case_id = hit['case_id']
        hit['url'] = base_url + "?patient_id=%s" % case_id
        patient_cache[case_id] = hit
    return patient_cache
def get_case_ids(cls, domain, case_type, boundary_date=None):
    """
    Retrieves the case ids in chunks, yielding a list of case ids each time
    until there are none left.

    :param domain: domain to query
    :param case_type: case type to restrict to
    :param boundary_date: if given, only include cases last modified on the
        server at or before this date
    :raises ValueError: if the scroll yields anything other than id strings
    """
    chunk_size = 100
    query = (CaseES()
             .domain(domain)
             .case_type(case_type)
             .is_closed(closed=False)
             .exclude_source()
             .size(chunk_size))
    if boundary_date:
        query = query.server_modified_range(lte=boundary_date)
    result = []
    for case_id in query.scroll():
        # ``basestring`` does not exist on Python 3 and would raise
        # NameError here; six.string_types covers both Python 2 and 3
        # (consistent with scroll_case_ids_by_domain_and_case_type).
        if not isinstance(case_id, six.string_types):
            raise ValueError(
                "Something is wrong with the query, expected ids only")
        result.append(case_id)
        if len(result) >= chunk_size:
            yield result
            result = []
    if result:
        yield result
def scroll_case_ids_by_domain_and_case_type(domain, case_type, chunk_size=100):
    """
    Yield lists of at most ``chunk_size`` case ids until the scroll is
    exhausted. Only filters on domain and case type, and includes both
    open and closed cases.
    """
    query = (CaseES()
             .domain(domain)
             .case_type(case_type)
             .exclude_source()
             .size(chunk_size))
    batch = []
    for case_id in query.scroll():
        if not isinstance(case_id, six.string_types):
            raise ValueError(
                "Something is wrong with the query, expected ids only")
        soft_assert_type_text(case_id)
        batch.append(case_id)
        if len(batch) >= chunk_size:
            yield batch
            batch = []
    if batch:
        yield batch
def inactive_cases_in_last(domain, days):
    """
    Return the number of open cases NOT modified in the last ``days`` days
    (i.e. last modified strictly before the cutoff).
    """
    cutoff = json_format_datetime(
        datetime.utcnow() - timedelta(days=int(days)))
    return (CaseES()
            .domain(domain)
            .modified_range(lt=cutoff)
            .is_closed(False)
            .count())
def get_case_ids(cls, domain, case_type, boundary_date=None):
    """
    Return the ids of open cases of ``case_type`` in ``domain``, optionally
    limited to cases last modified on the server at or before
    ``boundary_date``.
    """
    query = (CaseES()
             .domain(domain)
             .case_type(case_type)
             .is_closed(closed=False)
             .exclude_source())
    if boundary_date is not None:
        query = query.server_modified_range(lte=boundary_date)
    return query.run().doc_ids
def db_comparisons(request):
    """
    Compare document counts between Couch views, Elasticsearch indices and
    SQL tables for users, domains, forms and cases, returned as JSON.
    """
    comparison_config = [{
        'description': 'Users (base_doc is "CouchUser")',
        'couch_db': CommCareUser.get_db(),
        'view_name': 'users/by_username',
        'es_query': UserES().remove_default_filter('active').remove_default_filter(
            'mobile_worker').size(0),
        'sql_rows': User.objects.count(),
    }, {
        'description': 'Domains (doc_type is "Domain")',
        'couch_db': Domain.get_db(),
        'view_name': 'domain/by_status',
        'es_query': DomainES().size(0),
        'sql_rows': None,
    }, {
        'description': 'Forms (doc_type is "XFormInstance")',
        'couch_db': XFormInstance.get_db(),
        'view_name': 'couchforms/by_xmlns',
        'es_query': FormES().remove_default_filter('has_xmlns').remove_default_filter(
            'has_user').size(0),
        'sql_rows': FormData.objects.count(),
    }, {
        'description': 'Cases (doc_type is "CommCareCase")',
        'couch_db': CommCareCase.get_db(),
        'view_name': 'case/by_owner',
        'es_query': CaseES().size(0),
        'sql_rows': None,
    }]

    comparisons = []
    for comp in comparison_config:
        comparisons.append({
            'description': comp['description'],
            'couch_docs': comp['couch_db'].view(
                comp['view_name'],
                reduce=True,
            ).one()['value'],
            'es_docs': comp['es_query'].run().total,
            # Explicit None check: a legitimate SQL count of 0 must be
            # reported as 0, not 'n/a' (the old truthiness test hid zeros).
            'sql_rows': comp['sql_rows'] if comp['sql_rows'] is not None else 'n/a',
        })
    return json_response(comparisons)
def _get_case_ids_from_es(cls, domain, case_type, boundary_date=None):
    """
    Yield ids of open cases of ``case_type`` in ``domain``, optionally
    limited to cases last modified on the server at or before
    ``boundary_date``.

    :raises ValueError: if the scroll yields anything other than id strings
    """
    query = (CaseES()
             .domain(domain)
             .case_type(case_type)
             .is_closed(closed=False)
             .exclude_source()
             .size(100))
    if boundary_date:
        query = query.server_modified_range(lte=boundary_date)
    for case_id in query.scroll():
        # ``basestring`` does not exist on Python 3 and would raise
        # NameError here; six.string_types covers both Python 2 and 3
        # (consistent with scroll_case_ids_by_domain_and_case_type).
        if not isinstance(case_id, six.string_types):
            raise ValueError(
                "Something is wrong with the query, expected ids only")
        yield case_id
def es_query_from_get_params(search_params, domain, reserved_query_params=None, doc_type='form'):
    """
    Build a raw ES query dict for a form or case search from HTTP GET params.

    :param search_params: mapping of query-string params (e.g. request.GET)
    :param domain: domain to scope the query to
    :param reserved_query_params: extra param names to exclude from filtering,
        merged with the module-level RESERVED_QUERY_PARAMS
    :param doc_type: 'form' or 'case'
    :raises Http400: if a date-valued param fails to parse
    :returns: the raw query dict (``query.raw_query``)
    """
    # doc_type can be form or case
    assert doc_type in ['form', 'case']
    es = FormES() if doc_type == 'form' else CaseES()
    query = es.remove_default_filters().domain(domain)
    if doc_type == 'form':
        # Forms are restricted by doc_type; archived forms are opt-in.
        if 'include_archived' in search_params:
            query = query.filter(
                filters.OR(filters.term('doc_type', 'xforminstance'),
                           filters.term('doc_type', 'xformarchived')))
        else:
            query = query.filter(filters.term('doc_type', 'xforminstance'))
    if '_search' in search_params:
        # This is undocumented usecase by Data export tool and one custom project
        # Validate that the passed in param is one of these two expected
        _filter = _validate_and_get_es_filter(
            json.loads(search_params['_search']))
        query = query.filter(_filter)
    # filters are actually going to be a more common case
    reserved_query_params = RESERVED_QUERY_PARAMS | set(reserved_query_params or [])
    query_params = {
        param: value
        for param, value in search_params.items()
        if param not in reserved_query_params and not param.endswith('__full')
    }
    for consumer in query_param_consumers:
        try:
            # NOTE(review): consume_params presumably removes the params it
            # handles from query_params so they are not re-filtered below —
            # confirm against the consumer implementations.
            payload_filter = consumer.consume_params(query_params)
        except DateTimeError as e:
            raise Http400("Bad query parameter: {}".format(str(e)))
        if payload_filter:
            query = query.filter(payload_filter)
    # add unconsumed filters
    for param, value in query_params.items():
        # assume these fields are analyzed in ES so convert to lowercase
        # Any fields that are not analyzed in ES should be in the ``query_param_consumers`` above
        value = value.lower()
        query = query.filter(filters.term(param, value))
    return query.raw_query
def get_active_supply_points_data(domains, datespan, interval):
    """
    Return one data point per interval counting supply-point cases active
    during that interval. Intervals with no active supply points are omitted.
    """
    histo_data = []
    for interval_start, interval_end in intervals(interval, datespan.startdate,
                                                  datespan.enddate):
        active_count = (CaseES()
                        .domain(domains)
                        .case_type(SUPPLY_POINT_CASE_TYPE)
                        .active_in_range(gte=interval_start, lte=interval_end)
                        .size(0)).run().total
        if active_count > 0:
            histo_data.append(get_data_point(active_count, interval_end))
    return format_return_data(histo_data, 0, datespan)
def db_comparisons(request):
    """
    Compare document counts between Couch, Elasticsearch and SQL for users,
    domains, forms and cases, returned as JSON.
    """
    def _simple_view_couch_query(db, view_name):
        # Reduced view gives a single total-count row.
        return db.view(view_name, reduce=True).one()['value']

    comparison_config = [
        {
            'description': 'Users (base_doc is "CouchUser")',
            'couch_docs': _simple_view_couch_query(CommCareUser.get_db(), 'users/by_username'),
            'es_query': UserES().remove_default_filter('active').size(0),
            'sql_rows': User.objects.count(),
        },
        {
            'description': 'Domains (doc_type is "Domain")',
            'couch_docs': _simple_view_couch_query(Domain.get_db(), 'domain/by_status'),
            'es_query': DomainES().size(0),
            'sql_rows': None,
        },
        {
            'description': 'Forms (doc_type is "XFormInstance")',
            'couch_docs': get_number_of_forms_all_domains_in_couch(),
            'es_query': FormES().remove_default_filter('has_xmlns')
                .remove_default_filter('has_user')
                .size(0),
            'sql_rows': FormData.objects.exclude(domain__isnull=True).count(),
        },
        {
            'description': 'Cases (doc_type is "CommCareCase")',
            'couch_docs': get_total_case_count(),
            'es_query': CaseES().size(0),
            'sql_rows': CaseData.objects.exclude(domain__isnull=True).count(),
        }
    ]

    comparisons = []
    for comp in comparison_config:
        comparisons.append({
            'description': comp['description'],
            'couch_docs': comp['couch_docs'],
            'es_docs': comp['es_query'].run().total,
            # Explicit None check: a legitimate SQL count of 0 must be
            # reported as 0, not 'n/a' (the old truthiness test hid zeros).
            'sql_rows': comp['sql_rows'] if comp['sql_rows'] is not None else 'n/a',
        })
    return json_response(comparisons)
def handle(self, domain, case_type, *args, **options):
    """
    Backfill blank case names for a case type: find open cases whose
    ``name`` is empty in ES, log the value they would be updated to in a
    CSV, and (if confirmed) bulk-update them in batches.
    """
    perform_update = True
    # Query the export ES instance to avoid loading the primary cluster.
    query = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE).domain(
        domain).case_type(case_type).is_closed(False).term(
            'name.exact', ''))
    cases_count = query.count()
    print("Number of cases to be updated approximately: %s" % cases_count)
    if not input("Do you wish to update cases (y/n)") == 'y':
        perform_update = False
        # Declining both the update and the log-only run aborts entirely.
        if not input("Do you wish to just log updates (y/n)") == 'y':
            exit(0)
    case_ids = query.get_ids()
    print("Begin iterating %s cases" % len(case_ids))
    case_accessor = CaseAccessors(domain)
    case_updates = []
    filename = "case_updates_%s_%s_%s.csv" % (domain, case_type,
                                              datetime.utcnow())
    with open(filename, 'w') as f:
        writer = csv.DictWriter(f, ['case_id', 'new_value'])
        writer.writeheader()
        for case_id in with_progress_bar(case_ids):
            case = case_accessor.get_case(case_id)
            # Re-check against the authoritative store: ES may be stale.
            if case.name:
                continue
            update_to_name = get_last_non_blank_value(case, 'name')
            if update_to_name:
                writer.writerow({
                    'case_id': case_id,
                    'new_value': update_to_name
                })
                if perform_update:
                    case_updates.append((case_id, {
                        'name': update_to_name
                    }, False))
            # update batch when we have the threshold
            if len(case_updates) == CASE_UPDATE_BATCH:
                bulk_update_cases(domain, case_updates, DEVICE_ID)
                case_updates = []
        # submit left over case updates
        if case_updates:
            print("Performing last batch of updates")
            bulk_update_cases(domain, case_updates, DEVICE_ID)
    print("Finished. Update details in %s" % filename)
def get_case_by_identifier(domain, identifier):
    """
    Look up a case in ``domain`` by any allowed identifier type, falling
    back to treating ``identifier`` as a case id. Return None if nothing
    matches.
    """
    case_accessors = CaseAccessors(domain)
    # Try by any of the allowed identifiers
    for identifier_type in ALLOWED_CASE_IDENTIFIER_TYPES:
        matching_ids = (CaseES()
                        .domain(domain)
                        .filter(filters.term(identifier_type, identifier))
                        .get_ids())
        if matching_ids:
            return case_accessors.get_case(matching_ids[0])
    # Try by case id
    try:
        case = case_accessors.get_case(identifier)
    except (CaseNotFound, KeyError):
        return None
    if case.domain == domain:
        return case
    return None
def es_results(self):
    """
    Run the report-cases ES query with this report's type/status filters,
    sorting and pagination, returning the raw hits.
    """
    query = CaseES('report_cases').domain(self.domain)
    if self.case_type:
        query = query.case_type(self.case_type)
    if self.case_filter:
        query = query.filter(self.case_filter)
    if self.case_status:
        # 'closed'/'open' status maps onto the boolean ``closed`` field.
        is_closed = self.case_status == 'closed'
        query = query.filter(filters.term("closed", is_closed))
    return (query
            .set_sorting_block(self.get_sorting_block())
            .start(self.pagination.start)
            .size(self.pagination.count)
            .run().hits)
def handle(self, **options):
    """
    Find cases in a domain whose indices reference nonexistent case ids,
    write the details to a CSV, and optionally clean up: resync cases that
    actually exist, and delete case networks for deleted/missing references.
    """
    domain = options['domain']
    debug = options['debug']
    cleanup = options['cleanup']
    domain_query = CaseES().domain(domain)
    # All real case ids in the domain.
    valid_case_ids = set(domain_query.get_ids())
    # Every case id referenced by any case index in the domain.
    referenced_case_ids = {
        index['referenced_id']
        for hit in domain_query.source('indices.referenced_id').run().hits
        for index in hit['indices']
    }
    invalid_referenced_ids = referenced_case_ids - valid_case_ids
    if len(invalid_referenced_ids) > ES_MAX_CLAUSE_COUNT:
        print("there's a lot of invalid ids here. ES queries may not handle this well")
    cases_with_invalid_references = (
        domain_query
        .term('indices.referenced_id', invalid_referenced_ids)
        .source(['_id', 'type', 'indices', 'owner_id', 'opened_by', 'xform_ids'])
        .run().hits
    )
    with open(options['filename'], 'w', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        headers = [
            'case id',
            'case type',
            'creating form id',
            'referenced id',
            'referenced_type',
            'index relationship',
            'index identifier',
            'owner id',
            'owner name',
            'opened by id',
            'opened by name',
        ]
        if debug:
            headers.append('app version')
        writer.writerow(headers)
        for case in cases_with_invalid_references:
            for index in case['indices']:
                if index['referenced_id'] in invalid_referenced_ids:
                    # NOTE(review): assumes the first form id is the creating
                    # form — confirm ordering of xform_ids.
                    form_id = case['xform_ids'][0]
                    row = [
                        case['_id'],
                        case['type'],
                        form_id,
                        index['referenced_id'],
                        index['referenced_type'],
                        index['relationship'],
                        index['identifier'],
                        case['owner_id'],
                        cached_owner_id_to_display(case['owner_id']),
                        case['opened_by'],
                        cached_owner_id_to_display(case['opened_by']),
                    ]
                    if debug:
                        form = FormAccessors(domain=domain).get_form(form_id)
                        app_version_info = get_app_version_info(
                            domain,
                            form.build_id,
                            form.form_data['@version'],
                            form.metadata,
                        )
                        row.append(app_version_info.build_version)
                    writer.writerow(row)
    if cleanup:
        missing = set()
        deleted = set()
        exists = set()
        # Classify each invalid reference by what the authoritative store says.
        for invalid_id in invalid_referenced_ids:
            try:
                case = CaseAccessors(domain).get_case(invalid_id)
            except CaseNotFound:
                missing.add(invalid_id)
            else:
                if case.is_deleted:
                    deleted.add(case)
                else:
                    exists.add(case)
        for case_to_resync in exists:
            # if the case actually exists resync it to fix the es search
            resave_case(domain, case_to_resync, send_post_save_signal=False)
        if exists:
            print('resynced {} cases that were actually not deleted'.format(len(exists)))
        for case in deleted:
            # delete the deleted case's entire network in one go
            call_command('delete_related_cases', domain, case.case_id)
        for case in cases_with_invalid_references:
            for index in case['indices']:
                if index['referenced_id'] in missing:
                    # this is just an invalid reference. no recourse but to delete the case itself
                    call_command('delete_related_cases', domain, case['_id'])
def handle(self, **options):
    """
    Find cases in a domain whose indices reference nonexistent case ids and
    write the details to a CSV; with --cleanup, classify each invalid
    reference as missing, deleted, or actually existing.

    NOTE(review): after populating ``missing``/``deleted``/``exists`` the
    sets are not acted on here — this looks truncated relative to the other
    copy of this command in the file; confirm against the full source.
    """
    domain = options['domain']
    debug = options['debug']
    cleanup = options['cleanup']
    domain_query = CaseES().domain(domain)
    # All real case ids in the domain.
    valid_case_ids = set(domain_query.get_ids())
    # Every case id referenced by any case index in the domain.
    referenced_case_ids = {
        index['referenced_id']
        for hit in domain_query.source('indices.referenced_id').run().hits
        for index in hit['indices']
    }
    invalid_referenced_ids = referenced_case_ids - valid_case_ids
    if len(invalid_referenced_ids) > ES_MAX_CLAUSE_COUNT:
        print(
            "there's a lot of invalid ids here. ES queries may not handle this well"
        )
    cases_with_invalid_references = (domain_query.term(
        'indices.referenced_id', invalid_referenced_ids).source([
            '_id', 'type', 'indices', 'owner_id', 'opened_by', 'xform_ids'
        ]).run().hits)
    with open(options['filename'], 'w', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        headers = [
            'case id',
            'case type',
            'creating form id',
            'referenced id',
            'referenced_type',
            'index relationship',
            'index identifier',
            'owner id',
            'owner name',
            'opened by id',
            'opened by name',
        ]
        if debug:
            headers.append('app version')
        writer.writerow(headers)
        for case in cases_with_invalid_references:
            for index in case['indices']:
                if index['referenced_id'] in invalid_referenced_ids:
                    # NOTE(review): assumes the first form id is the creating
                    # form — confirm ordering of xform_ids.
                    form_id = case['xform_ids'][0]
                    row = [
                        case['_id'],
                        case['type'],
                        form_id,
                        index['referenced_id'],
                        index['referenced_type'],
                        index['relationship'],
                        index['identifier'],
                        case['owner_id'],
                        cached_owner_id_to_display(case['owner_id']),
                        case['opened_by'],
                        cached_owner_id_to_display(case['opened_by']),
                    ]
                    if debug:
                        form = FormAccessors(
                            domain=domain).get_form(form_id)
                        app_version_info = get_app_version_info(
                            domain,
                            form.build_id,
                            form.form_data['@version'],
                            form.metadata,
                        )
                        row.append(app_version_info.build_version)
                    writer.writerow(row)
    if cleanup:
        missing = set()
        deleted = set()
        exists = set()
        # Classify each invalid reference by what the authoritative store says.
        for invalid_id in invalid_referenced_ids:
            try:
                case = CaseAccessors(domain).get_case(invalid_id)
            except CaseNotFound:
                missing.add(invalid_id)
            else:
                if case.is_deleted:
                    deleted.add(case)
                else:
                    exists.add(case)
def get_base_case_es_query(start=0, size=DEFAULT_SIZE):
    """
    Return a paged CaseES query over the PACT domain with the default
    filters removed.
    """
    query = CaseES().remove_default_filters().domain(PACT_DOMAIN)
    return query.start(start).size(size)
def get_case_ids(cls, domain, case_type, boundary_date=None):
    """
    Return ids of open cases of ``case_type`` in ``domain``, optionally
    limited to cases last modified on the server at or before
    ``boundary_date``.
    """
    query = (CaseES()
             .domain(domain)
             .case_type(case_type)
             .is_closed(closed=False)
             .exclude_source())
    if boundary_date is not None:
        query = query.server_modified_range(lte=boundary_date)
    return query.run().doc_ids