def handle(self, log_file, **options):
    """Bulk-update household/household_member cases for the Nepal Suaahara
    domain, logging successful form ids and failed case ids to ``log_file``.

    Fix: each log write is newline-terminated so the log is one entry per
    line (previously all ids ran together on a single line; the sibling
    command for closed households already writes '\\n'-terminated entries).
    """
    self.domain = 'hki-nepal-suaahara-2'

    # Map level-4 location site codes to location ids for the case updates.
    loc_mapping = {}
    locs = SQLLocation.objects.filter(domain=self.domain, level=4)
    for loc in locs:
        loc_mapping[loc.site_code] = loc.location_id

    failed_updates = []
    # Total is only used to size the progress bar.
    household_cases = CaseES().domain(self.domain).case_type('household').count()
    member_cases = CaseES().domain(self.domain).case_type('household_member').count()
    total_cases = household_cases + member_cases

    with open(log_file, "w", encoding='utf-8') as fh:
        fh.write('--------Successful Form Ids----------\n')
        for cases in chunked(
                with_progress_bar(self._get_cases_to_process(), total_cases), 100):
            cases_to_update = self._process_cases(cases, failed_updates, loc_mapping)
            try:
                xform, cases = bulk_update_cases(
                    self.domain, cases_to_update, self.__module__)
                fh.write(xform.form_id + '\n')
            except LocalSubmissionError as e:
                print(six.text_type(e))
                # Record the case ids from the failed chunk for the log tail.
                failed_updates.extend(case[0] for case in cases_to_update)
        fh.write('--------Failed Cases--------------\n')
        for case_id in failed_updates:
            fh.write(case_id + '\n')
def test_case_soft_deletion(self):
    """Soft-deleting a case removes it from the case ES index."""
    case_id, case_name = self._create_case_and_sync_to_es()

    # The case is initially present in the index.
    self.assertEqual(1, CaseES().run().total)

    # Soft delete it while the pillow processes changes.
    with self.process_case_changes:
        CaseAccessors(self.domain).soft_delete_cases([case_id])
    self.elasticsearch.indices.refresh(CASE_INDEX_INFO.index)

    # It must no longer be returned from ES.
    self.assertEqual(0, CaseES().run().total)
def _cases_created_per_user_per_month(self, case_type=None):
    """Print a table of the average number of cases created per user,
    broken down by month, optionally restricted to one case type."""
    query = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
             .domain(self.domain)
             .opened_range(gte=self.date_start, lt=self.date_end)
             .aggregation(
                 TermsAggregation('cases_per_user', 'owner_id', size=100).aggregation(
                     DateHistogram('cases_by_date', 'opened_on', interval='month'))))
    if case_type:
        query = query.case_type(case_type)
    results = query.size(0).run()

    # month string -> list of per-user case counts for that month
    stats = defaultdict(list)
    for bucket in results.aggregations.cases_per_user.buckets_list:
        for sub in bucket.cases_by_date.normalized_buckets:
            stats[sub['key_as_string']].append(sub['doc_count'])

    # Integer average per user, ordered chronologically by month key.
    final_stats = [
        (month, sum(counts) // len(counts))
        for month, counts in sorted(stats.items(), key=lambda r: r[0])
    ]

    suffix = '(case type: %s)' % case_type if case_type else ''
    self._print_table(['Month', 'Cases created per user %s' % suffix], final_stats)
def test_case_soft_deletion(self):
    """Soft deletion propagates through both pillows and drops the case from ES."""
    case_id, case_name = self._create_case_and_sync_to_es()

    # Sanity check: the case is indexed.
    self.assertEqual(1, CaseES().run().total)

    # Soft delete with both pillows consuming the change feed.
    with process_pillow_changes('case-pillow', {'skip_ucr': True}):
        with process_pillow_changes('DefaultChangeFeedPillow'):
            CaseAccessors(self.domain).soft_delete_cases([case_id])
    self.elasticsearch.indices.refresh(CASE_INDEX_INFO.index)

    # The case has been removed from the index.
    self.assertEqual(0, CaseES().run().total)
def scroll_case_names(domain, case_ids):
    """Scroll over the given cases in a domain, fetching only name and _id."""
    return (CaseES()
            .domain(domain)
            .case_ids(case_ids)
            .source(['name', '_id'])
            .size(CASE_SCROLL_SIZE)
            .scroll())
def _get_case_counts_by_user(domain, datespan, case_types=None, is_opened=True,
                             user_ids=None, export=False):
    """Return a {user_id: count} mapping of cases opened (or closed, when
    ``is_opened`` is False) by each user within the datespan."""
    if is_opened:
        date_field, user_field = 'opened_on', 'opened_by'
    else:
        date_field, user_field = 'closed_on', 'closed_by'
    es_instance = ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE

    query = (CaseES(es_instance_alias=es_instance)
             .domain(domain)
             .filter(filters.date_range(
                 date_field,
                 gte=datespan.startdate.date(),
                 lte=datespan.enddate.date(),
             ))
             .terms_aggregation(user_field, 'by_user')
             .size(0))

    if case_types:
        query = query.case_type(case_types)
    else:
        # Without an explicit type list, exclude the synthetic user case type.
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if user_ids:
        query = query.filter(filters.term(user_field, user_ids))

    return query.run().aggregations.by_user.counts_by_bucket()
def _cases_updated_per_user_per_month(self):
    """Print a table of the average number of case updates (actions) per
    user, broken down by month."""
    per_user_agg = TermsAggregation('cases_per_user', 'owner_id', size=100).aggregation(
        NestedAggregation('actions', 'actions').aggregation(
            DateHistogram('cases_by_date', 'server_date', interval='month')))
    results = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
               .domain(self.domain)
               .active_in_range(gte=self.date_start, lt=self.date_end)
               .aggregation(per_user_agg)
               .size(0)
               .run())

    # month string -> list of per-user action counts for that month
    stats = defaultdict(list)
    for bucket in results.aggregations.cases_per_user.buckets_list:
        for sub in bucket.actions.cases_by_date.normalized_buckets:
            stats[sub['key_as_string']].append(sub['doc_count'])

    # Integer average per user, ordered chronologically.
    final_stats = [
        (month, sum(counts) // len(counts))
        for month, counts in sorted(stats.items(), key=lambda r: r[0])
    ]
    self._print_table(['Month', 'Cases updated per user'], final_stats)
def options(self):
    """Return (case id, participant name) choices for the requesting user.

    Web users with groups see cases owned by those groups or by themselves;
    other users see only their own cases.
    """
    query = (CaseES("report_cases")
             .domain(self.domain)
             .case_type("participant"))
    user = self.request.couch_user
    if user.is_web_user():
        owner_ids = [o.lower() for o in user.get_group_ids() if o]
        if owner_ids:
            query = query.filter(
                filters.OR(owner(owner_ids), user_filter(user._id.lower())))
    else:
        query = query.user(user._id.lower())
    results = query.values("full_name.#value")
    return [(case['_id'], case['full_name']['#value']) for case in results]
def test_location_restricted_cases(self):
    """A location-restricted user sees only cases owned at or below their
    assigned location."""
    domain_obj = bootstrap_domain(self.domain)
    self.addCleanup(domain_obj.delete)

    location_type_names = ['state', 'county', 'city']
    location_structure = [
        ('Massachusetts', [
            ('Middlesex', [
                ('Cambridge', []),
                ('Somerville', []),
            ]),
            ('Suffolk', [
                ('Boston', []),
            ]),
        ]),
    ]
    locations = setup_locations_and_types(
        self.domain, location_type_names, [], location_structure)[1]

    # Create a user assigned to (and restricted to) Middlesex county.
    middlesex_user = CommCareUser.create(
        self.domain, 'guy-from-middlesex', '***', None, None)
    middlesex_user.add_to_assigned_locations(locations['Middlesex'])
    restrict_user_by_location(self.domain, middlesex_user)

    fake_request = MagicMock()
    fake_request.domain = self.domain
    fake_request.couch_user = middlesex_user

    # One case outside the user's subtree (Boston), two inside it.
    self._send_case_to_es(owner_id=locations['Boston'].get_id)
    middlesex_case = self._send_case_to_es(owner_id=locations['Middlesex'].get_id)
    cambridge_case = self._send_case_to_es(owner_id=locations['Cambridge'].get_id)

    returned_case_ids = query_location_restricted_cases(
        CaseES().domain(self.domain), fake_request).get_ids()
    self.assertItemsEqual(
        returned_case_ids,
        [middlesex_case.case_id, cambridge_case.case_id])
def get_case_export_base_query(domain, case_type):
    """Build the base ES query for exporting cases of ``case_type``,
    sorted by opened_on unless sorting is disabled for the domain."""
    query = CaseES().domain(domain).case_type(case_type)
    if EXPORT_NO_SORT.enabled(domain):
        return query
    return query.sort("opened_on")
def _get_es_modified_dates(case_ids):
    """Map case id -> (server_modified_on datetime or None, domain) from ES.

    Fix: guard against a missing ``server_modified_on`` value — the sibling
    implementation (``CaseES(for_export=True)`` variant) already returns
    None in that case, while this one would crash in
    ``iso_string_to_datetime``.
    """
    results = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
               .case_ids(case_ids)
               .values_list('_id', 'server_modified_on', 'domain'))
    return {
        _id: (iso_string_to_datetime(server_modified_on)
              if server_modified_on else None, domain)
        for _id, server_modified_on, domain in results
    }
def _get_case_counts_by_user(domain, datespan, case_types=None, is_opened=True,
                             owner_ids=None):
    """Return a {user_id: count} mapping of cases opened (or closed, when
    ``is_opened`` is False) by each user within the datespan."""
    if is_opened:
        date_field, user_field = 'opened_on', 'opened_by'
    else:
        date_field, user_field = 'closed_on', 'closed_by'

    query = (CaseES()
             .domain(domain)
             .filter(filters.date_range(
                 date_field,
                 gte=datespan.startdate.date(),
                 lte=datespan.enddate.date(),
             ))
             .terms_aggregation(user_field, 'by_user')
             .size(0))

    if case_types:
        query = query.case_type(case_types)
    else:
        # Without an explicit type list, exclude the synthetic user case type.
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if owner_ids:
        # NOTE(review): despite the parameter name, these ids are matched
        # against the opened_by/closed_by user field — confirm with callers.
        query = query.filter(filters.term(user_field, owner_ids))

    return query.run().aggregations.by_user.counts_by_bucket()
def handle(self, domain, log_file, **options):
    """Re-save cases related to closed households, logging successful form
    ids and failed case ids to ``log_file``.

    Fix: the failed case ids are newline-terminated like every other write
    in this function, so they no longer run together on a single line.
    """
    total_cases = CaseES().domain(domain).case_type(
        'household').is_closed().count()
    self.case_accessor = CaseAccessors(domain)
    failed_updates = []
    with open(log_file, "w", encoding='utf-8') as fh:
        fh.write('--------Successful Form Ids----------\n')
        for cases in chunked(
                with_progress_bar(self._get_cases_to_process(domain),
                                  total_cases), 100):
            related_cases = self._get_related_cases(cases)
            # (case_id, {}, close) tuples: no property changes, just touch.
            case_tupes = [(case_id, {}, True) for case_id in related_cases]
            try:
                xform, cases = bulk_update_cases(domain, case_tupes,
                                                 self.__module__)
                fh.write(xform.form_id + '\n')
            except LocalSubmissionError as e:
                print('submission error')
                print(six.text_type(e))
                failed_updates.extend(related_cases)
            except Exception as e:
                # Best-effort: record the whole chunk as failed and continue.
                print('unexpected error')
                print(six.text_type(e))
                failed_updates.extend(related_cases)
        fh.write('--------Failed Cases--------------\n')
        for case_id in failed_updates:
            fh.write(case_id + '\n')
    print('-------------COMPLETE--------------')
class ReportCaseReindexerTest(TestCase):
    """Tests the report-case reindexer against a freshly reset ES index."""

    def setUp(self):
        super(ReportCaseReindexerTest, self).setUp()
        FormProcessorTestUtils.delete_all_xforms()
        FormProcessorTestUtils.delete_all_cases()
        with trap_extra_setup(ConnectionError):
            self.elasticsearch = get_es_new()
            ensure_index_deleted(REPORT_CASE_INDEX_INFO.index)

    def tearDown(self):
        FormProcessorTestUtils.delete_all_xforms()
        FormProcessorTestUtils.delete_all_cases()
        ensure_index_deleted(REPORT_CASE_INDEX_INFO.index)
        super(ReportCaseReindexerTest, self).tearDown()

    @run_with_all_backends
    def test_report_case_reindexer(self):
        """Only cases from the supported domain end up in the index."""
        cases_included = set()
        for i in range(3):
            case = create_and_save_a_case(
                DOMAIN, uuid.uuid4().hex, 'case_name-{}'.format(i))
            cases_included.add(case.case_id)

        # A case in an unsupported domain must be excluded by the reindexer.
        create_and_save_a_case('unsupported', uuid.uuid4().hex, 'unsupported')

        reindex_and_clean('report-case')

        # Verify that exactly the supported cases made it into ES.
        results = CaseES("report_cases").run()
        self.assertEqual(3, results.total, results.hits)
        ids_in_es = {doc['_id'] for doc in results.hits}
        self.assertEqual(cases_included, ids_in_es)
def _get_case_case_counts_by_owner(domain, datespan, case_types, is_total=False,
                                   owner_ids=None, export=False):
    """Return a {owner_id: count} mapping of cases that were open during the
    datespan (or, when ``is_total`` is False, also modified within it)."""
    es_instance = ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE
    query = (CaseES(es_instance_alias=es_instance)
             .domain(domain)
             .opened_range(lte=datespan.enddate.date())
             .NOT(closed_range_filter(lt=datespan.startdate.date()))
             .terms_aggregation('owner_id', 'owner_id')
             .size(0))

    if case_types:
        query = query.filter({"terms": {"type.exact": case_types}})
    else:
        # Without an explicit type list, exclude the synthetic user case type.
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if not is_total:
        # "Active" counts require a modification within the datespan.
        query = query.active_in_range(
            gte=datespan.startdate.date(),
            lte=datespan.enddate.date(),
        )

    if owner_ids:
        query = query.owner(owner_ids)

    return query.run().aggregations.owner_id.counts_by_bucket()
def _get_es_modified_dates(case_ids):
    """Map case id -> (server_modified_on datetime or None, domain) from ES."""
    rows = (CaseES(for_export=True)
            .case_ids(case_ids)
            .values_list('_id', 'server_modified_on', 'domain'))
    modified_dates = {}
    for case_id, modified_on, domain in rows:
        # server_modified_on may be absent; keep None rather than crashing.
        parsed = iso_string_to_datetime(modified_on) if modified_on else None
        modified_dates[case_id] = (parsed, domain)
    return modified_dates
def get_case_export_base_query(domain, case_type):
    """Build the export-ES base query for cases of ``case_type``, sorted by
    opened_on unless sorting is disabled for the domain."""
    query = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
             .domain(domain)
             .case_type(case_type))
    if EXPORT_NO_SORT.enabled(domain):
        return query
    return query.sort("opened_on")
def options(self):
    """Return the distinct care sites found on this domain's report cases,
    as {'val': ..., 'text': ...} choices."""
    res = (CaseES('report_cases')
           .domain(self.domain)
           .exists('care_site_display.#value')
           .source('care_site_display')
           .run())
    # Deduplicate via a set comprehension before building the choice dicts.
    care_sites = {hit['care_site_display']['#value'] for hit in res.hits}
    return [{'val': site, 'text': site} for site in care_sites]
def get_case_ids_missing_from_elasticsearch(all_case_ids):
    """Return the subset of ``all_case_ids`` not present in the case index,
    checking in chunks of 500 ids."""
    missing = set()
    for chunk in chunked(all_case_ids, 500):
        requested = set(chunk)
        found = set(CaseES().doc_id(requested).get_ids())
        missing.update(requested - found)
        # ES must never return ids we did not ask for.
        assert found - requested == set()
    return list(missing)
def gen_children_only_ours(domain):
    """ Returns a list of child_gmp cases where external_id is not set """
    result = (CaseES()
              .domain(domain)
              .case_type(CASE_TYPE)
              .empty('external_id')
              .run())
    if not result.total:
        return
    for doc in result.hits:
        yield CommCareCase.wrap(doc)
def _get_child_cases(self, household_ids):
    """Fetch open person cases whose indices reference the given household ids."""
    return (CaseES(es_instance_alias='export')
            .domain('icds-cas')
            .case_type('person')
            .is_closed(False)
            .source(SOURCE_FIELDS)
            .filter(filters.term("indices.referenced_id", household_ids))
            .run())
def _get_closed_hh_cases(self, owners):
    """Scroll closed household cases for the given owners, fetching only
    case_id, closed_on, and name."""
    return (CaseES(es_instance_alias='export')
            .is_closed()
            .domain('icds-cas')
            .case_type('household')
            .owner(owners)
            .source(['case_id', 'closed_on', 'name'])
            .size(100)
            .scroll())
def test_report_case_pillow(self):
    """A new case lands in the report_cases index with its fields intact."""
    case_id, case_name = self._create_case_and_sync_to_es(DOMAIN)

    # Exactly one document should have been indexed.
    results = CaseES('report_cases').run()
    self.assertEqual(1, results.total)

    case_doc = results.hits[0]
    self.assertEqual(DOMAIN, case_doc['domain'])
    self.assertEqual(case_id, case_doc['_id'])
    self.assertEqual(case_name, case_doc['name'])
def get_duplicate_id_case_info(domain, case_type, limit_debug_to=None):
    """Build template context summarizing duplicated-id cases of a type."""
    total_cases = CaseES().domain(domain).case_type(case_type).count()
    bad_cases = get_duplicated_case_stubs(domain, case_type)
    add_debug_info_to_cases(bad_cases, limit_debug_to)
    num_bad = len(bad_cases)
    return {
        'case_type': case_type,
        'num_bad_cases': num_bad,
        'num_total_cases': total_cases,
        'num_good_cases': total_cases - num_bad,
        'bad_cases': bad_cases,
    }
def run_messaging_rule(domain, rule_id):
    """Kick off the given messaging rule across all partitioned DB shards."""
    rule = _get_cached_rule(domain, rule_id)
    if not rule:
        return

    # Seed the progress tracker with the total number of cases to process.
    progress_helper = MessagingRuleProgressHelper(rule_id)
    total_cases_count = (CaseES()
                         .domain(domain)
                         .case_type(rule.case_type)
                         .count())
    progress_helper.set_total_cases_to_be_processed(total_cases_count)

    db_aliases = get_db_aliases_for_partitioned_query()
    progress_helper.set_initial_progress(shard_count=len(db_aliases))
    for db_alias in db_aliases:
        run_messaging_rule_for_shard.delay(domain, rule_id, db_alias)
def print_totals(self, domains):
    """Write a tab-aligned table of CaseES vs CaseSearchES document counts
    per domain to stdout."""
    # Enough tab stops (8 chars each) to clear the longest domain name,
    # plus two extra for padding.
    max_space = '\t' * (int(max([len(x) for x in domains]) / 8) + 2)
    header = 'Domain{}CaseES\t\tCaseSearchES\n'.format(max_space)
    divider = '{}\n'.format('*' * (len(header) + len(max_space) * 8))

    for line in (divider, header, divider):
        self.stdout.write(line)

    for domain in domains:
        # Drop the tab stops already consumed by the domain name itself.
        spacer = max_space[int(len(domain) / 8):]
        total_case_es = CaseES().domain(domain).count()
        total_case_search = CaseSearchES().domain(domain).count()
        self.stdout.write('{domain}{spacer}{case_es}\t\t{case_search}\n'.format(
            domain=domain,
            spacer=spacer,
            case_es=total_case_es,
            case_search=total_case_search,
        ))
def test_case_pillow_error_in_case_es(self):
    """Transform failures in both indexers leave no documents in ES and
    record exactly one PillowError for retry."""
    self.assertEqual(
        0, PillowError.objects.filter(pillow='case-pillow').count())

    with patch('corehq.pillows.case_search.domain_needs_search_index',
               return_value=True), \
            patch('corehq.pillows.case.transform_case_for_elasticsearch') as case_transform, \
            patch('corehq.pillows.case_search.transform_case_for_elasticsearch') as case_search_transform:
        # Both transforms blow up, so neither index can be written.
        case_transform.side_effect = Exception('case_transform error')
        case_search_transform.side_effect = Exception('case_search_transform error')
        case_id, case_name = self._create_case_and_sync_to_es()

    # Neither index received the document.
    self.assertEqual(0, CaseSearchES().run().total)
    self.assertEqual(0, CaseES().run().total)

    # A single error was recorded for the pillow.
    self.assertEqual(
        1, PillowError.objects.filter(pillow='case-pillow').count())
def get_case_and_action_counts_for_domains(domains):
    """Return {domain: {'cases': n, 'case_actions': m}} for the given domains."""
    actions_agg = aggregations.NestedAggregation('actions', 'actions')
    domain_agg = aggregations.TermsAggregation(
        'domain', 'domain').aggregation(actions_agg)

    results = (CaseES()
               .filter(filters.term('domain', domains))
               .aggregation(domain_agg)
               .size(0)
               .run())
    buckets = results.aggregations.domain.buckets_dict

    def _stats_for(domain_name):
        # Domains with no cases have no bucket; report zeros for them.
        bucket = buckets.get(domain_name, None)
        return {
            'cases': bucket.doc_count if bucket else 0,
            'case_actions': bucket.actions.doc_count if bucket else 0,
        }

    return {domain: _stats_for(domain) for domain in domains}
def _get_case_case_counts_by_owner(domain, datespan, case_types, is_total=False,
                                   owner_ids=None):
    """Return a {owner_id: count} mapping of cases open during the datespan
    (or, when ``is_total`` is False, also modified within it)."""
    query = (CaseES()
             .domain(domain)
             .opened_range(lte=datespan.enddate)
             .NOT(closed_range_filter(lt=datespan.startdate))
             .terms_aggregation('owner_id', 'owner_id')
             .size(0))

    if case_types:
        query = query.filter({"terms": {"type.exact": case_types}})
    else:
        # Without an explicit type list, exclude the synthetic user case type.
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if not is_total:
        # "Active" counts require a modification within the datespan.
        query = query.active_in_range(gte=datespan.startdate, lte=datespan.enddate)

    if owner_ids:
        query = query.owner(owner_ids)

    return query.run().aggregations.owner_id.counts_by_bucket()
def run_messaging_rule(domain, rule_id):
    """Run the given messaging rule over all its cases.

    SQL-backed domains fan out one task per partitioned DB shard; other
    domains enqueue one task per case sequentially.
    """
    rule = _get_cached_rule(domain, rule_id)
    if not rule:
        # Rule was deleted or is no longer active; nothing to do.
        return
    progress_helper = MessagingRuleProgressHelper(rule_id)
    # Total is used only for progress reporting.
    total_cases_count = CaseES().domain(domain).case_type(
        rule.case_type).count()
    progress_helper.set_total_cases_to_be_processed(total_cases_count)

    def _run_rule_sequentially():
        # Enqueue one task per case; every 1000 cases, refresh the progress
        # key's expiry and check whether the run has been canceled.
        incr = 0
        progress_helper.set_initial_progress()
        for case_id in get_case_ids_for_messaging_rule(domain, rule.case_type):
            sync_case_for_messaging_rule.delay(domain, case_id, rule_id)
            incr += 1
            if incr >= 1000:
                incr = 0
                progress_helper.update_total_key_expiry()
                if progress_helper.is_canceled():
                    break
        # By putting this task last in the queue, the rule should be marked
        # complete at about the time that the last tasks are finishing up.
        # This beats saving the task results in the database and using a
        # celery chord which would be more taxing on system resources.
        set_rule_complete.delay(rule_id)

    def _run_rule_on_multiple_shards():
        # One task per partitioned DB shard.
        db_aliases = get_db_aliases_for_partitioned_query()
        progress_helper.set_initial_progress(shard_count=len(db_aliases))
        for db_alias in db_aliases:
            run_messaging_rule_for_shard.delay(domain, rule_id, db_alias)

    if should_use_sql_backend(domain):
        _run_rule_on_multiple_shards()
    else:
        _run_rule_sequentially()
def scroll_case_names(domain, case_ids):
    """Scroll over the given cases in a domain, fetching only name and _id."""
    query = (CaseES()
             .domain(domain)
             .case_ids(case_ids)
             .source(["name", "_id"])
             .size(CASE_SCROLL_SIZE))
    return query.scroll()