def get_all_values(self, doc, eval_context=None):
    """Compute indicator rows for ``doc``.

    When the config declares validations and the document fails any of
    them, each failure is persisted as an ``InvalidUCRData`` record and an
    empty list is returned instead of rows.
    """
    if not eval_context:
        eval_context = EvaluationContext(doc)

    if self.has_validations:
        try:
            self.validate_document(doc, eval_context)
        except ValidationError as validation_error:
            for failure in validation_error.errors:
                name, text = failure[0], failure[1]
                InvalidUCRData.objects.get_or_create(
                    doc_id=doc['_id'],
                    indicator_config_id=self._id,
                    validation_name=name,
                    defaults={
                        'doc_type': doc['doc_type'],
                        'domain': doc['domain'],
                        'validation_text': text,
                    }
                )
            return []

    result = []
    for item in self.get_items(doc, eval_context):
        result.append(self.indicators.get_values(item, eval_context))
        eval_context.increment_iteration()
    return result
def _process_chunk_for_domain(self, domain, changes_chunk):
    """Process a chunk of changes for ``domain`` across its UCR table adapters.

    Returns ``(retry_changes, change_exceptions)``: the set of changes that
    should be retried later and a list of ``(change, exception)`` pairs raised
    while computing indicator rows.
    """
    # copy so the adapter registry can't mutate while we iterate
    adapters = list(self.table_adapters_by_domain[domain])
    changes_by_id = {change.id: change for change in changes_chunk}
    to_delete_by_adapter = defaultdict(list)
    rows_to_save_by_adapter = defaultdict(list)
    async_configs_by_doc_id = defaultdict(list)
    to_update = {change for change in changes_chunk if not change.deleted}
    retry_changes, docs = self.get_docs_for_changes(to_update, domain)
    change_exceptions = []

    for doc in docs:
        eval_context = EvaluationContext(doc)
        for adapter in adapters:
            if adapter.config.filter(doc):
                if adapter.run_asynchronous:
                    async_configs_by_doc_id[doc['_id']].append(adapter.config._id)
                else:
                    try:
                        rows_to_save_by_adapter[adapter].extend(adapter.get_all_values(doc, eval_context))
                    except Exception as e:
                        change_exceptions.append((changes_by_id[doc["_id"]], e))
                    eval_context.reset_iteration()
            elif adapter.config.deleted_filter(doc) or adapter.doc_exists(doc):
                to_delete_by_adapter[adapter].append(doc['_id'])

    # bulk delete by adapter
    to_delete = [c.id for c in changes_chunk if c.deleted]
    for adapter in adapters:
        delete_ids = to_delete_by_adapter[adapter] + to_delete
        try:
            adapter.bulk_delete(delete_ids)
        except Exception as ex:
            notify_exception(
                None,
                "Error in deleting changes chunk {ids}: {ex}".format(
                    ids=delete_ids, ex=ex))
            retry_changes.update([c for c in changes_chunk if c.id in delete_ids])

    # bulk update by adapter; use .items() for consistency with the rest of
    # the file (Python 3 has no dict.iteritems, so six.iteritems is unneeded)
    for adapter, rows in rows_to_save_by_adapter.items():
        try:
            adapter.save_rows(rows)
        except Exception as ex:
            notify_exception(
                None,
                "Error in saving changes chunk {ids}: {ex}".format(
                    ids=[c.id for c in to_update], ex=repr(ex)))
            # conservatively retry the whole batch on any save failure
            retry_changes.update(to_update)

    if async_configs_by_doc_id:
        doc_type_by_id = {
            _id: changes_by_id[_id].metadata.document_type
            for _id in async_configs_by_doc_id.keys()
        }
        AsyncIndicator.bulk_update_records(async_configs_by_doc_id, domain, doc_type_by_id)

    return retry_changes, change_exceptions
def get_all_values(self, doc, eval_context=None):
    """Return one row of indicator values per item extracted from ``doc``."""
    if not eval_context:
        eval_context = EvaluationContext(doc)
    result = []
    for item in self.get_items(doc, eval_context):
        values = self.indicators.get_values(item, eval_context)
        result.append(values)
        eval_context.increment_iteration()
    return result
def process_change(self, change):
    """Apply a single pillow change to every UCR table adapter for its domain."""
    self.bootstrap_if_needed()
    domain = change.metadata.domain
    if not domain or domain not in self.table_adapters_by_domain:
        # if no domain we won't save to any UCR table
        return

    if change.deleted:
        adapters = list(self.table_adapters_by_domain[domain])
        for table in adapters:
            table.delete({'_id': change.metadata.document_id})
        # NOTE(review): execution falls through to fetch the (deleted)
        # document below — confirm a ``return`` isn't missing here.

    async_tables = []
    doc = change.get_document()
    ensure_document_exists(change)
    ensure_matched_revisions(change, doc)

    if doc is None:
        return

    with TimingContext() as timer:
        eval_context = EvaluationContext(doc)
        # make copy to avoid modifying list during iteration
        adapters = list(self.table_adapters_by_domain[domain])
        for table in adapters:
            if table.config.filter(doc):
                if table.run_asynchronous:
                    # async tables are queued via AsyncIndicator instead of
                    # being written inline
                    async_tables.append(table.config._id)
                else:
                    self._save_doc_to_table(domain, table, doc, eval_context)
                    eval_context.reset_iteration()
            elif table.config.deleted_filter(doc) or table.doc_exists(doc):
                # doc no longer matches the filter: remove any stale rows
                table.delete(doc)

        if async_tables:
            AsyncIndicator.update_from_kafka_change(change, async_tables)

    # record per-domain processing time for monitoring
    self.domain_timing_context.update(**{
        domain: timer.duration
    })
def test_form_lookups(self):
    """A saved form should be retrievable through the XFormInstance expression."""
    saved_form = create_and_save_a_form(domain=self.domain)
    expr = self._get_expression('XFormInstance')
    doc = self._get_doc(saved_form.form_id)
    context = EvaluationContext(doc, 0)
    self.assertEqual(saved_form.form_id, expr(doc, context))
def deleted_filter(self, document):
    """Evaluate the deleted-filter for ``document``; falsy when no filter exists."""
    fn = self._get_deleted_filter()
    if not fn:
        # mirror `fn and fn(...)`: return the falsy filter itself
        return fn
    return fn(document, EvaluationContext(document, 0))
email=user_email ) @task(queue=settings.CELERY_LOCATION_REASSIGNMENT_QUEUE) def process_ucr_changes(domain, case_ids): cases = CaseAccessorSQL.get_cases(case_ids) docs = [case.to_json() for case in cases] data_source_providers = [DynamicDataSourceProvider(), StaticDataSourceProvider()] all_configs = [ source for provider in data_source_providers for source in provider.by_domain(domain) ] adapters = [ get_indicator_adapter(config, raise_errors=True, load_source='location_reassignment') for config in all_configs ] for doc in docs: eval_context = EvaluationContext(doc) for adapter in adapters: if adapter.config.filter(doc, eval_context): rows_to_save = adapter.get_all_values(doc, eval_context) if rows_to_save: adapter.save_rows(rows_to_save, use_shard_col=False) else: adapter.delete(doc, use_shard_col=False)
def get_value(self, doc_id, context):
    """Evaluate the related document's value expression in a fresh context."""
    assert context.root_doc['domain']
    related = self._get_document(self.related_doc_type, doc_id, context)
    # explicitly use a new evaluation context since this is a new document
    fresh_context = EvaluationContext(related, 0)
    return self._value_expression(related, fresh_context)
def test_from_outside_date_range(self):
    """No forms match when the window ends before the current iteration."""
    expr = self._make_expression('iteration - 2', 'iteration - 1')
    ctx = EvaluationContext({"domain": self.domain}, 0)
    self.assertEqual(expr(self.case.to_json(), ctx), [])
def test_voucher_properties(self):
    """Each voucher property expression returns the matching case value."""
    voucher_case = {
        '_id': 'voucher_case_id',
        'domain': 'enikshay-test',
        'date_fulfilled': '2017-09-28',
        'voucher_issued_by_login_name': 'login_name',
        'voucher_issued_by_name': 'name',
        'voucher_issued_by_phone_number': '123456',
        'voucher_issued_by_type': 'issued type',
        'voucher_id': 'voucher_case_id',
        'date_issued': '2017-09-28',
        'state': 'test_state',
        'amount_fulfilled': '123',
        'voucher_fulfilled_by_login_name': 'fulfilled_login_name',
        'voucher_fulfilled_by_name': 'fulfilled_name',
        'voucher_fulfilled_by_phone_number': '654321',
        'investigation_type_name': 'type',
    }
    # (property, datatype, expected value)
    expectations = [
        ('date_fulfilled', 'date', '2017-09-28'),
        ('voucher_issued_by_login_name', 'string', 'login_name'),
        ('voucher_issued_by_name', 'string', 'name'),
        ('voucher_issued_by_phone_number', 'string', '123456'),
        ('voucher_id', 'string', 'voucher_case_id'),
        ('date_issued', 'date', '2017-09-28'),
        ('state', 'string', 'test_state'),
        ('amount_fulfilled', 'integer', '123'),
        ('voucher_fulfilled_by_login_name', 'string', 'fulfilled_login_name'),
        ('voucher_fulfilled_by_name', 'string', 'fulfilled_name'),
        ('voucher_fulfilled_by_phone_number', 'string', '654321'),
        ('investigation_type_name', 'string', 'type'),
        ('voucher_issued_by_type', 'string', 'issued type'),
    ]
    for prop, datatype, expected in expectations:
        expression = self.get_expression(prop, datatype)
        self.assertEqual(
            expression(voucher_case, EvaluationContext(voucher_case, 0)),
            expected)
def _check_expression(self, doc, expected, domain=None):
    """Assert that evaluating ``self.expression`` on ``doc`` yields ``expected``."""
    effective_domain = domain or self.domain_name
    context = EvaluationContext({"domain": effective_domain}, 0)
    self.assertEqual(expected, self.expression(doc, context=context))
def test_evaluation(self):
    """The expression returns exactly the known related forms."""
    ctx = EvaluationContext({"domain": self.domain}, 0)
    result = self.expression(self.case.to_json(), ctx)
    self.assertEqual(1, len(result))
    self.assertEqual(self.forms, result)
def test_wrong_domain(self):
    """Evaluating under a different domain yields no forms."""
    ctx = EvaluationContext({"domain": "wrong-domain"}, 0)
    self.assertEqual([], self.expression(self.case.to_json(), ctx))
def test_from_incorrect_xmlns(self):
    """A non-matching xmlns filter returns an empty form list."""
    expr = self._make_expression('iteration - 1', 'iteration + 1', 'silly-xmlns')
    ctx = EvaluationContext({"domain": self.domain}, 0)
    self.assertEqual([], expr(self.case.to_json(), ctx))
def test_count_incorrect_xmlns(self):
    """Counting with a non-matching xmlns filter returns zero."""
    expr = self._make_expression('iteration - 1', 'iteration + 1',
                                 'silly-xmlns', count=True)
    ctx = EvaluationContext({"domain": self.domain}, 0)
    self.assertEqual(0, expr(self.case.to_json(), ctx))
def build_async_indicators(indicator_doc_ids):
    """Build UCR rows for a chunk of ``AsyncIndicator`` doc ids.

    Written to be used with _queue_indicators; ``indicator_doc_ids`` must be
    a chunk of at most ``ASYNC_INDICATOR_CHUNK_SIZE`` ids.
    """
    memoizers = {'configs': {}, 'adapters': {}}
    assert len(indicator_doc_ids) <= ASYNC_INDICATOR_CHUNK_SIZE

    def handle_exception(exception, config_id, doc, adapter):
        # map known transient backend failures to metrics; anything else is
        # handed to the adapter (when we got far enough to build one)
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            metrics_counter(metric, tags={'config_id': config_id})

    def doc_ids_from_rows(rows):
        # recover the source doc ids from the rows we are about to save
        formatted_rows = [{
            column.column.database_column_name.decode('utf-8'): column.value
            for column in row
        } for row in rows]
        return set(row['doc_id'] for row in formatted_rows)

    def _get_config(config_id):
        # memoized config lookup
        config_by_id = memoizers['configs']
        if config_id in config_by_id:
            return config_by_id[config_id]
        config = get_ucr_datasource_config_by_id(config_id)
        config_by_id[config_id] = config
        return config

    def _get_adapter(config):
        # memoized adapter lookup, keyed by config id
        adapter_by_config = memoizers['adapters']
        if config._id in adapter_by_config:
            return adapter_by_config[config._id]
        adapter = get_indicator_adapter(
            config, load_source='build_async_indicators')
        adapter_by_config[config._id] = adapter
        return adapter

    def _metrics_timer(step, config_id=None):
        tags = {
            'action': step,
        }
        if config_id and settings.ENTERPRISE_MODE:
            tags['config_id'] = config_id
        else:
            # Prometheus requires consistent tags even if not available
            tags['config_id'] = None
        return metrics_histogram_timer('commcare.async_indicator.timing',
                                       timing_buckets=(.03, .1, .3, 1, 3, 10),
                                       tags=tags)

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_doc_id)
        for indicator_doc_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(
            doc_id__in=indicator_doc_ids)
        if not all_indicators:
            return

        doc_store = get_document_store_for_doc_type(
            all_indicators[0].domain,
            all_indicators[0].doc_type,
            load_source="build_async_indicators",
        )
        failed_indicators = set()

        rows_to_save_by_adapter = defaultdict(list)
        docs_to_delete_by_adapter = defaultdict(list)
        # there will always be one AsyncIndicator per doc id
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()
        with timer:
            for doc in doc_store.iter_documents(
                    list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    with _metrics_timer('transform', config_id):
                        config_ids.add(config_id)
                        try:
                            config = _get_config(config_id)
                        except (ResourceNotFound,
                                DataSourceConfigurationNotFoundError):
                            celery_task_logger.info(
                                "{} no longer exists, skipping".format(
                                    config_id))
                            # remove because the config no longer exists
                            _mark_config_to_remove(config_id, [indicator.pk])
                            continue
                        except ESError:
                            celery_task_logger.info(
                                "ES errored when trying to retrieve config")
                            failed_indicators.add(indicator)
                            continue
                        adapter = None
                        try:
                            adapter = _get_adapter(config)
                            rows_to_save = adapter.get_all_values(
                                doc, eval_context)
                            if rows_to_save:
                                rows_to_save_by_adapter[adapter].extend(
                                    rows_to_save)
                            else:
                                # no rows means the doc no longer matches;
                                # remove any stale rows for it
                                docs_to_delete_by_adapter[adapter].append(doc)
                            eval_context.reset_iteration()
                        except Exception as e:
                            failed_indicators.add(indicator)
                            handle_exception(e, config_id, doc, adapter)

        with _metrics_timer('single_batch_update'):
            for adapter, rows in rows_to_save_by_adapter.items():
                doc_ids = doc_ids_from_rows(rows)
                indicators = [
                    indicator_by_doc_id[doc_id] for doc_id in doc_ids
                ]
                try:
                    with _metrics_timer('update', adapter.config._id):
                        adapter.save_rows(rows, use_shard_col=True)
                except Exception as e:
                    # BUG FIX: set.union() returns a new set and discards it;
                    # update() mutates in place so failures are recorded
                    failed_indicators.update(indicators)
                    message = str(e)
                    notify_exception(
                        None,
                        "Exception bulk saving async indicators:{}".format(
                            message))
                else:
                    # remove because it's successfully processed
                    # BUG FIX: use this adapter's config id, not the stale
                    # ``config_id`` left over from the transform loop above
                    _mark_config_to_remove(adapter.config._id,
                                           [i.pk for i in indicators])

        with _metrics_timer('single_batch_delete'):
            for adapter, docs in docs_to_delete_by_adapter.items():
                with _metrics_timer('delete', adapter.config._id):
                    adapter.bulk_delete(docs)

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(
            pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(
                    configs_to_remove_by_indicator_id.get(indicator.pk, []))
                indicator.save()

        metrics_counter('commcare.async_indicator.processed_success',
                        len(processed_indicators))
        metrics_counter('commcare.async_indicator.processed_fail',
                        len(failed_indicators))
        metrics_counter('commcare.async_indicator.processing_time',
                        timer.duration, tags={'config_ids': config_ids})
        metrics_counter('commcare.async_indicator.processed_total',
                        len(indicator_doc_ids), tags={'config_ids': config_ids})
def get_all_values(self, doc):
    """Return indicator value rows, one per extracted item."""
    rows = []
    for iteration, item in enumerate(self.get_items(doc)):
        context = EvaluationContext(doc, iteration)
        rows.append(self.indicators.get_values(item, context))
    return rows
def _build_async_indicators(indicator_doc_ids):
    """Build UCR rows for the given ``AsyncIndicator`` doc ids (one chunk)."""
    def handle_exception(exception, config_id, doc, adapter):
        # map known transient backend failures to metrics; anything else is
        # handed to the adapter (when we got far enough to build one)
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            datadog_counter(metric, 1, tags={
                'config_id': config_id,
                'doc_id': doc['_id']
            })

    def doc_ids_from_rows(rows):
        # recover the source doc ids from the rows we are about to save
        formatted_rows = [{
            column.column.database_column_name.decode('utf-8'): column.value
            for column in row
        } for row in rows]
        return set(row['doc_id'] for row in formatted_rows)

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_id)
        for indicator_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(
            doc_id__in=indicator_doc_ids)
        if not all_indicators:
            return

        doc_store = get_document_store_for_doc_type(
            all_indicators[0].domain,
            all_indicators[0].doc_type,
            load_source="build_async_indicators",
        )
        failed_indicators = set()
        rows_to_save_by_adapter = defaultdict(list)
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()
        with timer:
            for doc in doc_store.iter_documents(
                    list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    config_ids.add(config_id)
                    try:
                        config = _get_config_by_id(config_id)
                    except (ResourceNotFound,
                            StaticDataSourceConfigurationNotFoundError):
                        celery_task_logger.info(
                            "{} no longer exists, skipping".format(config_id))
                        # remove because the config no longer exists
                        _mark_config_to_remove(config_id, [indicator.pk])
                        continue
                    except ESError:
                        celery_task_logger.info(
                            "ES errored when trying to retrieve config")
                        failed_indicators.add(indicator)
                        continue
                    adapter = None
                    try:
                        adapter = get_indicator_adapter(
                            config, load_source='build_async_indicators')
                        rows_to_save_by_adapter[adapter].extend(
                            adapter.get_all_values(doc, eval_context))
                        eval_context.reset_iteration()
                    except Exception as e:
                        failed_indicators.add(indicator)
                        handle_exception(e, config_id, doc, adapter)

        # .items()/str instead of six.iteritems/six.text_type for Python 3
        # consistency with the rest of the file
        for adapter, rows in rows_to_save_by_adapter.items():
            doc_ids = doc_ids_from_rows(rows)
            indicators = [
                indicator_by_doc_id[doc_id] for doc_id in doc_ids
            ]
            try:
                adapter.save_rows(rows)
            except Exception as e:
                # BUG FIX: set.union() returns a new set and discards it;
                # update() mutates in place so failures are recorded
                failed_indicators.update(indicators)
                message = str(e)
                notify_exception(
                    None,
                    "Exception bulk saving async indicators:{}".format(
                        message))
            else:
                # remove because it's successfully processed
                # BUG FIX: use this adapter's config id, not the stale
                # ``config_id`` left over from the loop above
                _mark_config_to_remove(adapter.config._id,
                                       [i.pk for i in indicators])

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(
            pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(
                    configs_to_remove_by_indicator_id.get(indicator.pk, []))
                indicator.save()

        datadog_counter('commcare.async_indicator.processed_success',
                        len(processed_indicators))
        datadog_counter('commcare.async_indicator.processed_fail',
                        len(failed_indicators))
        datadog_histogram('commcare.async_indicator.processing_time',
                          timer.duration / len(indicator_doc_ids),
                          tags=[
                              'config_ids:{}'.format(config_ids),
                          ])
def test_other_lookups(self):
    """A saved user doc should be retrievable through the CommCareUser expression."""
    new_user_id = uuid.uuid4().hex
    CommCareUser.get_db().save_doc({'_id': new_user_id, 'domain': self.domain})
    expr = self._get_expression('CommCareUser')
    doc = self._get_doc(new_user_id)
    self.assertEqual(new_user_id, expr(doc, EvaluationContext(doc, 0)))
def test_default(self):
    """Iteration defaults to 0 when not supplied."""
    context = EvaluationContext({})
    self.assertEqual(0, self.spec({}, context))
def test_person_properties(self):
    """Person-related expressions resolve values from the linked person case."""
    voucher_case = {
        '_id': 'voucher_case_id',
        'domain': 'enikshay-test',
        'date_fulfilled': '2017-09-28',
        'voucher_type': 'test',
    }
    person_case = {
        '_id': 'person_case_id',
        'person_id': 'person_case_id',
        'name': 'test_name',
        'domain': 'enikshay-test',
        'phone_number': '123432',
        'owner_id': 'owner-id',
        'date_of_registration': '2017-09-28',
    }
    investigation_form = {
        "_id": 'investigation_form_id',
        "domain": "enikshay-test",
        "form": {
            "beneficiary_data": {
                "person_id": "person_case_id",
            }
        },
        "xmlns": "http://openrosa.org/formdesigner/f710654022ff2d0653b315b71903257dbf53249b",
    }
    self.database.mock_docs = {
        'voucher_case_id': voucher_case,
        'person_case_id': person_case,
        'investigation_form_id': investigation_form
    }
    # build expressions up front, matching the original flow
    cases = [
        (self.get_expression('person_owner_id', 'string'), 'owner-id'),
        (self.get_expression('person_id', 'string'), 'person_case_id'),
        (self.get_expression('name', 'string'), 'test_name'),
        (self.get_expression('phone_number', 'string'), '123432'),
        (self.get_expression('date_of_registration', 'date'), '2017-09-28'),
    ]
    with mock.patch.object(FirstCaseFormWithXmlns, '__call__',
                           return_value=investigation_form):
        for expression, expected in cases:
            self.assertEqual(
                expected,
                expression(voucher_case, EvaluationContext(voucher_case, 0)))
def test_value_set(self):
    """An explicitly-set iteration value is returned as-is."""
    context = EvaluationContext({}, iteration=7)
    self.assertEqual(7, self.spec({}, context))
def test_location_parent_bad_domain(self):
    """Looking up a location's parent under the wrong domain yields None."""
    context = EvaluationContext({"domain": 'bad-domain'})
    result = self.expression({'location_id': self.child.location_id}, context)
    self.assertIsNone(result)
def test_cache(self):
    """Cache lookups match on the full key tuple only."""
    ctx = EvaluationContext({})
    ctx.set_cache_value(('k1', 'k2'), 'v1')
    self.assertEqual('v1', ctx.get_cache_value(('k1', 'k2')))
    # a prefix of the key is a different key entirely
    self.assertIsNone(ctx.get_cache_value(('k1', )))
def test_target_form_properties(self):
    """Each target form expression extracts the matching form value."""
    form = {
        'id': 'form_id',
        'xmlns': TARGET_XMLNS,
        'domain': 'champ_cameroon',
        'form': {
            'locations': {
                'district': 'test district',
                'cbo': 'test cbo',
                'clienttype': 'fsw_test_client_type',
                'userpl': 'test userpl'
            },
            'fiscal_year': '2017',
            'target_kp_prev': 15,
            'target_htc_tst': 54,
            'target_htc_pos': 35,
            'target_care_new': 16,
            'target_tx_new': 11,
            'target_tx_undetect': 20
        }
    }
    # (property, datatype, expected value); note clienttype is normalized
    # from 'fsw_test_client_type' down to its 'fsw' prefix
    expectations = [
        ('district', 'string', 'test district'),
        ('cbo', 'string', 'test cbo'),
        ('clienttype', 'string', 'fsw'),
        ('userpl', 'string', 'test userpl'),
        ('fiscal_year', 'integer', '2017'),
        ('target_kp_prev', 'integer', 15),
        ('target_htc_tst', 'integer', 54),
        ('target_htc_pos', 'integer', 35),
        ('target_care_new', 'integer', 16),
        ('target_tx_new', 'integer', 11),
        ('target_tx_undetect', 'integer', 20),
    ]
    for prop, datatype, expected in expectations:
        expression = self.get_expression(prop, datatype)
        self.assertEqual(expected, expression(form, EvaluationContext(form, 0)))
def test_not_in_context(self):
    """A property absent from the context resolves to None."""
    result = self.expression({"base_property": "item_value"},
                             context=EvaluationContext({}, 0))
    self.assertIsNone(result)
def filter(self, document, eval_context=None):
    """Evaluate the main filter against ``document``.

    A fresh ``EvaluationContext`` is created when none is supplied.
    """
    context = eval_context if eval_context is not None else EvaluationContext(document)
    main_filter = self._get_main_filter()
    return main_filter(document, context)
def test_visite_de_l_operatour_properties_for_post_test_xmlns(self):
    """Form-level and repeat-item expressions extract the expected values."""
    form = {
        'id': 'form_id',
        'xmlns': YEKSI_NAA_REPORTS_VISITE_DE_L_OPERATOUR_PER_PRODUCT,
        'domain': 'test-pna',
        'form': {
            'location_id': 'a025fa0f80c8451aabe5040c9dfc5efe',
            'region_name': 'Dakar',
            'PPS_name': 'PPS 1',
            'district_name': 'District Rufisque',
            'confirmed_products_update': {
                'products_update': [{
                    'question1': {
                        'loss_amt': 0,
                        'expired_pna_valuation': 0
                    },
                    'final_pna_stock': 1,
                    'final_pna_stock_valuation': 1,
                    'real_date_repeat': '2018-03-07',
                    'product_name': 'EFAVIRENZ 600MG CP.',
                    'product_id': '288a455ae0a0625f935374ff18aa4d20',
                    'site_code': 'dakar_rufisque_pps 1',
                    'PPS_name': 'PPS 1',
                }, {
                    'question1': {
                        'loss_amt': 0,
                        'expired_pna_valuation': 0
                    },
                    'final_pna_stock': 1,
                    'final_pna_stock_valuation': 1,
                    'real_date_repeat': '2018-03-07',
                    'product_name': 'NEVIRAPINE 200MG CP.',
                    'product_id': '288a455ae0a0625f935374ff18a98a6d',
                    'site_code': 'dakar_rufisque_pps 1',
                    'PPS_name': 'PPS 1',
                }],
            },
            'supply-point': 'fd79174541fa4f3b9924af69ee3db7ad',
            'site_code': 'dakar_rufisque_pps 1',
        }
    }
    user = {
        'id': 'user_id',
        'domain': 'test-pna',
        'location_id': 'test_location_id'
    }
    self.database.mock_docs = {'form_id': form, 'user_id': user}

    base_item_expression = self.get_expressions_from_base_item_expression()
    repeat_items = base_item_expression(form, EvaluationContext(form, 0))

    # form-level properties: (property, datatype, expected)
    form_expectations = [
        ('pps_id', 'string', 'a025fa0f80c8451aabe5040c9dfc5efe'),
        ('region_name', 'string', 'Dakar'),
        ('pps_name', 'string', 'PPS 1'),
        ('district_name', 'string', 'District Rufisque'),
        ('supply-point', 'string', 'fd79174541fa4f3b9924af69ee3db7ad'),
    ]
    for prop, datatype, expected in form_expectations:
        expression = self.get_expression(prop, datatype)
        self.assertEqual(expected, expression(form, EvaluationContext(form, 0)))

    # repeat-item properties: (property, datatype, expected[item 0], expected[item 1]).
    # ``None`` for item 0 means "not asserted"; the original test checked
    # PPS_name against repeat_items[1] both times, which is preserved here.
    repeat_expectations = [
        ('loss_amt', 'integer', 0, 0),
        ('expired_pna_valuation', 'integer', 0, 0),
        ('final_pna_stock', 'integer', 1, 1),
        ('final_pna_stock_valuation', 'integer', 1, 1),
        ('real_date_repeat', 'date', date(2018, 3, 1), date(2018, 3, 1)),
        ('product_name', 'string', 'EFAVIRENZ 600MG CP.', 'NEVIRAPINE 200MG CP.'),
        ('product_id', 'string',
         '288a455ae0a0625f935374ff18aa4d20', '288a455ae0a0625f935374ff18a98a6d'),
        ('site_code', 'string', 'dakar_rufisque_pps 1', 'dakar_rufisque_pps 1'),
        ('PPS_name', 'string', None, 'PPS 1'),
    ]
    for prop, datatype, expected_first, expected_second in repeat_expectations:
        expression = self.get_expression(prop, datatype)
        if expected_first is not None:
            self.assertEqual(
                expected_first,
                expression(repeat_items[0], EvaluationContext(repeat_items[0], 0)))
        self.assertEqual(
            expected_second,
            expression(repeat_items[1], EvaluationContext(repeat_items[1], 0)))
    # PPS_name is asserted twice on repeat_items[1] in the original test
    pps_name_expr = self.get_expression('PPS_name', 'string')
    self.assertEqual(
        'PPS 1',
        pps_name_expr(repeat_items[1], EvaluationContext(repeat_items[1], 0)))
def filter(self, document):
    """Apply the main filter to ``document`` with a fresh evaluation context."""
    main_filter = self._get_main_filter()
    return main_filter(document, EvaluationContext(document, 0))
def _build_async_indicators(indicator_doc_ids):
    """Build UCR rows for the given ``AsyncIndicator`` doc ids (one chunk)."""
    def handle_exception(exception, config_id, doc, adapter):
        # map known transient backend failures to metrics; anything else is
        # handed to the adapter (when we got far enough to build one)
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            datadog_counter(metric, 1,
                            tags={'config_id': config_id, 'doc_id': doc['_id']})

    def doc_ids_from_rows(rows):
        # recover the source doc ids from the rows we are about to save
        formatted_rows = [
            {column.column.database_column_name.decode('utf-8'): column.value
             for column in row}
            for row in rows
        ]
        return set(row['doc_id'] for row in formatted_rows)

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_id)
        for indicator_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(
            doc_id__in=indicator_doc_ids
        )
        if not all_indicators:
            return

        doc_store = get_document_store_for_doc_type(
            all_indicators[0].domain,
            all_indicators[0].doc_type,
            load_source="build_async_indicators",
        )
        failed_indicators = set()
        rows_to_save_by_adapter = defaultdict(list)
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()
        with timer:
            for doc in doc_store.iter_documents(list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    config_ids.add(config_id)
                    try:
                        config = _get_config_by_id(config_id)
                    except (ResourceNotFound, StaticDataSourceConfigurationNotFoundError):
                        celery_task_logger.info("{} no longer exists, skipping".format(config_id))
                        # remove because the config no longer exists
                        _mark_config_to_remove(config_id, [indicator.pk])
                        continue
                    except ESError:
                        celery_task_logger.info("ES errored when trying to retrieve config")
                        failed_indicators.add(indicator)
                        continue
                    adapter = None
                    try:
                        adapter = get_indicator_adapter(config, load_source='build_async_indicators')
                        rows_to_save_by_adapter[adapter].extend(adapter.get_all_values(doc, eval_context))
                        eval_context.reset_iteration()
                    except Exception as e:
                        failed_indicators.add(indicator)
                        handle_exception(e, config_id, doc, adapter)

        # .items()/str instead of six.iteritems/six.text_type for Python 3
        # consistency with the rest of the file
        for adapter, rows in rows_to_save_by_adapter.items():
            doc_ids = doc_ids_from_rows(rows)
            indicators = [indicator_by_doc_id[doc_id] for doc_id in doc_ids]
            try:
                adapter.save_rows(rows)
            except Exception as e:
                # BUG FIX: set.union() returns a new set and discards it;
                # update() mutates in place so failures are recorded
                failed_indicators.update(indicators)
                message = str(e)
                notify_exception(None, "Exception bulk saving async indicators:{}".format(message))
            else:
                # remove because it's successfully processed
                # BUG FIX: use this adapter's config id, not the stale
                # ``config_id`` left over from the loop above
                _mark_config_to_remove(
                    adapter.config._id,
                    [i.pk for i in indicators]
                )

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(
                    configs_to_remove_by_indicator_id.get(indicator.pk, [])
                )
                indicator.save()

        datadog_counter('commcare.async_indicator.processed_success',
                        len(processed_indicators))
        datadog_counter('commcare.async_indicator.processed_fail',
                        len(failed_indicators))
        datadog_histogram(
            'commcare.async_indicator.processing_time',
            timer.duration / len(indicator_doc_ids),
            tags=[
                'config_ids:{}'.format(config_ids),
            ]
        )
def _process_chunk_for_domain(self, domain, changes_chunk):
    """Process one chunk of changes for ``domain`` across all its adapters.

    Transforms each fetched doc through every matching data source config,
    bulk-deletes docs that no longer match, bulk-saves the generated rows,
    and queues async configs for later processing.

    Returns ``(retry_changes, change_exceptions)``: changes that should be
    retried later, and ``(change, exception)`` pairs raised while
    transforming individual docs.
    """
    adapters = list(self.table_adapters_by_domain[domain])
    changes_by_id = {change.id: change for change in changes_chunk}
    to_delete_by_adapter = defaultdict(list)
    rows_to_save_by_adapter = defaultdict(list)
    async_configs_by_doc_id = defaultdict(list)
    to_update = {change for change in changes_chunk if not change.deleted}
    with self._metrics_timer('extract'):
        retry_changes, docs = bulk_fetch_changes_docs(to_update, domain)
    change_exceptions = []

    with self._metrics_timer('single_batch_transform'):
        for doc in docs:
            change = changes_by_id[doc['_id']]
            doc_subtype = change.metadata.document_subtype
            eval_context = EvaluationContext(doc)
            with self._metrics_timer('single_doc_transform'):
                for adapter in adapters:
                    with self._per_config_metrics_timer('transform', adapter.config._id):
                        if adapter.config.filter(doc, eval_context):
                            if adapter.run_asynchronous:
                                async_configs_by_doc_id[doc['_id']].append(adapter.config._id)
                            else:
                                try:
                                    rows_to_save_by_adapter[adapter].extend(
                                        adapter.get_all_values(doc, eval_context))
                                except Exception as e:
                                    change_exceptions.append((change, e))
                                # reset so the next adapter sees a fresh iteration
                                eval_context.reset_iteration()
                        elif (doc_subtype is None
                                or doc_subtype in adapter.config.get_case_type_or_xmlns_filter()):
                            # Delete if the subtype is unknown or
                            # if the subtype matches our filters, but the full filter no longer applies
                            to_delete_by_adapter[adapter].append(doc)

    with self._metrics_timer('single_batch_delete'):
        # bulk delete by adapter
        to_delete = [{'_id': c.id} for c in changes_chunk if c.deleted]
        for adapter in adapters:
            delete_docs = to_delete_by_adapter[adapter] + to_delete
            if not delete_docs:
                continue
            with self._per_config_metrics_timer('delete', adapter.config._id):
                try:
                    adapter.bulk_delete(delete_docs)
                except Exception as ex:
                    delete_ids = [doc['_id'] for doc in delete_docs]
                    # BUGFIX: the failure was silently swallowed; log it in
                    # addition to queueing the changes for retry.
                    notify_exception(
                        None,
                        "Error in deleting changes chunk {ids}: {ex}".format(
                            ids=delete_ids, ex=repr(ex)))
                    retry_changes.update([c for c in changes_chunk if c.id in delete_ids])

    with self._metrics_timer('single_batch_load'):
        # bulk update by adapter
        for adapter, rows in rows_to_save_by_adapter.items():
            with self._per_config_metrics_timer('load', adapter.config._id):
                try:
                    adapter.save_rows(rows)
                except Exception as ex:
                    # BUGFIX: log the save failure instead of retrying silently.
                    notify_exception(
                        None,
                        "Error in saving changes chunk {ids}: {ex}".format(
                            ids=[c.id for c in to_update], ex=repr(ex)))
                    retry_changes.update(to_update)

    if async_configs_by_doc_id:
        with self._metrics_timer('async_config_load'):
            doc_type_by_id = {
                _id: changes_by_id[_id].metadata.document_type
                for _id in async_configs_by_doc_id
            }
            AsyncIndicator.bulk_update_records(async_configs_by_doc_id, domain, doc_type_by_id)

    return retry_changes, change_exceptions
def test_case_lookups(self):
    """A related-doc expression against CommCareCase resolves to the case id."""
    new_case_id = uuid.uuid4().hex
    create_and_save_a_case(
        domain=self.domain,
        case_id=new_case_id,
        case_name='related doc test case',
    )
    expr = self._get_expression('CommCareCase')
    case_doc = self._get_doc(new_case_id)
    self.assertEqual(new_case_id, expr(case_doc, EvaluationContext(case_doc, 0)))
def test_comes_from_context(self):
    """The expression reads from the context's root doc, not the item passed in."""
    item = {"base_property": "item_value"}
    root_doc = {"base_property": "base_value"}
    result = self.expression(item, context=EvaluationContext(root_doc, 0))
    self.assertEqual("base_value", result)
def test_cache(self):
    """Cache values are keyed by the full key tuple; a partial key misses."""
    ctx = EvaluationContext({})
    full_key = ('k1', 'k2')
    ctx.set_cache_value(full_key, 'v1')
    self.assertEqual('v1', ctx.get_cache_value(full_key))
    self.assertIsNone(ctx.get_cache_value(('k1',)))
def test_champ_cametoon_properties_for_survi_medical_xmlns(self):
    """Every UCR property for the suivi medical form evaluates as expected."""
    form = {
        'id': 'form_id',
        'xmlns': SUIVI_MEDICAL_XMLNS,
        'domain': 'champ_cameroon',
        'form': {
            'group': {
                'age': 12,
            },
            'district': 'test district',
            'visit_date': '2017-01-15',
            'posttest_date': '2017-02-20',
            'type_visit': 'first visit',
            'age_range': '10-15 yrs',
            'date_handshake': '2017-05-03',
            'handshake_status': 'status',
            'meta': {
                'userID': 'user_id',
                'timeEnd': '2017-01-31 20:00'
            },
            'seropostive_group': {
                'first_art_date': '2017-02-03',
            },
            'load': {
                'client_type': 'test client',
                'hiv_status': 'positive',
            },
            'viral_load_group': {
                'date_last_vl_test': '2017-01-29',
                'undetect_vl': 'yes',
            }
        }
    }
    case = {
        'district': 'test district',
        'hiv_test_date': '2017-03-15',
        'name': 'test uic',
    }
    user = {
        'id': 'user_id',
        'domain': 'champ_cameroon',
        'location_id': 'test_location_id'
    }
    self.database.mock_docs = {'user_id': user}

    # Build every expression up front (same order as before).
    expression_specs = [
        ('xmlns', 'string'),
        ('uic', 'string'),
        ('district', 'string'),
        ('hiv_test_date', 'date'),
        ('age_range', 'string'),
        ('posttest_date', 'date'),
        ('date_handshake', 'date'),
        ('first_art_date', 'string'),
        ('date_last_vl_test', 'string'),
        ('client_type', 'string'),
        ('hiv_status', 'string'),
        ('handshake_status', 'string'),
        ('undetect_vl', 'string'),
        ('form_completion', 'string'),
        ('user_id', 'string'),
        ('htc_month', 'date'),
        ('care_new_month', 'date'),
        ('organization', 'string'),
    ]
    expressions = {
        name: self.get_expression(name, datatype)
        for name, datatype in expression_specs
    }

    # (property, context root doc, expected value) — the expression is always
    # evaluated against the form; only the EvaluationContext root doc varies.
    expected_values = [
        ('xmlns', form, SUIVI_MEDICAL_XMLNS),
        ('district', case, 'test district'),
        ('uic', case, 'test uic'),
        ('age_range', form, '10-15 yrs'),
        ('date_handshake', form, '2017-05-03'),
        ('first_art_date', form, '2017-02-03'),
        ('date_last_vl_test', form, '2017-01-29'),
        ('client_type', case, 'test client'),
        ('posttest_date', form, '2017-02-20'),
        ('hiv_status', case, 'positive'),
        ('handshake_status', form, 'status'),
        ('undetect_vl', form, 'yes'),
        ('form_completion', form, '2017-01-31 20:00'),
        ('user_id', form, 'user_id'),
        ('htc_month', form, date(2017, 2, 1)),
        ('care_new_month', form, date(2017, 5, 1)),
        ('organization', form, 'test_location_id'),
        ('hiv_test_date', case, '2017-03-15'),
    ]
    for name, context_doc, expected in expected_values:
        self.assertEqual(
            expressions[name](form, EvaluationContext(context_doc, 0)),
            expected,
        )
def test_related_doc_not_found(self):
    """A dangling related-doc reference evaluates to None rather than raising."""
    source_doc = {'parent_id': 'some-missing-id', 'domain': 'whatever'}
    result = self.expression(source_doc, EvaluationContext(source_doc, 0))
    self.assertIsNone(result)