def save_document(doc_ids):
    """Process the queued AsyncIndicators for the given doc ids.

    Fetches each source document, runs it through ``_save_document_helper``,
    deletes the AsyncIndicator rows that processed cleanly, records failures
    on the rest, and re-queues any related docs that still need a rebuild.
    Emits datadog success/failure counters and a processing-time histogram.

    :param doc_ids: iterable of document ids whose AsyncIndicators should be
        processed. All indicators in the batch are assumed to share one
        domain and doc_type (asserted below).
    """
    # Take a per-doc modify lock for every doc in the batch so no other
    # worker mutates these AsyncIndicator rows while we process them.
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    indicator_config_ids = None
    timer = TimingContext()
    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            # Nothing queued (e.g. another worker already handled them).
            return

        first_indicator = indicators[0]
        processed_indicators = []  # pks of indicators handled successfully
        failed_indicators = []     # (indicator, configs_to_remove) pairs

        for i in indicators:
            # Sanity check: a batch must be homogeneous so one document
            # store can serve every doc. NOTE: asserts are stripped under -O.
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain, first_indicator.doc_type)
        indicator_config_ids = first_indicator.indicator_config_ids
        related_docs_to_rebuild = set()

        with timer:
            for doc in doc_store.iter_documents(indicator_by_doc_id.keys()):
                indicator = indicator_by_doc_id[doc['_id']]
                successfully_processed, to_remove, rebuild_related_docs = _save_document_helper(
                    indicator, doc)
                if rebuild_related_docs:
                    # Collect ids of docs whose indicators must be rebuilt
                    # because this doc changed (ICDS-specific relation).
                    related_docs_to_rebuild = related_docs_to_rebuild.union(
                        icds_get_related_docs_ids(doc['_id']))
                if successfully_processed:
                    processed_indicators.append(indicator.pk)
                else:
                    failed_indicators.append((indicator, to_remove))

        num_processed = len(processed_indicators)
        num_failed = len(failed_indicators)
        # Successful indicators are simply deleted from the queue table.
        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        # Failures are updated atomically so partial failure bookkeeping
        # can't be left half-written.
        with transaction.atomic():
            for indicator, to_remove in failed_indicators:
                indicator.update_failure(to_remove)
                indicator.save()

        # remove any related docs that were just rebuilt
        related_docs_to_rebuild = related_docs_to_rebuild - set(doc_ids)
        # queue the docs that aren't already queued
        _queue_indicators(
            AsyncIndicator.objects.filter(
                doc_id__in=related_docs_to_rebuild, date_queued=None))

    # Metrics are emitted after the lock is released; `indicator_config_ids`
    # stays None when the early return above fired.
    datadog_counter('commcare.async_indicator.processed_success', num_processed)
    datadog_counter('commcare.async_indicator.processed_fail', num_failed)
    datadog_histogram(
        'commcare.async_indicator.processing_time', timer.duration,
        tags=[u'config_ids:{}'.format(indicator_config_ids)])
def test_person_case(self):
    """A person-prefixed id has no related docs, for either underlying case."""
    for person_doc_id in ('p-' + self.ccs_record_id,
                          'p-' + self.child_health_case_id):
        self.assertEqual(icds_get_related_docs_ids(person_doc_id), [])
def test_household_case(self):
    """A household-prefixed id yields no related docs."""
    related = icds_get_related_docs_ids('hh-' + self.ccs_record_id)
    self.assertEqual(related, [])
def test_nonexistant_case(self):
    """An id that matches no case yields no related docs."""
    related = icds_get_related_docs_ids('nothing')
    self.assertEqual(related, [])
def test_irrelavant_ccs_level_case(self):
    """A ccs-level id with an unrecognized sub-prefix yields no related docs."""
    other_ccs_doc_id = 'ccs-other-' + self.ccs_record_id
    self.assertEqual(icds_get_related_docs_ids(other_ccs_doc_id), [])
def test_child_health_case(self):
    """A child_health case is related to its ccs_record, which must be rebuilt."""
    related = icds_get_related_docs_ids(self.child_health_case_id)
    self.assertEqual(related, [self.ccs_record_id])
def _build_async_indicators(indicator_doc_ids):
    """Build datasource rows for the queued AsyncIndicators in this batch.

    For each queued doc, evaluates every configured data source, bulk-saves
    the resulting rows per adapter, deletes indicators that fully processed,
    records failures on the rest, and re-queues related docs (ICDS) that
    were not part of this batch. Emits datadog success/failure counters and
    a processing-time histogram.

    :param indicator_doc_ids: iterable of doc ids whose AsyncIndicators
        should be processed; assumed to share one domain and doc_type.
    """
    def handle_exception(exception, config_id, doc, adapter):
        # Classify transient infra errors into a metric; anything else is
        # delegated to the adapter's own error handling.
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            datadog_counter(metric, 1, tags={
                'config_id': config_id,
                'doc_id': doc['_id']
            })

    def doc_ids_from_rows(rows):
        # Recover the set of source doc ids from the rows an adapter is
        # about to save (each row carries a 'doc_id' column).
        formatted_rows = [{
            column.column.database_column_name: column.value
            for column in row
        } for row in rows]
        return set(row['doc_id'] for row in formatted_rows)

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_id)
        for indicator_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(
            doc_id__in=indicator_doc_ids)
        if not all_indicators:
            # Nothing queued (e.g. another worker already handled them).
            return

        doc_store = get_document_store_for_doc_type(
            all_indicators[0].domain, all_indicators[0].doc_type)
        related_doc_ids = set()
        failed_indicators = set()
        rows_to_save_by_adapter = defaultdict(list)
        # BUGFIX: remember which config produced each adapter. The bulk-save
        # loop below previously reused the stale `config_id` loop variable,
        # which always held whichever config happened to be iterated last.
        config_id_by_adapter = {}
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()

        with timer:
            for doc in doc_store.iter_documents(
                    list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    config_ids.add(config_id)
                    try:
                        config = _get_config(config_id)
                    except (ResourceNotFound, StaticDataSourceConfigurationNotFoundError):
                        celery_task_logger.info(
                            "{} no longer exists, skipping".format(config_id))
                        # remove because the config no longer exists
                        _mark_config_to_remove(config_id, [indicator.pk])
                        continue
                    except ESError:
                        celery_task_logger.info(
                            "ES errored when trying to retrieve config")
                        failed_indicators.add(indicator)
                        continue

                    adapter = None
                    try:
                        adapter = get_indicator_adapter(
                            config, can_handle_laboratory=True)
                        config_id_by_adapter[adapter] = config_id
                        rows_to_save_by_adapter[adapter].extend(
                            adapter.get_all_values(doc, eval_context))
                        eval_context.reset_iteration()
                    except Exception as e:
                        failed_indicators.add(indicator)
                        handle_exception(e, config_id, doc, adapter)
                    if config and config.icds_rebuild_related_docs:
                        related_doc_ids.add(doc['_id'])

            for adapter, rows in six.iteritems(rows_to_save_by_adapter):
                doc_ids = doc_ids_from_rows(rows)
                indicators = [
                    indicator_by_doc_id[doc_id] for doc_id in doc_ids
                ]
                try:
                    adapter.save_rows(rows)
                except Exception as e:
                    # BUGFIX: set.union() returns a new set and discards it,
                    # so failures were never recorded and the indicators were
                    # wrongly deleted as processed; update() mutates in place.
                    failed_indicators.update(indicators)
                    notify_exception(
                        None,
                        "Exception bulk saving async indicators:{}".format(e))
                else:
                    # remove because it's successfully processed; use the
                    # config id recorded for THIS adapter (see BUGFIX above)
                    _mark_config_to_remove(
                        config_id_by_adapter[adapter],
                        [i.pk for i in indicators])

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(
            pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators atomically
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(
                    configs_to_remove_by_indicator_id.get(indicator.pk, []))
                indicator.save()

        # process asyncindicator for any related docs that are not rebuilt so far
        related_docs_to_rebuild = []
        for _id in related_doc_ids:
            related_docs_to_rebuild.extend(icds_get_related_docs_ids(_id))
        related_docs_to_rebuild = set(related_docs_to_rebuild) - set(
            indicator_doc_ids)
        _queue_indicators(
            AsyncIndicator.objects.filter(
                doc_id__in=related_docs_to_rebuild, date_queued=None))

        datadog_counter('commcare.async_indicator.processed_success',
                        len(processed_indicators))
        datadog_counter('commcare.async_indicator.processed_fail',
                        len(failed_indicators))
        datadog_histogram('commcare.async_indicator.processing_time',
                          timer.duration,
                          tags=['config_ids:{}'.format(config_ids)])