def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    indicator_config_ids = None
    timer = TimingContext()
    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain, first_indicator.doc_type)
        indicator_config_ids = first_indicator.indicator_config_ids
        related_docs_to_rebuild = set()

        with timer:
            for doc in doc_store.iter_documents(indicator_by_doc_id.keys()):
                indicator = indicator_by_doc_id[doc['_id']]
                successfully_processed, to_remove, rebuild_related_docs = _save_document_helper(
                    indicator, doc)
                if rebuild_related_docs:
                    related_docs_to_rebuild = related_docs_to_rebuild.union(
                        icds_get_related_docs_ids(doc['_id']))
                if successfully_processed:
                    processed_indicators.append(indicator.pk)
                else:
                    failed_indicators.append((indicator, to_remove))

        num_processed = len(processed_indicators)
        num_failed = len(failed_indicators)
        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        with transaction.atomic():
            for indicator, to_remove in failed_indicators:
                indicator.update_failure(to_remove)
                indicator.save()

    # remove any related docs that were just rebuilt
    related_docs_to_rebuild = related_docs_to_rebuild - set(doc_ids)
    # queue the docs that aren't already queued
    _queue_indicators(AsyncIndicator.objects.filter(
        doc_id__in=related_docs_to_rebuild, date_queued=None))

    datadog_counter('commcare.async_indicator.processed_success', num_processed)
    datadog_counter('commcare.async_indicator.processed_fail', num_failed)
    datadog_histogram(
        'commcare.async_indicator.processing_time', timer.duration,
        tags=[u'config_ids:{}'.format(indicator_config_ids)])
def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain, first_indicator.doc_type)

        for doc in doc_store.iter_documents(doc_ids):
            indicator = indicator_by_doc_id[doc['_id']]
            eval_context = EvaluationContext(doc)
            for config_id in indicator.indicator_config_ids:
                adapter = None
                try:
                    config = _get_config(config_id)
                    adapter = get_indicator_adapter(config, can_handle_laboratory=True)
                    adapter.save(doc, eval_context)
                    eval_context.reset_iteration()
                except (ESError, RequestError, ConnectionTimeout):
                    # couch or es had an issue so don't log it and go on to the next doc
                    failed_indicators.append(indicator.pk)
                    break
                except Exception as e:
                    # getting the config could fail before the adapter is set
                    if adapter:
                        adapter.handle_exception(doc, e)
                    failed_indicators.append(indicator.pk)
                    break
            else:
                processed_indicators.append(indicator.pk)

        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        AsyncIndicator.objects.filter(pk__in=failed_indicators).update(
            date_queued=None,
            unsuccessful_attempts=F('unsuccessful_attempts') + 1)
def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    indicator_config_ids = None
    timer = TimingContext()
    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain, first_indicator.doc_type)
        indicator_config_ids = first_indicator.indicator_config_ids

        with timer:
            for doc in doc_store.iter_documents(doc_ids):
                indicator = indicator_by_doc_id[doc['_id']]
                successfully_processed, to_remove = _save_document_helper(indicator, doc)
                if successfully_processed:
                    processed_indicators.append(indicator.pk)
                else:
                    failed_indicators.append((indicator, to_remove))

        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        with transaction.atomic():
            for indicator, to_remove in failed_indicators:
                indicator.update_failure(to_remove)
                indicator.save()

    datadog_histogram(
        'commcare.async_indicator.processing_time', timer.duration,
        tags=[
            u'config_ids:{}'.format(indicator_config_ids)
        ]
    )
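# Both timer-based save_document variants above delegate the per-document work to
# _save_document_helper, whose body is not included in this section. The sketch
# below is a hypothetical reconstruction of the contract implied by the call
# sites: it mirrors the inline per-config loop from the second variant and
# returns (successfully_processed, configs_to_remove); the first variant expects
# an additional rebuild_related_docs flag on top of this.
def _save_document_helper(indicator, doc):
    eval_context = EvaluationContext(doc)
    something_failed = False
    configs_to_remove = []
    for config_id in indicator.indicator_config_ids:
        adapter = None
        try:
            config = _get_config(config_id)
            adapter = get_indicator_adapter(config, can_handle_laboratory=True)
            adapter.save(doc, eval_context)
            eval_context.reset_iteration()
        except (ESError, RequestError, ConnectionTimeout):
            # transient backend issue: leave the config in place to retry later
            something_failed = True
        except Exception as e:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, e)
            something_failed = True
        else:
            # saved cleanly, so this config id no longer needs to be retried
            configs_to_remove.append(config_id)
    return (not something_failed, configs_to_remove)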
def update_indicators(cls, change, config_ids):
    doc_id = change.id
    doc_type = change.document['doc_type']
    domain = change.document['domain']
    config_ids = sorted(config_ids)
    indicator, created = cls.objects.get_or_create(
        doc_id=doc_id, doc_type=doc_type, domain=domain,
        defaults={'indicator_config_ids': config_ids})

    if created:
        return indicator
    elif set(config_ids) == set(indicator.indicator_config_ids):
        # nothing new to add
        return indicator

    with CriticalSection([get_async_indicator_modify_lock_key(doc_id)]):
        # Add new config ids. Need to grab indicator again in case it was
        # processed since we called get_or_create
        try:
            indicator = cls.objects.get(doc_id=doc_id)
        except cls.DoesNotExist:
            indicator = AsyncIndicator.objects.create(
                doc_id=doc_id, doc_type=doc_type, domain=domain,
                indicator_config_ids=config_ids)
        else:
            current_config_ids = set(indicator.indicator_config_ids)
            config_ids = set(config_ids)
            if config_ids - current_config_ids:
                new_config_ids = sorted(current_config_ids.union(config_ids))
                indicator.indicator_config_ids = new_config_ids
                indicator.unsuccessful_attempts = 0
                indicator.save()

    return indicator
def update_indicators(cls, change, config_ids):
    doc_id = change.id
    with CriticalSection([get_async_indicator_modify_lock_key(doc_id)]):
        try:
            indicator = cls.objects.get(doc_id=doc_id)
        except cls.DoesNotExist:
            doc_type = change.document['doc_type']
            domain = change.document['domain']
            indicator = AsyncIndicator.objects.create(
                doc_id=doc_id, doc_type=doc_type, domain=domain,
                indicator_config_ids=config_ids)
        else:
            current_config_ids = set(indicator.indicator_config_ids)
            config_ids = set(config_ids)
            if config_ids - current_config_ids:
                new_config_ids = list(current_config_ids.union(config_ids))
                indicator.indicator_config_ids = new_config_ids
                indicator.save()
    return indicator
def update_record(cls, doc_id, doc_type, domain, config_ids):
    if not isinstance(config_ids, list):
        config_ids = list(config_ids)
    config_ids = sorted(config_ids)
    indicator, created = cls.objects.get_or_create(
        doc_id=doc_id, doc_type=doc_type, domain=domain,
        defaults={'indicator_config_ids': config_ids}
    )

    if created:
        return indicator
    elif set(config_ids) == set(indicator.indicator_config_ids):
        # nothing new to add
        return indicator

    with CriticalSection([get_async_indicator_modify_lock_key(doc_id)]):
        # Add new config ids. Need to grab indicator again in case it was
        # processed since we called get_or_create
        try:
            indicator = cls.objects.get(doc_id=doc_id)
        except cls.DoesNotExist:
            indicator = AsyncIndicator.objects.create(
                doc_id=doc_id, doc_type=doc_type, domain=domain,
                indicator_config_ids=config_ids
            )
        else:
            current_config_ids = set(indicator.indicator_config_ids)
            config_ids = set(config_ids)
            if config_ids - current_config_ids:
                new_config_ids = sorted(current_config_ids.union(config_ids))
                indicator.indicator_config_ids = new_config_ids
                indicator.unsuccessful_attempts = 0
                indicator.save()

    return indicator
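# A hypothetical call site to contrast the two entry points above: update_indicators
# unpacks doc_type and domain from a pillow change object itself, while update_record
# expects the caller to pass them explicitly. Both read as classmethods on the
# AsyncIndicator model; the processor name and relevant_config_ids argument below
# are assumptions for illustration only.
def process_change(change, relevant_config_ids):
    doc = change.document
    return AsyncIndicator.update_record(
        change.id, doc['doc_type'], doc['domain'], relevant_config_ids)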
def build_async_indicators(indicator_doc_ids):
    # written to be used with _queue_indicators, indicator_doc_ids must
    # be a chunk of 100
    memoizers = {'configs': {}, 'adapters': {}}
    assert len(indicator_doc_ids) <= ASYNC_INDICATOR_CHUNK_SIZE

    def handle_exception(exception, config_id, doc, adapter):
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            metrics_counter(metric, tags={'config_id': config_id})

    def doc_ids_from_rows(rows):
        formatted_rows = [
            {column.column.database_column_name.decode('utf-8'): column.value
             for column in row}
            for row in rows
        ]
        return set(row['doc_id'] for row in formatted_rows)

    def _get_config(config_id):
        config_by_id = memoizers['configs']
        if config_id in config_by_id:
            return config_by_id[config_id]
        else:
            config = _get_config_by_id(config_id)
            config_by_id[config_id] = config
            return config

    def _get_adapter(config):
        adapter_by_config = memoizers['adapters']
        if config._id in adapter_by_config:
            return adapter_by_config[config._id]
        else:
            adapter = get_indicator_adapter(config, load_source='build_async_indicators')
            adapter_by_config[config._id] = adapter
            return adapter

    def _metrics_timer(step, config_id=None):
        tags = {
            'action': step,
        }
        if config_id and settings.ENTERPRISE_MODE:
            tags['config_id'] = config_id
        else:
            # Prometheus requires consistent tags even if not available
            tags['config_id'] = None
        return metrics_histogram_timer(
            'commcare.async_indicator.timing',
            timing_buckets=(.03, .1, .3, 1, 3, 10),
            tags=tags
        )

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_doc_id)
        for indicator_doc_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(
            doc_id__in=indicator_doc_ids
        )
        if not all_indicators:
            return

        doc_store = get_document_store_for_doc_type(
            all_indicators[0].domain, all_indicators[0].doc_type,
            load_source="build_async_indicators",
        )
        failed_indicators = set()
        rows_to_save_by_adapter = defaultdict(list)
        docs_to_delete_by_adapter = defaultdict(list)
        # there will always be one AsyncIndicator per doc id
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()

        with timer:
            for doc in doc_store.iter_documents(list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    with _metrics_timer('transform', config_id):
                        config_ids.add(config_id)
                        try:
                            config = _get_config(config_id)
                        except (ResourceNotFound, StaticDataSourceConfigurationNotFoundError):
                            celery_task_logger.info("{} no longer exists, skipping".format(config_id))
                            # remove because the config no longer exists
                            _mark_config_to_remove(config_id, [indicator.pk])
                            continue
                        except ESError:
                            celery_task_logger.info("ES errored when trying to retrieve config")
                            failed_indicators.add(indicator)
                            continue

                        adapter = None
                        try:
                            adapter = _get_adapter(config)
                            rows_to_save = adapter.get_all_values(doc, eval_context)
                            if rows_to_save:
                                rows_to_save_by_adapter[adapter].extend(rows_to_save)
                            else:
                                docs_to_delete_by_adapter[adapter].append(doc)
                            eval_context.reset_iteration()
                        except Exception as e:
                            failed_indicators.add(indicator)
                            handle_exception(e, config_id, doc, adapter)

        with _metrics_timer('single_batch_update'):
            for adapter, rows in rows_to_save_by_adapter.items():
                doc_ids = doc_ids_from_rows(rows)
                indicators = [indicator_by_doc_id[doc_id] for doc_id in doc_ids]
                try:
                    with _metrics_timer('update', adapter.config._id):
                        adapter.save_rows(rows, use_shard_col=True)
                except Exception as e:
                    failed_indicators.update(indicators)
                    message = str(e)
                    notify_exception(None, "Exception bulk saving async indicators:{}".format(message))
                else:
                    # remove because it's successfully processed
                    _mark_config_to_remove(
                        adapter.config._id,
                        [i.pk for i in indicators]
                    )

        with _metrics_timer('single_batch_delete'):
            for adapter, docs in docs_to_delete_by_adapter.items():
                with _metrics_timer('delete', adapter.config._id):
                    adapter.bulk_delete(docs)

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(
                    configs_to_remove_by_indicator_id.get(indicator.pk, [])
                )
                indicator.save()

    metrics_counter('commcare.async_indicator.processed_success', len(processed_indicators))
    metrics_counter('commcare.async_indicator.processed_fail', len(failed_indicators))
    metrics_counter(
        'commcare.async_indicator.processing_time', timer.duration,
        tags={'config_ids': config_ids}
    )
    metrics_counter(
        'commcare.async_indicator.processed_total', len(indicator_doc_ids),
        tags={'config_ids': config_ids}
    )
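# The comment at the top of build_async_indicators says its input must arrive from
# _queue_indicators in chunks of at most ASYNC_INDICATOR_CHUNK_SIZE. That helper is
# not shown in this section; the sketch below is an assumed shape only. The chunked()
# utility, the utcnow() stamping of date_queued (implied by the date_queued=None
# filters above), and dispatching via a celery .delay() call are all assumptions.
from datetime import datetime

def _queue_indicators(async_indicators):
    for chunk in chunked(async_indicators, ASYNC_INDICATOR_CHUNK_SIZE):
        doc_ids = [indicator.doc_id for indicator in chunk]
        # mark the records as queued so concurrent runs don't pick them up twice
        AsyncIndicator.objects.filter(doc_id__in=doc_ids).update(
            date_queued=datetime.utcnow())
        build_async_indicators.delay(doc_ids)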
def _build_async_indicators(indicator_doc_ids):
    def handle_exception(exception, config_id, doc, adapter):
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            datadog_counter(metric, 1, tags={
                'config_id': config_id,
                'doc_id': doc['_id']
            })

    def doc_ids_from_rows(rows):
        formatted_rows = [{
            column.column.database_column_name.decode('utf-8'): column.value
            for column in row
        } for row in rows]
        return set(row['doc_id'] for row in formatted_rows)

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_id)
        for indicator_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(
            doc_id__in=indicator_doc_ids)
        if not all_indicators:
            return

        doc_store = get_document_store_for_doc_type(all_indicators[0].domain,
                                                    all_indicators[0].doc_type)
        failed_indicators = set()
        rows_to_save_by_adapter = defaultdict(list)
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()

        with timer:
            for doc in doc_store.iter_documents(
                    list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    config_ids.add(config_id)
                    try:
                        config = _get_config_by_id(config_id)
                    except (ResourceNotFound, StaticDataSourceConfigurationNotFoundError):
                        celery_task_logger.info(
                            "{} no longer exists, skipping".format(config_id))
                        # remove because the config no longer exists
                        _mark_config_to_remove(config_id, [indicator.pk])
                        continue
                    except ESError:
                        celery_task_logger.info(
                            "ES errored when trying to retrieve config")
                        failed_indicators.add(indicator)
                        continue
                    adapter = None
                    try:
                        adapter = get_indicator_adapter(config)
                        rows_to_save_by_adapter[adapter].extend(
                            adapter.get_all_values(doc, eval_context))
                        eval_context.reset_iteration()
                    except Exception as e:
                        failed_indicators.add(indicator)
                        handle_exception(e, config_id, doc, adapter)

        for adapter, rows in six.iteritems(rows_to_save_by_adapter):
            doc_ids = doc_ids_from_rows(rows)
            indicators = [
                indicator_by_doc_id[doc_id] for doc_id in doc_ids
            ]
            try:
                adapter.save_rows(rows)
            except Exception as e:
                failed_indicators.update(indicators)
                message = six.text_type(e)
                notify_exception(
                    None,
                    "Exception bulk saving async indicators:{}".format(
                        message))
            else:
                # remove because it's successfully processed
                _mark_config_to_remove(adapter.config._id,
                                       [i.pk for i in indicators])

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(
            pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(
                    configs_to_remove_by_indicator_id.get(indicator.pk, []))
                indicator.save()

    datadog_counter('commcare.async_indicator.processed_success',
                    len(processed_indicators))
    datadog_counter('commcare.async_indicator.processed_fail',
                    len(failed_indicators))
    datadog_histogram('commcare.async_indicator.processing_time',
                      timer.duration / len(indicator_doc_ids),
                      tags=[
                          'config_ids:{}'.format(config_ids),
                      ])
def _build_async_indicators(indicator_doc_ids):
    def handle_exception(exception, config_id, doc, adapter):
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            datadog_counter(metric, 1, tags={'config_id': config_id, 'doc_id': doc['_id']})

    def doc_ids_from_rows(rows):
        formatted_rows = [
            {column.column.database_column_name.decode('utf-8'): column.value
             for column in row}
            for row in rows
        ]
        return set(row['doc_id'] for row in formatted_rows)

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_id)
        for indicator_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(
            doc_id__in=indicator_doc_ids
        )
        if not all_indicators:
            return

        doc_store = get_document_store_for_doc_type(
            all_indicators[0].domain, all_indicators[0].doc_type,
            load_source="build_async_indicators",
        )
        failed_indicators = set()
        rows_to_save_by_adapter = defaultdict(list)
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()

        with timer:
            for doc in doc_store.iter_documents(list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    config_ids.add(config_id)
                    try:
                        config = _get_config_by_id(config_id)
                    except (ResourceNotFound, StaticDataSourceConfigurationNotFoundError):
                        celery_task_logger.info("{} no longer exists, skipping".format(config_id))
                        # remove because the config no longer exists
                        _mark_config_to_remove(config_id, [indicator.pk])
                        continue
                    except ESError:
                        celery_task_logger.info("ES errored when trying to retrieve config")
                        failed_indicators.add(indicator)
                        continue
                    adapter = None
                    try:
                        adapter = get_indicator_adapter(config, load_source='build_async_indicators')
                        rows_to_save_by_adapter[adapter].extend(adapter.get_all_values(doc, eval_context))
                        eval_context.reset_iteration()
                    except Exception as e:
                        failed_indicators.add(indicator)
                        handle_exception(e, config_id, doc, adapter)

        for adapter, rows in six.iteritems(rows_to_save_by_adapter):
            doc_ids = doc_ids_from_rows(rows)
            indicators = [indicator_by_doc_id[doc_id] for doc_id in doc_ids]
            try:
                adapter.save_rows(rows)
            except Exception as e:
                failed_indicators.update(indicators)
                message = six.text_type(e)
                notify_exception(None, "Exception bulk saving async indicators:{}".format(message))
            else:
                # remove because it's successfully processed
                _mark_config_to_remove(
                    adapter.config._id,
                    [i.pk for i in indicators]
                )

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(
                    configs_to_remove_by_indicator_id.get(indicator.pk, [])
                )
                indicator.save()

    datadog_counter('commcare.async_indicator.processed_success', len(processed_indicators))
    datadog_counter('commcare.async_indicator.processed_fail', len(failed_indicators))
    datadog_histogram(
        'commcare.async_indicator.processing_time',
        timer.duration / len(indicator_doc_ids),
        tags=[
            'config_ids:{}'.format(config_ids),
        ]
    )
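# Every variant above that fails an indicator calls indicator.update_failure(to_remove)
# followed by indicator.save(). The model method itself is not part of this section;
# the sketch below is a hypothetical reconstruction, inferred from the bulk
# .update(date_queued=None, unsuccessful_attempts=F('unsuccessful_attempts') + 1)
# in the second save_document variant and from the configs_to_remove bookkeeping.
def update_failure(self, to_remove):
    # method on the AsyncIndicator model: drop config ids that were processed
    # (or whose config no longer exists) so they are not retried, clear the
    # queued marker, and count the failed attempt; callers then .save()
    self.indicator_config_ids = [
        config_id for config_id in self.indicator_config_ids
        if config_id not in to_remove
    ]
    self.date_queued = None
    self.unsuccessful_attempts += 1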