Example #1
    def __init__(self,
                 project=None,
                 restore_user=None,
                 params=None,
                 cache_settings=None,
                 is_async=False,
                 case_sync=None):
        assert isinstance(restore_user, OTARestoreUser)
        self.project = project
        self.domain = project.name if project else ''
        self.restore_user = restore_user
        self.params = params or RestoreParams()
        self.cache_settings = cache_settings or RestoreCacheSettings()
        self.is_async = is_async

        self.restore_state = RestoreState(
            self.project,
            self.restore_user,
            self.params,
            is_async,
            self.cache_settings.overwrite_cache,
            case_sync=case_sync,
        )

        self.force_cache = self.cache_settings.force_cache
        self.cache_timeout = self.cache_settings.cache_timeout
        self.overwrite_cache = self.cache_settings.overwrite_cache

        self.timing_context = TimingContext('restore-{}-{}'.format(
            self.domain, self.restore_user.username))
Example #2
def metrics_histogram_timer(metric: str,
                            timing_buckets: Iterable[int],
                            tags: Dict[str, str] = None,
                            bucket_tag: str = 'duration',
                            callback: Callable = None):
    """
    Create a context manager that times and reports to the metric providers as a histogram

    Example Usage:

    ::

        timer = metrics_histogram_timer('commcare.some.special.metric', tags={
            'type': type,
        }, timing_buckets=(.001, .01, .1, 1, 10, 100))
        with timer:
            some_special_thing()

    This will result in a call to `metrics_histogram` with the timer value.

    Note: Histograms are implemented differently by each provider. See documentation for details.

    :param metric: Name of the metric (must start with 'commcare.')
    :param tags: metric tags to include
    :param timing_buckets: sequence of numbers representing time thresholds, in seconds
    :param bucket_tag: The name of the bucket tag to use (if used by the underlying provider)
    :param callback: a callable which will be called when exiting the context manager with a single argument
                     of the timer duration
    :return: A context manager that will perform the specified timing
             and send the specified metric

    """
    timer = TimingContext()
    original_stop = timer.stop

    def new_stop(name=None):
        original_stop(name)
        if callback:
            callback(timer.duration)
        metrics_histogram(metric,
                          timer.duration,
                          bucket_tag=bucket_tag,
                          buckets=timing_buckets,
                          bucket_unit='s',
                          tags=tags)
        timer_name = metric
        if metric.startswith('commcare.'):
            timer_name = ".".join(
                metric.split('.')[1:])  # remove the 'commcare.' prefix
        add_breadcrumb(
            category="timing",
            message=f"{timer_name}: {timer.duration:0.3f}",
            level="info",
        )

    timer.stop = new_stop
    return timer
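
The `callback` parameter documented above does not appear in the docstring's usage example. A minimal sketch of wiring it up, reusing the names from that example (the callback body and the one-second threshold are illustrative assumptions, not part of the original):

import logging

def log_slow_block(duration):
    # Called with the timer duration (in seconds) when the timer stops,
    # just before the histogram metric is reported.
    if duration > 1:
        logging.getLogger(__name__).info("slow block: %.3fs", duration)

timer = metrics_histogram_timer(
    'commcare.some.special.metric',              # must start with 'commcare.'
    timing_buckets=(.001, .01, .1, 1, 10, 100),  # thresholds in seconds
    callback=log_slow_block,
)
with timer:
    some_special_thing()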
Example #3
 def process_with_error_handling(self, change, processor=None):
     # Process the given change on all serial processors, or only on the given processor.
     # Tracks success/failure in datadog, but not the timer metric; the caller updates that.
     timer = TimingContext()
     is_success = False
     try:
         with timer:
             if processor:
                 processor.process_change(change)
             else:
                 # process on serial processors
                 self.process_change(change, serial_only=True)
         is_success = True
     except Exception as ex:
         try:
             handle_pillow_error(self, change, ex)
         except Exception as e:
             notify_exception(
                 None, 'processor error in pillow {} {}'.format(
                     self.get_name(),
                     e,
                 ))
             raise
     if is_success:
         self._record_change_success_in_datadog(change)
     return timer.duration
Example #4
def slow_function_view(request):
    timer = TimingContext()
    with timer("sleep"):
        time.sleep(0.2)
    response = HttpResponse()
    response.request_timer = timer
    return response
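
The view above attaches its TimingContext to the response for later inspection. A hypothetical middleware sketch that reads it back, assuming the `to_list()` / `.name` / `.duration` API used elsewhere on this page:

def request_timing_middleware(get_response):
    # Hypothetical sketch: log per-request timing data recorded by views
    # that attach a TimingContext as `response.request_timer`.
    def middleware(request):
        response = get_response(request)
        timer = getattr(response, 'request_timer', None)
        if timer is not None:
            for sub_timer in timer.to_list(exclude_root=True):
                print('{}: {:.3f}s'.format(sub_timer.name, sub_timer.duration))
        return response
    return middleware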
Example #5
File: users.py Project: soitun/commcare-hq
 def get_context_data(self, **kwargs):
     context = super(AdminRestoreView, self).get_context_data(**kwargs)
     response, timing_context = self._get_restore_response()
     timing_context = timing_context or TimingContext(self.user.username)
     if isinstance(response, StreamingHttpResponse):
         string_payload = b''.join(response.streaming_content)
         xml_payload = etree.fromstring(string_payload)
         context.update(self.get_stats_from_xml(xml_payload))
     else:
         if response.status_code in (401, 404):
             # corehq.apps.ota.views.get_restore_response couldn't find user or user didn't have perms
             xml_payload = E.error(response.content)
         elif response.status_code == 412:
             # RestoreConfig.get_response returned HttpResponse 412. Response content is already XML
             xml_payload = etree.fromstring(response.content)
         else:
             message = _(
                 'Unexpected restore response {}: {}. '
                 'If you believe this is a bug please report an issue.'
             ).format(response.status_code,
                      response.content.decode('utf-8'))
             xml_payload = E.error(message)
     formatted_payload = etree.tostring(xml_payload,
                                        pretty_print=True,
                                        encoding='utf-8').decode('utf-8')
     hide_xml = self.request.GET.get('hide_xml') == 'true'
     context.update({
         'payload': formatted_payload,
         'status_code': response.status_code,
         'timing_data': timing_context.to_list(),
         'hide_xml': hide_xml,
     })
     return context
Example #6
 def process_changes(self, since, forever):
     """
     Process changes from the changes stream.
     """
     context = PillowRuntimeContext(changes_seen=0)
     try:
         for change in self.get_change_feed().iter_changes(since=since or None, forever=forever):
             if change:
                 timer = TimingContext()
                 try:
                     context.changes_seen += 1
                     with timer:
                         self.process_with_error_handling(change)
                 except Exception as e:
                     notify_exception(None, u'processor error in pillow {} {}'.format(
                         self.get_name(), e,
                     ))
                     self._record_change_exception_in_datadog(change)
                     raise
                 else:
                     updated = self.fire_change_processed_event(change, context)
                     if updated:
                         self._record_checkpoint_in_datadog()
                     self._record_change_success_in_datadog(change)
                 self._record_change_in_datadog(change, timer)
             else:
                 updated = self.checkpoint.touch(min_interval=CHECKPOINT_MIN_WAIT)
                 if updated:
                     self._record_checkpoint_in_datadog()
     except PillowtopCheckpointReset:
         self.process_changes(since=self.get_last_checkpoint_sequence(), forever=forever)
Example #7
    def process_xforms_for_cases(xforms, case_db, timing_context=None):
        from casexml.apps.case.xform import process_cases_with_casedb
        from corehq.apps.commtrack.processing import process_stock

        timing_context = timing_context or TimingContext()

        instance = xforms[0]

        with timing_context("process_cases"):
            case_result = process_cases_with_casedb(xforms, case_db)
        with timing_context("process_ledgers"):
            stock_result = process_stock(xforms, case_db)
            stock_result.populate_models()

        modified_on_date = instance.received_on
        if getattr(instance, 'edited_on',
                   None) and instance.edited_on > instance.received_on:
            modified_on_date = instance.edited_on

        with timing_context("check_cases_before_save"):
            cases = case_db.get_cases_for_saving(modified_on_date)

        return CaseStockProcessingResult(
            case_result=case_result,
            case_models=cases,
            stock_result=stock_result,
        )
Example #8
    def __init__(self, instance=None, attachments=None, auth_context=None,
                 domain=None, app_id=None, build_id=None, path=None,
                 location=None, submit_ip=None, openrosa_headers=None,
                 last_sync_token=None, received_on=None, date_header=None,
                 partial_submission=False, case_db=None, force_logs=False,
                 timing_context=None):
        assert domain, "'domain' is required"
        assert instance, instance
        assert not isinstance(instance, HttpRequest), instance
        self.domain = domain
        self.app_id = app_id
        self.build_id = build_id
        # get_location has good default
        self.location = location or couchforms.get_location()
        self.received_on = received_on
        self.date_header = date_header
        self.submit_ip = submit_ip
        self.last_sync_token = last_sync_token
        self.openrosa_headers = openrosa_headers or {}
        self.instance = instance
        self.attachments = attachments or {}
        self.auth_context = auth_context or DefaultAuthContext()
        self.path = path
        self.interface = FormProcessorInterface(domain)
        self.partial_submission = partial_submission
        # always None except in the case where a system form is being processed as part of another submission
        # e.g. for closing extension cases
        self.case_db = case_db
        if case_db:
            assert case_db.domain == domain
        self.force_logs = force_logs

        self.is_openrosa_version3 = self.openrosa_headers.get(OPENROSA_VERSION_HEADER, '') == OPENROSA_VERSION_3
        self.track_load = form_load_counter("form_submission", domain)
        self.timing_context = timing_context or TimingContext()
Example #9
 def get_context_data(self, **kwargs):
     context = super(AdminRestoreView, self).get_context_data(**kwargs)
     response, timing_context = self._get_restore_response()
     timing_context = timing_context or TimingContext(self.user.username)
     string_payload = ''.join(response.streaming_content)
     xml_payload = etree.fromstring(string_payload)
     restore_id_element = xml_payload.find(
         '{{{0}}}Sync/{{{0}}}restore_id'.format(SYNC_XMLNS))
     num_cases = len(
         xml_payload.findall(
             '{http://commcarehq.org/case/transaction/v2}case'))
     formatted_payload = etree.tostring(xml_payload, pretty_print=True)
     context.update({
         'payload': formatted_payload,
         'restore_id': restore_id_element.text if restore_id_element is not None else None,
         'status_code': response.status_code,
         'timing_data': timing_context.to_list(),
         'num_cases': num_cases,
     })
     return context
Example #10
def migration_restore(request, domain, case_id):
    """Restore endpoint used in bulk case migrations

    Accepts the provided case_id and returns a restore for the user containing:
    * Registration block
    * The passed in case and its full network of cases
    """
    domain_obj = Domain.get_by_name(domain)
    restore_user = request.couch_user
    restore_params = RestoreParams(device_id="case_migration", version=V2)
    restore_state = RestoreState(domain_obj,
                                 restore_user.to_ota_restore_user(domain),
                                 restore_params)
    restore_state.start_sync()
    timing_context = TimingContext('migration-restore-{}-{}'.format(
        domain, restore_user.username))
    case_ids = get_related_case_ids(domain, case_id)
    with RestoreContent(restore_user.username) as content:
        content.append(get_registration_element(restore_user))

        sync_payload = CleanOwnerSyncPayload(timing_context, case_ids,
                                             restore_state)
        sync_payload.extend_response(content)

        payload = content.get_fileobj()

    restore_state.finish_sync()
    return RestoreResponse(payload).get_http_response()
Example #11
def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    indicator_config_ids = None
    timer = TimingContext()
    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain,
                                       first_indicator.doc_type)
        indicator_config_ids = first_indicator.indicator_config_ids
        related_docs_to_rebuild = set()

        with timer:
            for doc in doc_store.iter_documents(indicator_by_doc_id.keys()):
                indicator = indicator_by_doc_id[doc['_id']]
                successfully_processed, to_remove, rebuild_related_docs = _save_document_helper(
                    indicator, doc)
                if rebuild_related_docs:
                    related_docs_to_rebuild = related_docs_to_rebuild.union(
                        icds_get_related_docs_ids(doc['_id']))
                if successfully_processed:
                    processed_indicators.append(indicator.pk)
                else:
                    failed_indicators.append((indicator, to_remove))

        num_processed = len(processed_indicators)
        num_failed = len(failed_indicators)
        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        with transaction.atomic():
            for indicator, to_remove in failed_indicators:
                indicator.update_failure(to_remove)
                indicator.save()

    # remove any related docs that were just rebuilt
    related_docs_to_rebuild = related_docs_to_rebuild - set(doc_ids)
    # queue the docs that aren't already queued
    _queue_indicators(
        AsyncIndicator.objects.filter(doc_id__in=related_docs_to_rebuild,
                                      date_queued=None))

    datadog_counter('commcare.async_indicator.processed_success',
                    num_processed)
    datadog_counter('commcare.async_indicator.processed_fail', num_failed)
    datadog_histogram('commcare.async_indicator.processing_time',
                      timer.duration,
                      tags=[u'config_ids:{}'.format(indicator_config_ids)])
Example #12
 def get_context_data(self, **kwargs):
     context = super(AdminRestoreView, self).get_context_data(**kwargs)
     response, timing_context = self._get_restore_response()
     timing_context = timing_context or TimingContext(self.user.username)
     if isinstance(response, StreamingHttpResponse):
         string_payload = ''.join(response.streaming_content)
         xml_payload = etree.fromstring(string_payload)
         restore_id_element = xml_payload.find('{{{0}}}Sync/{{{0}}}restore_id'.format(SYNC_XMLNS))
         num_cases = len(xml_payload.findall('{http://commcarehq.org/case/transaction/v2}case'))
     else:
         if response.status_code in (401, 404):
             # corehq.apps.ota.views.get_restore_response couldn't find user or user didn't have perms
             xml_payload = E.error(response.content)
         elif response.status_code == 412:
             # RestoreConfig.get_response returned HttpResponse 412. Response content is already XML
             xml_payload = etree.fromstring(response.content)
         else:
             message = _('Unexpected restore response {}: {}. '
                         'If you believe this is a bug please report an issue.').format(response.status_code,
                                                                                        response.content)
             xml_payload = E.error(message)
         restore_id_element = None
         num_cases = 0
     formatted_payload = etree.tostring(xml_payload, pretty_print=True)
     context.update({
         'payload': formatted_payload,
         'restore_id': restore_id_element.text if restore_id_element is not None else None,
         'status_code': response.status_code,
         'timing_data': timing_context.to_list(),
         'num_cases': num_cases,
     })
     return context
Example #13
 def submit_case_blocks(self, caseblocks):
     timer = None
     try:
         with TimingContext() as timer:
             return super().submit_case_blocks(caseblocks)
     finally:
         if timer:
             self._last_submission_duration = timer.duration
Example #14
 def do_import(self, spreadsheet):
     with TimingContext() as timer:
         results = super().do_import(spreadsheet)
     try:
         self._report_import_timings(timer, results)
     except Exception:
         notify_exception(None, "Error reporting case import timings")
     finally:
         return results
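
`_report_import_timings` is not included in this snippet. A hypothetical sketch of such a helper, reporting through `metrics_histogram` as it is called in Example #2 (the metric name and buckets are illustrative assumptions):

def _report_import_timings(self, timer, results):
    # Hypothetical sketch of a method on the importer class; `results`
    # is available for tagging if needed. The metric name and buckets
    # are illustrative only, not taken from the source.
    metrics_histogram(
        'commcare.case_importer.duration',
        timer.duration,
        bucket_tag='duration',
        buckets=(1, 10, 60, 600),
        bucket_unit='s',
    )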
Example #15
def run_checks(checks_to_do):
    greenlets = []
    with TimingContext() as timer:
        for check_name in checks_to_do:
            if check_name not in CHECKS:
                raise UnknownCheckException(check_name)

            greenlets.append(gevent.spawn(_run_check, check_name, timer))
        gevent.joinall(greenlets)
    return [greenlet.value for greenlet in greenlets]
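
`_run_check` is not shown in this snippet; each greenlet receives the shared TimingContext. A sketch of what it might look like, mirroring the serial version in Example #27 below (the return value feeding `greenlet.value` is an assumption):

def _run_check(check_name, timer):
    # Sketch only: time the check under its own named sub-timer on the
    # shared TimingContext, as the serial loop in Example #27 does.
    check_info = CHECKS[check_name]
    with timer(check_name):
        try:
            status = check_info['check_func']()
        except Exception as e:
            status = ServiceStatus(False, "{} raised an error".format(check_name), e)
        status.duration = timer.peek().duration
    return check_name, status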
Example #16
 def dispatch(self, request_type, request, **kwargs):
     if not toggles.ODATA.enabled_for_request(request):
         raise ImmediateHttpResponse(
             response=HttpResponseNotFound('Feature flag not enabled.'))
     self.config_id = kwargs['config_id']
     with TimingContext() as timer:
         response = super(ODataFormResource,
                          self).dispatch(request_type, request, **kwargs)
     record_feed_access_in_datadog(request, self.config_id, timer.duration,
                                   response)
     return response
Example #17
 def dispatch(self, request_type, request, **kwargs):
     if not BI_INTEGRATION_PREVIEW.enabled_for_request(request):
         raise ImmediateHttpResponse(
             response=HttpResponseNotFound('Feature flag not enabled.'))
     self.config_id = kwargs['config_id']
     self.table_id = int(kwargs.get('table_id', 0))
     with TimingContext() as timer:
         response = super(ODataCaseResource,
                          self).dispatch(request_type, request, **kwargs)
     record_feed_access_in_datadog(request, self.config_id, timer.duration,
                                   response)
     return response
Example #18
File: v0_5.py Project: caktus/commcare-hq
 def dispatch(self, request_type, request, **kwargs):
     if not domain_has_privilege(request.domain, privileges.ODATA_FEED):
         raise ImmediateHttpResponse(
             response=HttpResponseNotFound('Feature flag not enabled.'))
     self.config_id = kwargs['config_id']
     self.table_id = int(kwargs.get('table_id', 0))
     with TimingContext() as timer:
         response = super(BaseODataResource,
                          self).dispatch(request_type, request, **kwargs)
     record_feed_access_in_datadog(request, self.config_id, timer.duration,
                                   response)
     return response
Example #19
def datadog_bucket_timer(metric, tags, timing_buckets, callback=None):
    """
    create a context manager that times and reports to datadog using timing buckets

    adds a 'duration' tag specifying which predefined timing bucket the timing fell into,
    see the `bucket_value` function for more info.

    Example Usage:

        timer = datadog_bucket_timer('commcare.some.special.metric', tags=[
            'type:{}'.format(type),
        ], timing_buckets=(.001, .01, .1, 1, 10, 100))
        with timer:
            some_special_thing()

    This will result in a datadog counter metric with a 'duration' tag, with the possible values
    lt_0.001, lt_0.01, lt_0.1, lt_001, lt_010, lt_100, and over_100.

    :param metric: Name of the datadog metric (must start with 'commcare.')
    :param tags: Datadog tags to include
    :param timing_buckets: sequence of numbers representing time thresholds, in seconds
    :return: A context manager that will perform the specified timing
             and send the specified metric

    """
    timer = TimingContext()
    original_stop = timer.stop

    def new_stop(name=None):
        original_stop(name)
        if callback:
            callback(timer.duration)
        datadog_counter(metric,
                        tags=list(tags) + [
                            'duration:%s' %
                            bucket_value(timer.duration, timing_buckets, 's')
                        ])

    timer.stop = new_stop
    return timer
Example #20
    def _batch_process_with_error_handling(self, changes_chunk):
        """
        Process the given chunk in batch mode, first on the batch processors
        and only later, one by one, on the serial processors, so that docs
        can be fetched only once during bulk batch processing and cached via
        change.get_document for the serial processors. To avoid the re-lookup,
        at least one batch processor should use change.set_document after the
        docs are fetched in bulk.

        If there is an exception in chunked processing, this falls back
        to serial processing.
        """
        processing_time = 0

        def reprocess_serially(chunk, processor):
            for change in chunk:
                self.process_with_error_handling(change, processor)

        for processor in self.batch_processors:
            if not changes_chunk:
                return set(), 0

            changes_chunk = self._deduplicate_changes(changes_chunk)
            retry_changes = set()
            timer = TimingContext()
            with timer:
                try:
                    retry_changes, change_exceptions = processor.process_changes_chunk(
                        changes_chunk)
                except Exception as ex:
                    notify_exception(
                        None,
                        "{pillow_name} Error in processing changes chunk {change_ids}: {ex}"
                        .format(pillow_name=self.get_name(),
                                change_ids=[c.id for c in changes_chunk],
                                ex=ex))
                    self._record_batch_exception_in_datadog(processor)
                    for change in set(changes_chunk) - set(retry_changes):
                        self._record_change_success_in_datadog(
                            change, processor)
                    # fall back to processing one by one
                    reprocess_serially(changes_chunk, processor)
                else:
                    # fall back to processing one by one for failed changes
                    for change, exception in change_exceptions:
                        handle_pillow_error(self, change, exception)
                    reprocess_serially(retry_changes, processor)
                processing_time += timer.duration
        # process on serial_processors
        for change in changes_chunk:
            processing_time += self.process_with_error_handling(change)
        self._record_datadog_metrics(changes_chunk, processing_time)
Example #21
def rate_limit_submission_by_delaying(domain, max_wait):
    if not submission_rate_limiter.allow_usage(domain):
        with TimingContext() as timer:
            acquired = submission_rate_limiter.wait(domain, timeout=max_wait)
        if acquired:
            duration_tag = bucket_value(timer.duration, [1, 5, 10, 15, 20], unit='s')
        else:
            duration_tag = 'timeout'
        datadog_counter('commcare.xform_submissions.rate_limited.test', tags=[
            'domain:{}'.format(domain),
            'duration:{}'.format(duration_tag)
        ])
    submission_rate_limiter.report_usage(domain)
Example #22
 def custom_data(self, selected_location, domain):
     timer = TimingContext()
     with timer:
         to_ret = self._custom_data(selected_location, domain)
     if selected_location:
         loc_type = selected_location.location_type.name
     else:
         loc_type = None
     datadog_histogram("commcare.icds.block_reports.custom_data_time",
                       timer.duration,
                       tags=["location_type:{}".format(loc_type),
                             "report_slug:{}".format(self.slug)])
     return to_ret
Example #23
def datadog_bucket_timer(metric, tags, timing_buckets, callback=None):
    """
    create a context manager that times and reports to datadog using timing buckets

    adds a 'duration' tag specifying which predefined timing bucket the timing fell into,
    see the `bucket_value` function for more info.

    Example Usage:

        timer = datadog_bucket_timer('commcare.some.special.metric', tags=[
            'type:{}'.format(type),
        ], timing_buckets=(.001, .01, .1, 1, 10, 100))
        with timer:
            some_special_thing()

    This will result in a datadog counter metric with a 'duration' tag, with the possible values
    lt_0.001, lt_0.01, lt_0.1, lt_001, lt_010, lt_100, and over_100.

    :param metric: Name of the datadog metric (must start with 'commcare.')
    :param tags: Datadog tags to include
    :param timing_buckets: sequence of numbers representing time thresholds, in seconds
    :return: A context manager that will perform the specified timing
             and send the specified metric

    """
    timer = TimingContext()
    original_stop = timer.stop

    def new_stop(name=None):
        original_stop(name)
        if callback:
            callback(timer.duration)
        datadog_counter(
            metric,
            tags=list(tags) + ['duration:%s' % bucket_value(timer.duration, timing_buckets, 's')]
        )

    timer.stop = new_stop
    return timer
Example #24
def _delay_and_report_rate_limit_submission(domain, max_wait, datadog_metric):
    with TimingContext() as timer:
        acquired = submission_rate_limiter.wait(domain, timeout=max_wait)
    if acquired:
        duration_tag = bucket_value(timer.duration, [.5, 1, 5, 10, 15], unit='s')
    elif timer.duration < max_wait:
        duration_tag = 'quick_reject'
    else:
        duration_tag = 'delayed_reject'
    datadog_counter(datadog_metric, tags=[
        f'domain:{domain}',
        f'duration:{duration_tag}',
        f'throttle_method:{"delay" if acquired else "reject"}'
    ])
    return acquired
Example #25
    def migrate(self):
        self.log_info('migrating domain {}'.format(self.domain))
        with TimingContext("couch_sql_migration") as timing_context:
            self.timing_context = timing_context
            with timing_context('main_forms'):
                self._process_main_forms()
            with timing_context("unprocessed_forms"):
                self._copy_unprocessed_forms()
            with timing_context("unprocessed_cases"):
                self._copy_unprocessed_cases()
            with timing_context("case_diffs"):
                self._calculate_case_diffs()

        self._send_timings(timing_context)
        self.log_info('migrated domain {}'.format(self.domain))
Example #26
    def get_context_data(self, **kwargs):
        context = super(AdminRestoreView, self).get_context_data(**kwargs)
        response, timing_context = self._get_restore_response()
        timing_context = timing_context or TimingContext(self.user.username)
        if isinstance(response, StreamingHttpResponse):
            string_payload = b''.join(response.streaming_content)
            xml_payload = etree.fromstring(string_payload)
            restore_id_element = xml_payload.find('{{{0}}}Sync/{{{0}}}restore_id'.format(SYNC_XMLNS))
            cases = xml_payload.findall('{http://commcarehq.org/case/transaction/v2}case')
            num_cases = len(cases)

            create_case_type = filter(None, [case.find(
                '{http://commcarehq.org/case/transaction/v2}create/'
                '{http://commcarehq.org/case/transaction/v2}case_type'
            ) for case in cases])
            update_case_type = filter(None, [case.find(
                '{http://commcarehq.org/case/transaction/v2}update/'
                '{http://commcarehq.org/case/transaction/v2}case_type'
            ) for case in cases])
Example #27
def run_checks(checks_to_do):
    statuses = []
    with TimingContext() as timer:
        for check_name in checks_to_do:
            if check_name not in CHECKS:
                raise UnknownCheckException(check_name)

            check_info = CHECKS[check_name]
            with timer(check_name):
                try:
                    status = check_info['check_func']()
                except Exception as e:
                    status = ServiceStatus(False, "{} raised an error".format(check_name), e)
                status.duration = timer.peek().duration
            statuses.append((check_name, status))
    return statuses
Example #28
 def custom_data(self, selected_location, domain):
     timer = TimingContext()
     with timer:
         to_ret = self._custom_data(selected_location, domain)
     if selected_location:
         loc_type = selected_location.location_type.name
     else:
         loc_type = None
     tags = ["location_type:{}".format(loc_type), "report_slug:{}".format(self.slug)]
     if self.allow_conditional_agg:
         tags.append("allow_conditional_agg:yes")
     datadog_histogram(
         "commcare.icds.block_reports.custom_data_duration",
         timer.duration,
         tags=tags
     )
     return to_ret
Example #29
 def process_with_error_handling(self, change, context):
     timer = TimingContext()
     try:
         with timer:
             self.process_change(change)
     except Exception as ex:
         try:
             handle_pillow_error(self, change, ex)
         except Exception as e:
             notify_exception(None, 'processor error in pillow {} {}'.format(
                 self.get_name(), e,
             ))
             raise
     else:
         self._update_checkpoint(change, context)
         self._record_change_success_in_datadog(change)
     self._record_change_in_datadog(change, timer)
Example #30
def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    indicator_config_ids = None
    timer = TimingContext()
    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain, first_indicator.doc_type)
        indicator_config_ids = first_indicator.indicator_config_ids

        with timer:
            for doc in doc_store.iter_documents(doc_ids):
                indicator = indicator_by_doc_id[doc['_id']]
                successfully_processed, to_remove = _save_document_helper(indicator, doc)
                if successfully_processed:
                    processed_indicators.append(indicator.pk)
                else:
                    failed_indicators.append((indicator, to_remove))

        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        with transaction.atomic():
            for indicator, to_remove in failed_indicators:
                indicator.update_failure(to_remove)
                indicator.save()

    datadog_histogram(
        'commcare.async_indicator.processing_time', timer.duration,
        tags=[
            u'config_ids:{}'.format(indicator_config_ids)
        ]
    )
Example #31
    def migrate(self):
        log.info('migrating domain {}'.format(self.domain))
        log.info('run timestamp is {}'.format(self.run_timestamp))

        self.processed_docs = 0
        with TimingContext("couch_sql_migration") as timing_context:
            self.timing_context = timing_context
            with timing_context('main_forms'):
                self._process_main_forms()
            with timing_context("unprocessed_forms"):
                self._copy_unprocessed_forms()
            with timing_context("unprocessed_cases"):
                self._copy_unprocessed_cases()
            with timing_context("case_diffs"):
                self._calculate_case_diffs()

        self._send_timings(timing_context)
        log.info('migrated domain {}'.format(self.domain))
Example #32
    def __init__(self, project=None, restore_user=None, params=None,
                 cache_settings=None, is_async=False, case_sync=None):
        assert isinstance(restore_user, OTARestoreUser)
        self.project = project
        self.domain = project.name if project else ''
        self.restore_user = restore_user
        self.params = params or RestoreParams()
        self.cache_settings = cache_settings or RestoreCacheSettings()
        self.is_async = is_async

        self.restore_state = RestoreState(
            self.project,
            self.restore_user,
            self.params, is_async,
            self.cache_settings.overwrite_cache,
            case_sync=case_sync,
        )

        self.force_cache = self.cache_settings.force_cache
        self.cache_timeout = self.cache_settings.cache_timeout
        self.overwrite_cache = self.cache_settings.overwrite_cache

        self.timing_context = TimingContext('restore-{}-{}'.format(self.domain, self.restore_user.username))
Example #33
class RestoreConfig(object):
    """
    A collection of attributes associated with an OTA restore

    :param project:         The domain object. An instance of `Domain`.
    :param restore_user:    The restore user requesting the restore
    :param params:          The RestoreParams associated with this (see above).
    :param cache_settings:  The RestoreCacheSettings associated with this (see above).
    :param is_async:        Whether to get the restore response using a celery task
    :param case_sync:       Case sync algorithm (None -> default).
    """

    def __init__(self, project=None, restore_user=None, params=None,
                 cache_settings=None, is_async=False, case_sync=None):
        assert isinstance(restore_user, OTARestoreUser)
        self.project = project
        self.domain = project.name if project else ''
        self.restore_user = restore_user
        self.params = params or RestoreParams()
        self.cache_settings = cache_settings or RestoreCacheSettings()
        self.is_async = is_async

        self.restore_state = RestoreState(
            self.project,
            self.restore_user,
            self.params, is_async,
            self.cache_settings.overwrite_cache,
            case_sync=case_sync,
        )

        self.force_cache = self.cache_settings.force_cache
        self.cache_timeout = self.cache_settings.cache_timeout
        self.overwrite_cache = self.cache_settings.overwrite_cache

        self.timing_context = TimingContext('restore-{}-{}'.format(self.domain, self.restore_user.username))

    @property
    @memoized
    def sync_log(self):
        return self.restore_state.last_sync_log

    @property
    def async_restore_task_id_cache(self):
        return AsyncRestoreTaskIdCache(
            domain=self.domain,
            user_id=self.restore_user.user_id,
            sync_log_id=self.sync_log._id if self.sync_log else '',
            device_id=self.params.device_id,
        )

    @property
    def restore_payload_path_cache(self):
        return RestorePayloadPathCache(
            domain=self.domain,
            user_id=self.restore_user.user_id,
            sync_log_id=self.sync_log._id if self.sync_log else '',
            device_id=self.params.device_id,
        )

    @property
    def initial_restore_payload_path_cache(self):
        return RestorePayloadPathCache(
            domain=self.domain,
            user_id=self.restore_user.user_id,
            sync_log_id='',
            device_id=self.params.device_id,
        )

    def get_response(self):
        is_async = self.is_async
        try:
            with self.timing_context:
                payload = self.get_payload()
            response = payload.get_http_response()
        except RestoreException as e:
            logging.exception("%s error during restore submitted by %s: %s" %
                              (type(e).__name__, self.restore_user.username, str(e)))
            is_async = False
            response = get_simple_response_xml(
                e.message,
                ResponseNature.OTA_RESTORE_ERROR
            )
            response = HttpResponse(response, content_type="text/xml; charset=utf-8",
                                    status=412)  # precondition failed
        if not is_async:
            self._record_timing(response.status_code)
        return response

    def get_payload(self):
        self.validate()
        self.delete_initial_cached_payload_if_necessary()
        self.delete_cached_payload_if_necessary()

        cached_response = self.get_cached_response()
        tags = [
            'domain:{}'.format(self.domain),
            'is_async:{}'.format(bool(self.is_async)),
        ]
        if cached_response:
            datadog_counter('commcare.restores.cache_hits.count', tags=tags)
            return cached_response
        datadog_counter('commcare.restores.cache_misses.count', tags=tags)

        # Start new sync
        if self.is_async:
            response = self._get_asynchronous_payload()
        else:
            response = self.generate_payload()

        return response

    def validate(self):
        try:
            self.restore_state.validate_state()
        except InvalidSyncLogException as e:
            # This exception will get caught by the view and a 412 will be returned to the phone for resync
            raise RestoreException(e)

    def get_cached_response(self):
        if self.overwrite_cache:
            return None

        cache_payload_path = self.restore_payload_path_cache.get_value()

        return CachedResponse(cache_payload_path)

    def generate_payload(self, async_task=None):
        if async_task:
            self.timing_context.stop("wait_for_task_to_start")
        self.restore_state.start_sync()
        fileobj = self._generate_restore_response(async_task=async_task)
        try:
            self.restore_state.finish_sync()
            cached_response = self.set_cached_payload_if_necessary(
                fileobj, self.restore_state.duration, async_task)
            if async_task:
                fileobj.close()
                assert cached_response is not None
                response = cached_response
                self.timing_context.stop()  # root timer
                self._record_timing('async')
            else:
                fileobj.seek(0)
                response = RestoreResponse(fileobj)
        except:
            fileobj.close()
            raise
        return response

    def _get_asynchronous_payload(self):
        new_task = False
        # fetch the task from celery
        task_id = self.async_restore_task_id_cache.get_value()
        task = AsyncResult(task_id)
        task_exists = task.status == ASYNC_RESTORE_SENT

        if not task_exists:
            # start a new task
            # NOTE this starts a nested timer (wait_for_task_to_start),
            # which will be stopped by self.generate_payload(async_task)
            # in the asynchronous task. It is expected that
            # get_async_restore_payload.delay(self) will serialize this
            # RestoreConfig and its associated TimingContext before it
            # returns, and thereby fork the timing context. The timing
            # context associated with this side of the fork will not be
            # recorded since it is async (see self.get_response).
            with self.timing_context("wait_for_task_to_start"):
                task = get_async_restore_payload.delay(self, self.domain, self.restore_user.username)
            new_task = True
            # store the task id in cache
            self.async_restore_task_id_cache.set_value(task.id)
        try:
            response = task.get(timeout=self._get_task_timeout(new_task))
        except TimeoutError:
            # return a 202 with progress
            response = AsyncRestoreResponse(task, self.restore_user.username)

        return response

    def _get_task_timeout(self, new_task):
        # if this is a new task, wait for INITIAL_ASYNC_TIMEOUT in case
        # this restore completes quickly. otherwise, only wait 1 second for
        # a response.
        return INITIAL_ASYNC_TIMEOUT_THRESHOLD if new_task else 1

    def _generate_restore_response(self, async_task=None):
        """
        :returns: A file-like object containing response content.
        """
        username = self.restore_user.username
        count_items = self.params.include_item_count
        with RestoreContent(username, count_items) as content:
            for provider in get_element_providers(self.timing_context):
                with self.timing_context(provider.__class__.__name__):
                    content.extend(provider.get_elements(self.restore_state))

            for provider in get_async_providers(self.timing_context, async_task):
                with self.timing_context(provider.__class__.__name__):
                    provider.extend_response(self.restore_state, content)

            return content.get_fileobj()

    def set_cached_payload_if_necessary(self, fileobj, duration, is_async):
        # only cache if the duration was longer than the threshold
        is_long_restore = duration > timedelta(seconds=INITIAL_SYNC_CACHE_THRESHOLD)
        if is_async or self.force_cache or is_long_restore:
            type_ = 'unknown'
            if is_async:
                type_ = 'async'
            elif self.force_cache:
                type_ = 'force'
            elif is_long_restore:
                type_ = 'long'

            tags = [
                'type:{}'.format(type_),
            ]
            datadog_counter('commcare.restores.cache_writes', tags=tags)
            response = CachedResponse.save_for_later(
                fileobj,
                self.cache_timeout,
                self.domain,
                self.restore_user.user_id,
            )
            self.restore_payload_path_cache.set_value(response.name, self.cache_timeout)
            return response
        return None

    def delete_cached_payload_if_necessary(self):
        if self.overwrite_cache and self.restore_payload_path_cache.get_value():
            self.restore_payload_path_cache.invalidate()

    def delete_initial_cached_payload_if_necessary(self):
        if self.sync_log:
            # Restores are usually cached by their sync token
            # but initial restores don't have a sync token,
            # so they're indistinguishable from each other.
            # Once a user syncs with a sync token, we're sure their initial sync is stale,
            # so delete it to avoid a stale payload if they (say) wipe the phone and sync again
            self.initial_restore_payload_path_cache.invalidate()

    def _record_timing(self, status):
        timing = self.timing_context
        assert timing.is_finished()
        duration = timing.duration if timing is not None else -1
        device_id = self.params.device_id
        if duration > 20 or status == 412:
            if status == 412:
                # use last sync log since there is no current sync log
                sync_log_id = self.params.sync_log_id or 'N/A'
            else:
                sync_log = self.restore_state.current_sync_log
                sync_log_id = sync_log._id if sync_log else 'N/A'
            log = logging.getLogger(__name__)
            log.info(
                "restore %s: user=%s device=%s domain=%s status=%s duration=%.3f",
                sync_log_id,
                self.restore_user.username,
                device_id,
                self.domain,
                status,
                duration,
            )
        is_webapps = device_id and device_id.startswith("WebAppsLogin")
        tags = [
            'status_code:{}'.format(status),
            'device_type:{}'.format('webapps' if is_webapps else 'other'),
        ]
        env = settings.SERVER_ENVIRONMENT
        if (env, self.domain) in settings.RESTORE_TIMING_DOMAINS:
            tags.append('domain:{}'.format(self.domain))
        if timing is not None:
            timer_buckets = (5, 20, 60, 120)
            for timer in timing.to_list(exclude_root=True):
                if timer.name in RESTORE_SEGMENTS:
                    segment = RESTORE_SEGMENTS[timer.name]
                    bucket = bucket_value(timer.duration, timer_buckets, 's')
                    datadog_counter(
                        'commcare.restores.{}'.format(segment),
                        tags=tags + ['duration:%s' % bucket],
                    )
                elif timer.name.startswith('fixture:'):
                    bucket = bucket_value(timer.duration, timer_buckets, 's')
                    datadog_counter(
                        'commcare.restores.fixture',
                        tags=tags + [
                            'duration:%s' % bucket,
                            timer.name,
                        ],
                    )
            tags.append('duration:%s' % bucket_value(timing.duration, timer_buckets, 's'))

        if settings.ENTERPRISE_MODE and self.params.app and self.params.app.copy_of:
            app_name = slugify(self.params.app.name)
            tags.append('app:{}-{}'.format(app_name, self.params.app.version))

        datadog_counter('commcare.restores.count', tags=tags)
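
A minimal sketch of driving RestoreConfig end to end, built from the helpers Example #10 uses (the view function and device id are stand-ins):

def example_restore_view(request, domain):
    # Sketch only: construct a RestoreConfig the way Example #10 builds
    # its RestoreState, then let get_response() handle timing, caching
    # and error handling.
    domain_obj = Domain.get_by_name(domain)
    config = RestoreConfig(
        project=domain_obj,
        restore_user=request.couch_user.to_ota_restore_user(domain),  # an OTARestoreUser
        params=RestoreParams(device_id='example-device', version=V2),
    )
    return config.get_response()  # HttpResponse; status 412 on RestoreException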