def check_repeaters():
    """Attempt to forward waiting repeat records, giving up after six hours.

    The run is guarded by a non-blocking lock keyed on
    ``CHECK_REPEATERS_KEY``. If the lock is already held, a "locked_out"
    counter is emitted and nothing else happens. The lock is always
    released once iteration finishes, times out, or raises.
    """
    started_at = datetime.utcnow()
    lock_timeout_sec = 6 * 60 * 60
    deadline = started_at + timedelta(hours=6)

    # Long timeout to allow all waiting repeat records to be iterated
    repeaters_lock = get_redis_lock(
        CHECK_REPEATERS_KEY,
        timeout=lock_timeout_sec,
        name=CHECK_REPEATERS_KEY,
    )
    acquired = repeaters_lock.acquire(blocking=False)
    if not acquired:
        metrics_counter("commcare.repeaters.check.locked_out")
        return

    try:
        with metrics_histogram_timer(
            "commcare.repeaters.check.processing",
            timing_buckets=_check_repeaters_buckets,
        ):
            for repeat_record in iterate_repeat_records(started_at):
                out_of_time = datetime.utcnow() > deadline
                if out_of_time:
                    # Report the overrun and stop; remaining records are
                    # left for a later run.
                    _soft_assert(
                        False,
                        "I've been iterating repeat records for six hours. I quit!"
                    )
                    break
                metrics_counter("commcare.repeaters.check.attempt_forward")
                repeat_record.attempt_forward_now()
    finally:
        repeaters_lock.release()
def check_repeaters():
    """Attempt to forward waiting repeat records, giving up after 23 hours.

    The run is guarded by a non-blocking lock keyed on
    ``CHECK_REPEATERS_KEY``. If the lock is already held, a "locked_out"
    counter is emitted and nothing else happens. When iteration completes
    without hitting the 23-hour cutoff, a second soft assert flags runs
    that took six hours or longer. The lock is always released.
    """
    started_at = datetime.utcnow()
    lock_timeout_sec = 23 * 60 * 60
    deadline = started_at + timedelta(hours=23)

    # Long timeout to allow all waiting repeat records to be iterated
    repeaters_lock = get_redis_lock(
        CHECK_REPEATERS_KEY,
        timeout=lock_timeout_sec,
        name=CHECK_REPEATERS_KEY,
    )
    acquired = repeaters_lock.acquire(blocking=False)
    if not acquired:
        metrics_counter("commcare.repeaters.check.locked_out")
        return

    try:
        with metrics_histogram_timer(
            "commcare.repeaters.check.processing",
            timing_buckets=_check_repeaters_buckets,
        ):
            for repeat_record in iterate_repeat_records(started_at):
                # The soft assert's return value decides whether to go on.
                within_deadline = _soft_assert(
                    datetime.utcnow() < deadline,
                    "I've been iterating repeat records for 23 hours. I quit!"
                )
                if not within_deadline:
                    break
                metrics_counter("commcare.repeaters.check.attempt_forward")
                repeat_record.attempt_forward_now()
            else:
                # Only reached when the loop was not cut short by ``break``.
                elapsed = datetime.utcnow() - started_at
                _soft_assert(
                    elapsed < timedelta(hours=6),
                    f"It took {elapsed} to iterate repeat records.")
    finally:
        repeaters_lock.release()
def es_results(self):
    """Return the parent class's ``es_results``, timing the attribute access.

    The timing is reported to the
    ``commcare.case_list_explorer_query.es_timings`` histogram.
    """
    with metrics_histogram_timer(
        'commcare.case_list_explorer_query.es_timings',
        timing_buckets=(0.01, 0.05, 1, 5),
    ):
        results = super(CaseListExplorer, self).es_results
    return results
def _datadog_timing(self, step):
    """Build the histogram timer used for one processing step.

    :param step: value reported under the ``action`` tag
    :return: the object returned by ``metrics_histogram_timer``, tagged
        with ``step`` and this processor's index alias
    """
    timer_tags = {
        'action': step,
        'index': self.index_info.alias,
    }
    return metrics_histogram_timer(
        'commcare.change_feed.processor.timing',
        timing_buckets=(.03, .1, .3, 1, 3, 10),
        tags=timer_tags,
    )
def save_copy(request, domain, app_id):
    """
    Build a new saved copy of the app and return it as JSON.
    See ApplicationBase.save_copy

    The app is validated first. When validation reports errors no build is
    made and the response carries ``saved_app: None`` together with the
    rendered error partial. A ``BuildConflictException`` during the build
    produces a 400 response instead.
    """
    couch_user = request.couch_user
    track_built_app_on_hubspot.delay(couch_user)
    build_comment = request.POST.get('comment')
    app = get_app(domain, app_id)

    try:
        errors = app.validate_app()
    except ModuleIdMissingException:
        # For apps (mainly Exchange apps) that lost unique_id attributes on Module
        app.ensure_module_unique_ids(should_save=True)
        errors = app.validate_app()

    saved_build = None
    if not errors:
        try:
            user_id = couch_user.get_id
            with metrics_histogram_timer(
                'commcare.app_build.new_release',
                timing_buckets=(1, 10, 30, 60, 120, 240),
            ):
                saved_build = make_app_build(app, build_comment, user_id)
            CouchUser.get(user_id).set_has_built_app()
        except BuildConflictException:
            conflict_payload = {
                'error': _("There is already a version build in progress. Please wait.")
            }
            return JsonResponse(conflict_payload, status=400)
        finally:
            # To make a RemoteApp always available for building
            if app.is_remote_app():
                app.save(increment_version=True)

        _track_build_for_app_preview(domain, couch_user, app_id, 'User created a build')

    if saved_build:
        # Wrap first, then resolve the timezone, matching the evaluation
        # order of the chained expression this replaces.
        wrapped_build = SavedAppBuild.wrap(saved_build.to_json())
        saved_build = wrapped_build.releases_list_json(
            get_timezone_for_user(couch_user, domain))

    _lang, langs = get_langs(request, app)
    error_context = {
        'app': get_app(domain, app_id),
        'build_errors': errors,
        'domain': domain,
        'langs': langs,
    }
    error_html = render_to_string(
        "app_manager/partials/build_errors.html", error_context)
    return json_response({
        "saved_app": saved_build,
        "error_html": error_html,
    })
def _per_config_metrics_timer(self, step, config_id):
    """Build the histogram timer for one step of processing a config.

    :param step: value reported under the ``action`` tag
    :param config_id: added as a ``config_id`` tag only when
        ``settings.ENTERPRISE_MODE`` is truthy
    :return: the object returned by ``metrics_histogram_timer``
    """
    timer_tags = {'action': step}
    if settings.ENTERPRISE_MODE:
        timer_tags.update(config_id=config_id)
    return metrics_histogram_timer(
        'commcare.change_feed.urc.timing',
        timing_buckets=(.03, .1, .3, 1, 3, 10),
        tags=timer_tags,
    )
def _metrics_timer(step, config_id=None):
    """Build the histogram timer for one async-indicator step.

    :param step: value reported under the ``action`` tag
    :param config_id: added as a ``config_id`` tag only when it is truthy
        and ``settings.ENTERPRISE_MODE`` is truthy
    :return: the object returned by ``metrics_histogram_timer``
    """
    timer_tags = {'action': step}
    include_config = config_id and settings.ENTERPRISE_MODE
    if include_config:
        timer_tags['config_id'] = config_id
    return metrics_histogram_timer(
        'commcare.async_indicator.timing',
        timing_buckets=(.03, .1, .3, 1, 3, 10),
        tags=timer_tags,
    )
def _metrics_timer(self, step, config_id=None):
    """Build the histogram timer for one processing step, tagged ``index=ucr``.

    :param step: value reported under the ``action`` tag
    :param config_id: added as a ``config_id`` tag only when it is truthy
        and ``settings.ENTERPRISE_MODE`` is truthy
    :return: the object returned by ``metrics_histogram_timer``
    """
    timer_tags = dict(action=step, index='ucr')
    include_config = config_id and settings.ENTERPRISE_MODE
    if include_config:
        timer_tags['config_id'] = config_id
    return metrics_histogram_timer(
        'commcare.change_feed.processor.timing',
        timing_buckets=(.03, .1, .3, 1, 3, 10),
        tags=timer_tags,
    )
def __init__(self, lock, name, track_unreleased=True):
    """Wrap ``lock`` and prepare the metrics state kept alongside it.

    :param lock: the underlying lock object; its ``name`` attribute is
        stored as ``self.key``
    :param name: label stored as ``self.name`` and under the
        ``lock_name`` tag
    :param track_unreleased: stored as-is on the instance
    """
    # Identity of the wrapped lock.
    self.lock = lock
    self.key = lock.name
    self.name = name
    self.tags = {"lock_name": name}

    self.track_unreleased = track_unreleased

    # No expiry time or trace is recorded at construction.
    self.end_time = None
    self.lock_trace = None

    self.lock_timer = metrics_histogram_timer("commcare.lock.locked_time", self.timing_buckets)
def _metrics_timer(step, config_id=None):
    """Build the histogram timer for one async-indicator step.

    The ``config_id`` tag is always present. It carries ``config_id`` when
    that is truthy and ``settings.ENTERPRISE_MODE`` is truthy, and ``None``
    otherwise.

    :param step: value reported under the ``action`` tag
    :param config_id: optional identifier for the ``config_id`` tag
    :return: the object returned by ``metrics_histogram_timer``
    """
    # Prometheus requires consistent tags even if not available
    tagged_config = config_id if (config_id and settings.ENTERPRISE_MODE) else None
    timer_tags = {
        'action': step,
        'config_id': tagged_config,
    }
    return metrics_histogram_timer(
        'commcare.async_indicator.timing',
        timing_buckets=(.03, .1, .3, 1, 3, 10),
        tags=timer_tags,
    )
def _get_rows(self, data):
    """Yield one row (a list of column values) for each case in ``data``.

    This is a generator, so the timer context is entered on the first
    ``next()`` and stays open across yields until the generator is
    exhausted or closed.

    :param data: iterable of cases; each is wrapped in ``SafeCaseDisplay``
    """
    with metrics_histogram_timer(
        'commcare.case_list_explorer_query.row_fetch_timings',
        timing_buckets=(0.01, 0.05, 1, 5),
    ):
        for raw_case in data:
            display = SafeCaseDisplay(self, raw_case)
            yield [display.get(col.prop_name) for col in self.columns]
def check_repeaters_in_partition(partition):
    """
    Attempt to forward the waiting repeat records of a single partition.

    The CHECK_REPEATERS_PARTITION_COUNT constant dictates the total number of partitions

    Each partition has its own non-blocking lock. If that lock is already
    held, a "locked_out" counter tagged with the partition is emitted and
    nothing else happens. Iteration stops after 23 hours; a run that
    completes is soft-asserted to have taken less than six hours.

    :param partition: index of partition to check
    """
    started_at = datetime.utcnow()
    lock_timeout_sec = 23 * 60 * 60
    deadline = started_at + timedelta(hours=23)

    # Long timeout to allow all waiting repeat records to be iterated
    lock_key = f"{CHECK_REPEATERS_KEY}_{partition}_in_{CHECK_REPEATERS_PARTITION_COUNT}"
    partition_lock = get_redis_lock(
        lock_key,
        timeout=lock_timeout_sec,
        name=lock_key,
    )
    acquired = partition_lock.acquire(blocking=False)
    if not acquired:
        metrics_counter("commcare.repeaters.check.locked_out", tags={'partition': partition})
        return

    try:
        with metrics_histogram_timer(
            "commcare.repeaters.check.processing",
            timing_buckets=_check_repeaters_buckets,
        ):
            partition_records = _iterate_repeat_records_for_partition(
                started_at, partition, CHECK_REPEATERS_PARTITION_COUNT)
            for repeat_record in partition_records:
                # The soft assert's return value decides whether to go on.
                within_deadline = _soft_assert(
                    datetime.utcnow() < deadline,
                    "I've been iterating repeat records for 23 hours. I quit!"
                )
                if not within_deadline:
                    break
                metrics_counter("commcare.repeaters.check.attempt_forward")
                repeat_record.attempt_forward_now(is_retry=True)
            else:
                # Only reached when the loop was not cut short by ``break``.
                elapsed = datetime.utcnow() - started_at
                _soft_assert(
                    elapsed < timedelta(hours=6),
                    f"It took {elapsed} to iterate repeat records.")
    finally:
        partition_lock.release()
def acquire(self, *args, **kw):
    """Acquire the wrapped lock, recording timing and trace information.

    All arguments are passed through to ``self.lock.acquire``. The acquire
    call is timed and traced. On success the lock timer is started, the
    expected expiry is stored in ``self.end_time`` when the lock exposes a
    truthy ``timeout``, and a "locked" trace is opened if
    ``self.track_unreleased`` is set.

    :return: whatever ``self.lock.acquire`` returned
    """
    with metrics_histogram_timer("commcare.lock.acquire_time", self.timing_buckets):
        with tracer.trace("commcare.lock.acquire", resource=self.key) as span:
            acquired = self.lock.acquire(*args, **kw)
            acquired_label = "true" if acquired else "false"
            span.set_tags({
                "key": self.key,
                "name": self.name,
                "acquired": acquired_label,
            })

    if acquired:
        lock_timeout = getattr(self.lock, "timeout", None)
        if lock_timeout:
            self.end_time = time.time() + lock_timeout
        self.lock_timer.start()
        if self.track_unreleased:
            locked_trace = tracer.trace("commcare.lock.locked", resource=self.key)
            self.lock_trace = locked_trace
            locked_trace.set_tags({"key": self.key, "name": self.name})
    return acquired
def report_timing(self, action, key):
    """Build the histogram timer for one blob-db request.

    The timer is given a callback that calls ``notify_exception`` when the
    reported duration exceeds 100.

    :param action: reported under the ``action`` tag and in the
        notification details
    :param key: included in the notification details only
    :return: the object returned by ``metrics_histogram_timer``
    """
    def record_long_request(duration):
        if duration > 100:
            slow_request_details = {
                'duration': duration,
                's3_bucket_name': self.s3_bucket_name,
                'action': action,
                'key': key,
            }
            notify_exception(
                None,
                "S3BlobDB request took a long time.",
                details=slow_request_details,
            )

    timer_tags = {
        'action': action,
        's3_bucket_name': self.s3_bucket_name
    }
    return metrics_histogram_timer(
        'commcare.blobs.requests.timing',
        timing_buckets=(.03, .1, .3, 1, 3, 10, 30, 100),
        tags=timer_tags,
        callback=record_long_request,
    )
def report_build_time(domain, app_id, build_type):
    """Time an app build around a single ``yield``.

    This is a generator that yields exactly once. NOTE(review): it is
    presumably wrapped as a context manager by a decorator outside this
    block; confirm.

    :param domain: reported as a tag on the slow-build gauge
    :param app_id: reported as a tag on the slow-build gauge
    :param build_type: ``"new_release"`` or ``"live_preview"``; any other
        value raises ``KeyError``
    """
    started_at = time.time()

    # Histogram of all app builds
    metric_by_build_type = {
        "new_release": 'commcare.app_build.new_release',
        "live_preview": 'commcare.app_build.live_preview',
    }
    histogram_name = metric_by_build_type[build_type]
    with metrics_histogram_timer(
        histogram_name,
        timing_buckets=(1, 10, 30, 60, 120, 240),
    ):
        yield

    # Detailed information for all apps that take longer than 30s to build
    duration = time.time() - started_at
    if duration > 30:
        gauge_tags = {
            "domain": domain,
            "app_id": app_id,
            "build_type": build_type,
        }
        metrics_gauge('commcare.app_build.duration', duration, tags=gauge_tags)
def direct_ccz(request, domain):
    """
    Return the CCZ for an app selected through GET parameters.

    You must specify an app_id, and you may specify either 'version' or 'latest'
    latest can be one of:
        release: Latest starred version
        build: Latest version regardless of star
        save: Latest saved version of the application (even without a build)
    If 'version' and 'latest' aren't specified it will default to latest save
    You may also set 'include_multimedia=true' if you need multimedia.

    Invalid parameters produce a JSON error with status 400; an app that
    cannot be found produces a JSON error with status 404.
    """
    def error(msg, code=400):
        payload = {'status': 'error', 'message': msg}
        return json_response(payload, status_code=code)

    def get_app(app_id, version, latest):
        if version:
            return get_build_doc_by_version(domain, app_id, version)
        if latest == 'build':
            return get_latest_build_doc(domain, app_id)
        if latest == 'release':
            return get_latest_released_app_doc(domain, app_id)
        # either latest=='save' or they didn't specify
        return get_current_app(domain, app_id)

    params = request.GET
    app_id = params.get('app_id', None)
    version = params.get('version', None)
    latest = params.get('latest', None)
    multimedia_flag = params.get('include_multimedia', 'false')
    include_multimedia = multimedia_flag.lower() == 'true'
    visit_scheduler_enabled = toggles.VISIT_SCHEDULER.enabled_for_request(request)

    # Make sure URL params make sense
    if not app_id:
        return error("You must specify `app_id` in your GET parameters")
    if version and latest:
        return error("You can't specify both 'version' and 'latest'")
    allowed_latest = (None, 'release', 'build', 'save',)
    if latest not in allowed_latest:
        return error("latest must be either 'release', 'build', or 'save'")
    if version:
        try:
            version = int(version)
        except ValueError:
            return error("'version' must be an integer")

    try:
        app = get_app(app_id, version, latest)
        if not app:
            raise ResourceNotFound()
        if not isinstance(app, Document):
            app = wrap_app(app)
    except (ResourceNotFound, DocTypeError):
        return error("Application not found", code=404)

    lang, langs = get_langs(request, app)

    with metrics_histogram_timer(
        'commcare.app_build.live_preview',
        timing_buckets=(1, 10, 30, 60, 120, 240),
    ):
        response = get_direct_ccz(
            domain, app, lang, langs, version,
            include_multimedia, visit_scheduler_enabled)
    return response