def get_download_context(download_id, message=None, require_result=False):
    """Build the context dict describing the state of an async download task.

    :param download_id: id used to look up the ``DownloadBase`` record.
    :param message: optional custom message passed through to the context.
    :param require_result: If set to True, is_ready will not be set to True unless
        result is also available. If check_state=False, this is ignored.
    :raises TaskFailedError: if the underlying celery task failed.
    """
    download_data = DownloadBase.get(download_id)
    if download_data is None:
        # No saved record yet; fall back to a transient stub so task lookup works.
        download_data = DownloadBase(download_id=download_id)
    task = download_data.task

    task_status = get_task_status(
        task, is_multiple_download_task=isinstance(download_data, MultipleTaskDownload))
    if task_status.failed():
        raise TaskFailedError(task_status.error)
    if require_result:
        # Ready only once a result object exists, not merely on task success.
        is_ready = task_status.success() and task_status.result is not None
    else:
        is_ready = task_status.success()

    return {
        'result': task_status.result,
        'error': task_status.error,
        'is_ready': is_ready,
        # When heartbeat monitoring is disabled, optimistically report alive.
        'is_alive': is_alive() if heartbeat_enabled() else True,
        'progress': task_status.progress._asdict(),
        'download_id': download_id,
        'allow_dropbox_sync': isinstance(download_data, FileDownload) and download_data.use_transfer,
        'has_file': download_data is not None and download_data.has_file,
        'custom_message': message,
    }
def check_heartbeat():
    """Compare flower's expected workers against live celery pings, then fall
    back to the heartbeat check when flower is not configured or all is well."""
    monitor_url = getattr(settings, 'CELERY_FLOWER_URL', None)
    if monitor_url:
        workers = requests.get(
            monitor_url + '/api/workers',
            params={'status': True},
            timeout=3,
        ).json()
        expected_running, expected_stopped = parse_celery_workers(workers)
        app = Celery()
        app.config_from_object(settings)
        pings = parse_celery_pings(app.control.ping(timeout=10))
        failures = []
        for host in expected_running:
            # Missing or falsy ping means the worker did not respond.
            if not pings.get(host):
                failures.append('* {} celery worker down'.format(host))
        for host in expected_stopped:
            if host in pings:
                failures.append(
                    '* {} celery worker is running when we expect it to be stopped.'.format(host)
                )
        if failures:
            return ServiceStatus(False, '\n'.join(failures))
    alive = heartbeat.is_alive()
    return ServiceStatus(alive, "OK" if alive else "DOWN")
def get_download_context(download_id, message=None, require_result=False):
    """Build the context dict describing the state of an async download task.

    :param download_id: id used to look up the ``DownloadBase`` record.
    :param message: optional custom message passed through to the context.
    :param require_result: If set to True, is_ready will not be set to True unless
        result is also available. If check_state=False, this is ignored.
    :raises TaskFailedError: if the task failed; carries the original exception
        class name when one is available.
    """
    download_data = DownloadBase.get(download_id)
    if download_data is None:
        # No saved record yet; fall back to a transient stub so task lookup works.
        download_data = DownloadBase(download_id=download_id)
    task = download_data.task

    task_status = get_task_status(
        task, is_multiple_download_task=isinstance(download_data, MultipleTaskDownload))
    if task_status.failed():
        # Celery replaces exceptions with a wrapped one that we can't directly import
        # so I think our best choice is to match off the name, even though that's hacky
        exception_name = (task.result.__class__.__name__
                          if isinstance(task.result, Exception)
                          else None)
        raise TaskFailedError(task_status.error, exception_name=exception_name)
    if require_result:
        # Ready only once a result object exists, not merely on task success.
        is_ready = task_status.success() and task_status.result is not None
    else:
        is_ready = task_status.success()

    return {
        'result': task_status.result,
        'error': task_status.error,
        'is_ready': is_ready,
        # When heartbeat monitoring is disabled, optimistically report alive.
        'is_alive': is_alive() if heartbeat_enabled() else True,
        'progress': task_status.progress._asdict(),
        'download_id': download_id,
        'allow_dropbox_sync': isinstance(download_data, FileDownload) and download_data.use_transfer,
        'has_file': download_data is not None and download_data.has_file,
        'custom_message': message,
    }
def check_heartbeat():
    """Report celery health: flower-expected workers vs. live pings, with a
    final fallback to the heartbeat record."""
    flower_url = getattr(settings, 'CELERY_FLOWER_URL', None)
    if flower_url:
        response = requests.get(
            flower_url + '/api/workers',
            params={'status': True},
            timeout=3,
        )
        all_workers = response.json()
        expected_running, expected_stopped = parse_celery_workers(all_workers)
        app = Celery()
        app.config_from_object(settings)
        pings = parse_celery_pings(app.control.ping(timeout=10))
        # Workers that should be up but did not answer the ping.
        problems = [
            '* {} celery worker down'.format(host)
            for host in expected_running
            if host not in pings or not pings[host]
        ]
        # Workers that should be stopped but still answered.
        problems.extend(
            '* {} celery worker is running when we expect it to be stopped.'.format(host)
            for host in expected_stopped
            if host in pings
        )
        if problems:
            return ServiceStatus(False, '\n'.join(problems))
    alive = heartbeat.is_alive()
    return ServiceStatus(alive, "OK" if alive else "DOWN")
def ajax_job_poll(request, download_id, template="soil/partials/dl_status.html"):
    """Render the polling status partial for an async download."""
    download_data = DownloadBase.get(download_id)
    if download_data is None:
        download_data = DownloadBase(download_id=download_id)

    ready = False
    try:
        failed = download_data.task.failed()
    except (TypeError, NotImplementedError):
        # No result backend / improperly configured: can't ask celery for state.
        pass
    else:
        if failed:
            return HttpResponseServerError()
        ready = True

    context = RequestContext(request)
    context['is_ready'] = ready
    context['is_alive'] = is_alive() if heartbeat_enabled() else True
    context['progress'] = download_data.get_progress()
    context['download_id'] = download_id
    return render_to_response(template, context_instance=context)
def check_heartbeat():
    """Check flower-expected workers against celery pings, then the heartbeat."""
    flower_url = getattr(settings, "CELERY_FLOWER_URL", None)
    if flower_url:
        resource = Resource(flower_url, timeout=3)
        body = resource.get("api/workers", params_dict={"status": True}).body_string()
        workers = json.loads(body)
        running, stopped = parse_celery_workers(workers)
        app = Celery()
        app.config_from_object(settings)
        pings = parse_celery_pings(app.control.ping(timeout=10))
        issues = []
        for host in running:
            # Absent or falsy ping means the worker did not respond.
            if not pings.get(host):
                issues.append("* {} celery worker down".format(host))
        for host in stopped:
            if host in pings:
                issues.append("* {} celery worker is running when we expect it to be stopped.".format(host))
        if issues:
            return ServiceStatus(False, "\n".join(issues))
    alive = heartbeat.is_alive()
    return ServiceStatus(alive, "OK" if alive else "DOWN")
def location_importer_job_poll(request, domain, download_id, template="locations/manage/partials/status.html"):
    """Render the status partial for a location import task."""
    download_data = DownloadBase.get(download_id)
    if download_data is None:
        download_data = DownloadBase(download_id=download_id)

    try:
        if download_data.task.failed():
            return HttpResponseServerError()
    except (TypeError, NotImplementedError):
        # no result backend / improperly configured
        pass

    context = RequestContext(request)
    succeeded = download_data.task.state == 'SUCCESS'
    if succeeded:
        context['result'] = download_data.task.result.get('messages')
    context['is_ready'] = succeeded
    context['is_alive'] = is_alive() if heartbeat_enabled() else True
    context['progress'] = download_data.get_progress()
    context['download_id'] = download_id
    return render_to_response(template, context_instance=context)
def get_download_context(download_id, check_state=False, message=None, require_result=False):
    """Build the context dict describing the state of an async download.

    :param download_id: id of the ``DownloadBase`` record to inspect.
    :param check_state: when True, the task must report success to count as
        ready (and its errors are surfaced); when False, any finished task
        is considered ready.
    :param message: optional custom message passed through to the context.
    :param require_result: If set to True, is_ready will not be set to True unless
        result is also available. If check_state=False, this is ignored.
    :raises TaskFailedError: if the task failed or its result reported errors.
    """
    is_ready = False
    context = {}
    download_data = DownloadBase.get(download_id)
    context['has_file'] = download_data is not None and download_data.has_file
    if download_data is None:
        # No saved record yet; use a transient stub so task lookup still works.
        download_data = DownloadBase(download_id=download_id)
    if isinstance(download_data, MultipleTaskDownload):
        if download_data.task.ready():
            context['result'], context[
                'error'] = _get_download_context_multiple_tasks(download_data)
    else:
        try:
            if download_data.task.failed():
                raise TaskFailedError()
        except (TypeError, NotImplementedError):
            # no result backend / improperly configured
            pass
        else:
            if not check_state:
                is_ready = True
            elif download_data.task.successful():
                is_ready = True
                result = download_data.task.result
                context['result'] = result and result.get('messages')
                if result and result.get('errors'):
                    raise TaskFailedError(result.get('errors'))
    alive = True
    if heartbeat_enabled():
        alive = is_alive()
    progress = download_data.get_progress()

    def progress_complete():
        # Under CELERY_ALWAYS_EAGER tasks run synchronously, so assume done;
        # otherwise require 100% progress with no recorded error.
        return (getattr(settings, 'CELERY_ALWAYS_EAGER', False)
                or progress.get('percent', 0) == 100
                and not progress.get('error', False))

    context['is_ready'] = is_ready or progress_complete()
    if check_state and require_result:
        # Result must actually be present before reporting ready.
        context['is_ready'] = context['is_ready'] and context.get(
            'result') is not None
    context['is_alive'] = alive
    context['progress'] = progress
    context['download_id'] = download_id
    context['allow_dropbox_sync'] = isinstance(
        download_data, FileDownload) and download_data.use_transfer
    context['custom_message'] = message
    return context
def check_celery_health():
    """Collect flower worker status and the celery heartbeat.

    Returns a dict with 'worker_status' (HTML summary per worker; empty when
    CELERY_FLOWER_URL is not configured) and 'heartbeat' (bool).
    """

    def get_stats(celery_monitoring, status_only=False, refresh=False):
        # Query flower's /api/workers endpoint; best-effort — any failure
        # yields an empty dict rather than propagating.
        cresource = Resource(celery_monitoring, timeout=3)
        endpoint = "api/workers"
        params = {'refresh': 'true'} if refresh else {}
        if status_only:
            params['status'] = 'true'
        try:
            t = cresource.get(endpoint, params_dict=params).body_string()
            return json.loads(t)
        except Exception:
            return {}

    def get_task_html(detailed_stats, worker_name):
        # Render "<running> / <concurrency> :: <completed>" as a bootstrap
        # label; falls back to "unknown" when the stats shape lacks keys.
        tasks_ok = 'label-success'
        tasks_full = 'label-warning'
        tasks_html = mark_safe('<span class="label %s">unknown</span>' % tasks_full)
        try:
            worker_stats = detailed_stats[worker_name]
            pool_stats = worker_stats['stats']['pool']
            running_tasks = pool_stats['writes']['inqueues']['active']
            concurrency = pool_stats['max-concurrency']
            completed_tasks = pool_stats['writes']['total']
            # Warn (yellow) when the pool is saturated.
            tasks_class = tasks_full if running_tasks == concurrency else tasks_ok
            tasks_html = mark_safe(
                '<span class="label %s">%d / %d</span> :: %d' % (
                    tasks_class, running_tasks, concurrency, completed_tasks
                )
            )
        except KeyError:
            pass
        return tasks_html

    ret = {}
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    worker_status = ""
    if celery_monitoring:
        worker_ok = '<span class="label label-success">OK</span>'
        worker_bad = '<span class="label label-important">Down</span>'
        worker_info = []
        worker_status = get_stats(celery_monitoring, status_only=True)
        detailed_stats = get_stats(celery_monitoring, refresh=True)
        for worker_name, status in worker_status.items():
            status_html = mark_safe(worker_ok if status else worker_bad)
            tasks_html = get_task_html(detailed_stats, worker_name)
            worker_info.append(' '.join([worker_name, status_html, tasks_html]))
        worker_status = '<br>'.join(worker_info)
    ret['worker_status'] = mark_safe(worker_status)
    ret['heartbeat'] = heartbeat.is_alive()
    return ret
def hb_check():
    """Check celery liveness.

    When flower monitoring is configured, reports any workers flower marks as
    down; otherwise (or when all workers are up) falls back to the heartbeat
    check.

    :returns: ``(status, message)`` tuple; ``message`` is a newline-joined
        list of down workers, or ``None``.
    """
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    if celery_monitoring:
        try:
            cresource = Resource(celery_monitoring, timeout=3)
            t = cresource.get("api/workers").body_string()
            all_workers = json.loads(t)
            bad_workers = []
            for hostname, w in all_workers.items():
                if not w['status']:
                    bad_workers.append('* {} celery worker down'.format(hostname))
            if bad_workers:
                return (False, '\n'.join(bad_workers))
            else:
                hb = heartbeat.is_alive()
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            hb = False
    else:
        try:
            hb = heartbeat.is_alive()
        except Exception:
            # Narrowed from a bare ``except:`` (same reason as above).
            hb = False
    return (hb, None)
def hb_check():
    """Check celery liveness via flower (when configured) or the heartbeat.

    :returns: ``(status, message)`` tuple; ``message`` lists down workers or
        is ``None``.
    """
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    if not celery_monitoring:
        # No flower configured: the heartbeat alone decides, best-effort.
        try:
            return (heartbeat.is_alive(), None)
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            return (False, None)
    try:
        cresource = Resource(celery_monitoring, timeout=3)
        all_workers = json.loads(cresource.get("api/workers").body_string())
        bad_workers = [
            '* {} celery worker down'.format(hostname)
            for hostname, worker in all_workers.items()
            if not worker['status']
        ]
        if bad_workers:
            return (False, '\n'.join(bad_workers))
        hb = heartbeat.is_alive()
    except Exception:
        # Narrowed from a bare ``except:``; any flower/heartbeat error
        # counts as "down" rather than propagating.
        hb = False
    return (hb, None)
def ajax_job_poll(request, download_id, template="soil/partials/dl_status.html"):
    """Render the polling status partial for a cached async download.

    The download is considered ready as soon as its cache entry exists.
    """
    download_data = cache.get(download_id, None)
    # Fixed: was ``download_data == None`` — identity comparison with ``is``
    # is the correct (PEP 8) way to test for None.
    is_ready = download_data is not None
    alive = is_alive() if heartbeat_enabled() else True
    context = RequestContext(request)
    context['is_ready'] = is_ready
    context['is_alive'] = alive
    context['download_id'] = download_id
    return render_to_response(template, context_instance=context)
def check_heartbeat():
    """Report flower-observed worker status, falling back to the heartbeat."""
    flower_url = getattr(settings, 'CELERY_FLOWER_URL', None)
    if flower_url:
        resource = Resource(flower_url, timeout=3)
        body = resource.get("api/workers", params_dict={'status': True}).body_string()
        workers = json.loads(body)
        # Collect every worker flower reports as down.
        down = [
            '* {} celery worker down'.format(name)
            for name, status in workers.items()
            if not status
        ]
        if down:
            return ServiceStatus(False, '\n'.join(down))
    alive = heartbeat.is_alive()
    return ServiceStatus(alive, "OK" if alive else "DOWN")
def get_download_context(download_id, check_state=False, message=None):
    """Build the context dict describing the state of an async download.

    :param download_id: id of the ``DownloadBase`` record to inspect.
    :param check_state: when True, the task must report success to count as
        ready (and its errors are surfaced); when False, any finished task
        is considered ready.
    :param message: optional custom message passed through to the context.
    :raises TaskFailedError: if the task failed or its result reported errors.
    """
    is_ready = False
    context = {}
    download_data = DownloadBase.get(download_id)
    context['has_file'] = download_data is not None and download_data.has_file
    if download_data is None:
        # No saved record yet; use a transient stub so task lookup still works.
        download_data = DownloadBase(download_id=download_id)
    if isinstance(download_data, MultipleTaskDownload):
        if download_data.task.ready():
            context['result'], context['error'] = _get_download_context_multiple_tasks(download_data)
    else:
        try:
            if download_data.task.failed():
                raise TaskFailedError()
        except (TypeError, NotImplementedError):
            # no result backend / improperly configured
            pass
        else:
            if not check_state:
                is_ready = True
            elif download_data.task.successful():
                is_ready = True
                result = download_data.task.result
                context['result'] = result and result.get('messages')
                if result and result.get('errors'):
                    raise TaskFailedError(result.get('errors'))
    alive = True
    if heartbeat_enabled():
        alive = is_alive()
    progress = download_data.get_progress()

    def progress_complete():
        # Under CELERY_ALWAYS_EAGER tasks run synchronously, so assume done;
        # otherwise require 100% progress with no recorded error.
        return (
            getattr(settings, 'CELERY_ALWAYS_EAGER', False) or
            progress.get('percent', 0) == 100 and
            not progress.get('error', False)
        )

    context['is_ready'] = is_ready or progress_complete()
    context['is_alive'] = alive
    context['progress'] = progress
    context['download_id'] = download_id
    context['allow_dropbox_sync'] = isinstance(download_data, FileDownload) and download_data.use_transfer
    context['custom_message'] = message
    return context
def check_heartbeat():
    """Check worker status via flower when configured, else the heartbeat."""
    flower = getattr(settings, 'CELERY_FLOWER_URL', None)
    if flower:
        raw = Resource(flower, timeout=3).get(
            "api/workers", params_dict={'status': True}).body_string()
        down_messages = []
        for name, is_up in json.loads(raw).items():
            if not is_up:
                down_messages.append('* {} celery worker down'.format(name))
        if down_messages:
            return ServiceStatus(False, '\n'.join(down_messages))
    alive = heartbeat.is_alive()
    return ServiceStatus(alive, "OK" if alive else "DOWN")
def importer_job_poll(request, domain, download_id, template="importer/partials/import_status.html"):
    """Render the case-import status partial, handling expired or errored sessions."""
    download_data = DownloadBase.get(download_id)
    if download_data is None:
        download_data = DownloadBase(download_id=download_id)
    task = download_data.task

    try:
        if task.failed():
            return HttpResponseServerError()
    except (TypeError, NotImplementedError):
        # no result backend / improperly configured
        pass

    alive = is_alive() if heartbeat_enabled() else True
    context = RequestContext(request)

    if task.result and 'error' in task.result:
        error = task.result['error']
        if error == 'EXPIRED':
            return _spreadsheet_expired(request, domain)
        if error == 'HAS_ERRORS':
            messages.error(request, _('The session containing the file you uploaded has expired - please upload a new one.'))
            return HttpResponseRedirect(
                base.ImportCases.get_url(domain=domain) + "?error=cache")

    is_ready = task.state == 'SUCCESS'
    if is_ready:
        context['result'] = task.result
    context['is_ready'] = is_ready
    context['is_alive'] = alive
    context['progress'] = download_data.get_progress()
    context['download_id'] = download_id
    return render_to_response(template, context_instance=context)
def get_download_context(download_id, check_state=False):
    """Build the context dict describing an async download's state.

    :param download_id: id of the ``DownloadBase`` record to inspect.
    :param check_state: when True, only a task in the SUCCESS state counts as
        ready (and its errors are surfaced); when False, any finished task is
        considered ready.
    :raises TaskFailedError: if the task failed or its result reported errors.
    """
    is_ready = False
    context = {}
    download_data = DownloadBase.get(download_id)
    context['has_file'] = bool(download_data)
    if download_data is None:
        # No saved record yet; use a transient stub so task lookup still works.
        download_data = DownloadBase(download_id=download_id)
    try:
        if download_data.task.failed():
            raise TaskFailedError()
    except (TypeError, NotImplementedError):
        # no result backend / improperly configured
        pass
    else:
        if not check_state:
            is_ready = True
        elif download_data.task.state == 'SUCCESS':
            is_ready = True
            result = download_data.task.result
            context['result'] = result and result.get('messages')
            if result and result.get('errors'):
                raise TaskFailedError(result.get('errors'))
    alive = True
    if heartbeat_enabled():
        alive = is_alive()
    progress = download_data.get_progress()

    def progress_complete():
        # Bug fix: this previously required CELERY_ALWAYS_EAGER *and* 100%
        # progress, so eager mode alone never counted as complete. Under
        # CELERY_ALWAYS_EAGER tasks run synchronously, so completion should
        # be assumed on its own ("or"), matching the sibling
        # get_download_context implementations in this file.
        return (
            getattr(settings, 'CELERY_ALWAYS_EAGER', False)
            or progress.get('percent', 0) == 100
            and not progress.get('error', False)
        )

    context['is_ready'] = is_ready or progress_complete()
    context['is_alive'] = alive
    context['progress'] = progress
    context['download_id'] = download_id
    return context
def importer_job_poll(request, domain, download_id, template="importer/partials/import_status.html"):
    """Render the case-import status partial (including the import URL)."""
    download_data = DownloadBase.get(download_id)
    if download_data is None:
        download_data = DownloadBase(download_id=download_id)

    try:
        task_failed = download_data.task.failed()
    except (TypeError, NotImplementedError):
        # no result backend / improperly configured
        task_failed = False
    if task_failed:
        return HttpResponseServerError()

    alive = True
    if heartbeat_enabled():
        alive = is_alive()
    context = RequestContext(request)

    if download_data.task.result and 'error' in download_data.task.result:
        error = download_data.task.result['error']
        if error == 'EXPIRED':
            return _spreadsheet_expired(request, domain)
        elif error == 'HAS_ERRORS':
            messages.error(request, _('The session containing the file you uploaded has expired - please upload a new one.'))
            return HttpResponseRedirect(
                base.ImportCases.get_url(domain=domain) + "?error=cache")

    is_ready = False
    if download_data.task.state == 'SUCCESS':
        is_ready = True
        context['result'] = download_data.task.result
    context['is_ready'] = is_ready
    context['is_alive'] = alive
    context['progress'] = download_data.get_progress()
    context['download_id'] = download_id
    context['url'] = base.ImportCases.get_url(domain=domain)
    return render_to_response(template, context_instance=context)
def server_up(req):
    """View that just returns "success", which can be hooked into server
    monitoring tools like: pingdom

    Returns HTTP 200 with "success" when the celery heartbeat, postgres, and
    couch all respond; otherwise HTTP 500 listing the failing components.
    """
    # All three checks were guarded by bare ``except:`` clauses; narrowed to
    # ``except Exception`` so SystemExit/KeyboardInterrupt still propagate.
    try:
        hb = heartbeat.is_alive()
    except Exception:
        hb = False

    # In reality when things go wrong with couch and postgres (as of this
    # writing) it's far from graceful, so these checks will likely never be
    # reached because another exception will fire first - but for
    # completeness sake, the checks are done here to verify our calls work.

    # check django db
    try:
        user_count = User.objects.count()
    except Exception:
        user_count = None

    # check couch
    try:
        xforms = XFormInstance.view('couchforms/by_user', limit=1).all()
    except Exception:
        xforms = None

    if hb and isinstance(user_count, int) and isinstance(xforms, list):
        return HttpResponse("success")

    message = ['Problems with HQ (%s):' % os.uname()[1]]
    if not hb:
        message.append(' * Celery and or celerybeat is down')
    if user_count is None:
        message.append(' * postgres has issues')
    if xforms is None:
        message.append(' * couch has issues')
    return HttpResponse('\n'.join(message), status=500)
tasks_ok = 'label-success' tasks_full = 'label-warning' worker_info = [] for hostname, w in all_workers.items(): status_html = mark_safe(worker_ok if w['status'] else worker_bad) tasks_class = tasks_full if w['running_tasks'] == w[ 'concurrency'] else tasks_ok tasks_html = mark_safe( '<span class="label %s">%d / %d</span> :: %d' % (tasks_class, w['running_tasks'], w['concurrency'], w['completed_tasks'])) worker_info.append(' '.join([hostname, status_html, tasks_html])) worker_status = '<br>'.join(worker_info) ret['worker_status'] = mark_safe(worker_status) ret['heartbeat'] = heartbeat.is_alive() return ret def check_memcached(): ret = {} mc = cache.get_cache('default') mc_status = "Unknown/Offline" mc_results = "" try: mc_stats = mc._cache.get_stats() if len(mc_stats) > 0: mc_status = "Online" stats_dict = mc_stats[0][1] bytes = stats_dict['bytes'] max_bytes = stats_dict['limit_maxbytes']
def check_heartbeat():
    """Report celery heartbeat liveness as a ServiceStatus."""
    alive = heartbeat.is_alive()
    message = "OK" if alive else "DOWN"
    return ServiceStatus(alive, message)
worker_ok = '<span class="label label-success">OK</span>' worker_bad = '<span class="label label-important">Down</span>' tasks_ok = 'label-success' tasks_full = 'label-warning' worker_info = [] for hostname, w in all_workers.items(): status_html = mark_safe(worker_ok if w['status'] else worker_bad) tasks_class = tasks_full if w['running_tasks'] == w['concurrency'] else tasks_ok tasks_html = mark_safe('<span class="label %s">%d / %d</span> :: %d' % (tasks_class, w['running_tasks'], w['concurrency'], w['completed_tasks'])) worker_info.append(' '.join([hostname, status_html, tasks_html])) worker_status = '<br>'.join(worker_info) ret['worker_status'] = mark_safe(worker_status) ret['heartbeat'] = heartbeat.is_alive() return ret def check_memcached(): ret = {} mc = cache.get_cache('default') mc_status = "Unknown/Offline" mc_results = "" try: mc_stats = mc._cache.get_stats() if len(mc_stats) > 0: mc_status = "Online" stats_dict = mc_stats[0][1] bytes = stats_dict['bytes'] max_bytes = stats_dict['limit_maxbytes']
def hb_check():
    """Return True if the celery heartbeat is alive, False otherwise.

    Any failure while querying the heartbeat (e.g. the backing store being
    down) is treated as "not alive" rather than propagated.
    """
    try:
        return heartbeat.is_alive()
    except Exception:
        # Narrowed from a bare ``except:`` which would also swallow
        # SystemExit and KeyboardInterrupt.
        return False