def check_heartbeat():
    """Return a ServiceStatus describing celery worker and heartbeat health.

    When ``settings.CELERY_FLOWER_URL`` is set, the worker list is fetched
    from ``<url>/api/workers`` (3 second timeout), handed to
    ``parse_celery_workers`` and compared with the replies to a celery
    ``ping`` (10 second timeout). Any mismatch produces a failing status
    whose message has one bullet line per problem. If no URL is configured,
    or no problem is found, the result reflects ``heartbeat.is_alive()``.
    """
    flower_url = getattr(settings, 'CELERY_FLOWER_URL', None)
    if flower_url:
        response = requests.get(
            flower_url + '/api/workers',
            params={'status': True},
            timeout=3,
        )
        # Presumably: hostnames that should be up / should be down.
        should_run, should_be_stopped = parse_celery_workers(response.json())

        app = Celery()
        app.config_from_object(settings)
        pings = parse_celery_pings(app.control.ping(timeout=10))

        # A worker counts as down when it is absent from the ping results
        # or its entry is falsy.
        problems = [
            '* {} celery worker down'.format(host)
            for host in should_run
            if not (host in pings and pings[host])
        ]
        problems.extend(
            '* {} celery worker is running when we expect it to be stopped.'.format(host)
            for host in should_be_stopped
            if host in pings
        )
        if problems:
            return ServiceStatus(False, '\n'.join(problems))

    is_alive = heartbeat.is_alive()
    return ServiceStatus(is_alive, "OK" if is_alive else "DOWN")
def check_heartbeat():
    """Check celery workers (when Flower is configured) and the heartbeat.

    With ``settings.CELERY_FLOWER_URL`` set, the Flower ``/api/workers``
    listing is requested (3 second timeout) and passed through
    ``parse_celery_workers``; the two resulting host collections are then
    compared with the parsed replies of a celery ``ping`` (10 second
    timeout). Problems are returned as a failing ServiceStatus with one
    line per host. Otherwise the status mirrors ``heartbeat.is_alive()``.
    """
    monitoring_url = getattr(settings, 'CELERY_FLOWER_URL', None)
    if monitoring_url:
        query = {'status': True}
        flower_reply = requests.get(
            monitoring_url + '/api/workers',
            params=query,
            timeout=3,
        )
        running_hosts, stopped_hosts = parse_celery_workers(flower_reply.json())

        app = Celery()
        app.config_from_object(settings)
        pings = parse_celery_pings(app.control.ping(timeout=10))

        # Hosts that should answer but are missing or have a falsy entry.
        down = [
            host for host in running_hosts
            if host not in pings or not pings[host]
        ]
        # Hosts that should be silent but show up in the ping results.
        stale = [host for host in stopped_hosts if host in pings]

        report = ['* {} celery worker down'.format(host) for host in down]
        report += [
            '* {} celery worker is running when we expect it to be stopped.'.format(host)
            for host in stale
        ]
        if report:
            return ServiceStatus(False, '\n'.join(report))

    is_alive = heartbeat.is_alive()
    if is_alive:
        return ServiceStatus(is_alive, "OK")
    return ServiceStatus(is_alive, "DOWN")
def check_heartbeat():
    """Return a ServiceStatus for celery workers and the heartbeat.

    If ``settings.CELERY_FLOWER_URL`` is configured, ``api/workers`` is read
    through a ``Resource`` (3 second timeout), JSON-decoded and passed to
    ``parse_celery_workers``. The result is checked against the parsed
    replies of a celery ``ping`` (10 second timeout); every discrepancy
    becomes one line of a failing status. With no URL, or no discrepancy,
    the status is derived from ``heartbeat.is_alive()``.
    """
    flower_url = getattr(settings, "CELERY_FLOWER_URL", None)
    if flower_url:
        flower = Resource(flower_url, timeout=3)
        payload = flower.get("api/workers", params_dict={"status": True}).body_string()
        should_run, should_stop = parse_celery_workers(json.loads(payload))

        app = Celery()
        app.config_from_object(settings)
        pings = parse_celery_pings(app.control.ping(timeout=10))

        messages = []
        for host in should_run:
            # Present in the ping results with a truthy entry: healthy.
            if host in pings and pings[host]:
                continue
            messages.append("* {} celery worker down".format(host))
        for host in should_stop:
            if host not in pings:
                continue
            messages.append(
                "* {} celery worker is running when we expect it to be stopped.".format(host)
            )

        if messages:
            return ServiceStatus(False, "\n".join(messages))

    is_alive = heartbeat.is_alive()
    return ServiceStatus(is_alive, "OK" if is_alive else "DOWN")
def handle(self, hostname, *args, **options):
    """Ask the celery worker ``hostname`` to shut down and verify it went quiet.

    Broadcasts the ``shutdown`` control command to the single worker, then
    pings that same worker (10 second timeout). If the worker still shows up
    in the parsed ping results, a failure message is printed and the process
    exits with status 1; otherwise a success message is printed.

    ``*args`` and ``**options`` are accepted but not used.
    """
    # Local import keeps this block self-contained; ``sys.exit`` replaces the
    # site-provided ``exit`` helper, which is meant for interactive use and
    # is not guaranteed to exist in every interpreter setup.
    import sys

    celery = Celery()
    celery.config_from_object(settings)
    celery.control.broadcast('shutdown', destination=[hostname])
    worker_responses = celery.control.ping(timeout=10, destination=[hostname])
    pings = parse_celery_pings(worker_responses)
    if hostname in pings:
        # print() with a single string argument behaves the same on
        # Python 2 and Python 3; the bare print statement is Python 2 only.
        print('Did not shutdown worker')
        sys.exit(1)
    print('Successfully initiated warm shutdown')
def handle(self, hostname, **options):
    """Ask the celery worker ``hostname`` to shut down and verify it went quiet.

    Broadcasts the ``shutdown`` control command to the single worker, then
    pings that same worker (10 second timeout). If the worker still shows up
    in the parsed ping results, a failure message is printed and the process
    exits with status 1; otherwise a success message is printed.

    ``**options`` is accepted but not used.
    """
    # Local import keeps this block self-contained; ``sys.exit`` replaces the
    # site-provided ``exit`` helper, which is meant for interactive use and
    # is not guaranteed to exist in every interpreter setup.
    import sys

    celery = Celery()
    celery.config_from_object(settings)
    celery.control.broadcast('shutdown', destination=[hostname])
    worker_responses = celery.control.ping(timeout=10, destination=[hostname])
    pings = parse_celery_pings(worker_responses)
    if hostname in pings:
        print('Did not shutdown worker')
        sys.exit(1)
    print('Successfully initiated warm shutdown')
def _kill_stale_workers():
    """Broadcast ``shutdown`` to workers that answer pings but should be stopped.

    Does nothing unless ``settings.CELERY_FLOWER_URL`` is set. Otherwise the
    ``api/workers`` listing is read through a ``Resource`` (3 second
    timeout), JSON-decoded and passed to ``parse_celery_workers``; every
    host in its second return value that also appears in the parsed celery
    ``ping`` replies (10 second timeout) is sent the ``shutdown`` command.
    """
    flower_url = getattr(settings, 'CELERY_FLOWER_URL', None)
    if not flower_url:
        return

    flower = Resource(flower_url, timeout=3)
    raw = flower.get("api/workers", params_dict={'status': True}).body_string()
    # Only the "expected stopped" half of the result is needed here.
    _, should_be_stopped = parse_celery_workers(json.loads(raw))

    app = Celery()
    app.config_from_object(settings)
    pings = parse_celery_pings(app.control.ping(timeout=10))

    stale = []
    for host in should_be_stopped:
        if host in pings:
            stale.append(host)

    if stale:
        app.control.broadcast('shutdown', destination=stale)
def _kill_stale_workers():
    """Broadcast ``shutdown`` to workers that answer pings but should be stopped.

    Does nothing unless ``settings.CELERY_FLOWER_URL`` is set. Otherwise the
    ``api/workers`` listing is read through a ``Resource`` (3 second
    timeout), JSON-decoded and passed to ``parse_celery_workers``; every
    host in its second return value that also appears in the parsed celery
    ``ping`` replies (10 second timeout) is sent the ``shutdown`` command,
    and a soft assertion records which hosts were targeted.
    """
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    if celery_monitoring:
        cresource = Resource(celery_monitoring, timeout=3)
        t = cresource.get("api/workers", params_dict={'status': True}).body_string()
        all_workers = json.loads(t)
        expected_running, expected_stopped = parse_celery_workers(all_workers)

        celery = Celery()
        celery.config_from_object(settings)
        worker_responses = celery.control.ping(timeout=10)
        pings = parse_celery_pings(worker_responses)

        # Build a real list rather than calling filter(): on Python 3,
        # filter() returns a lazy iterator that is always truthy (so the
        # guard below would never be False) and that is exhausted after the
        # first pass (so the join below would see nothing).
        hosts_to_stop = [hostname for hostname in expected_stopped if hostname in pings]
        if hosts_to_stop:
            celery.control.broadcast('shutdown', destination=hosts_to_stop)
            # Always-failing soft assert, used to report that the kill
            # path was taken and for which hosts.
            _soft_assert(False, 'Used kill stale for: {}'.format(', '.join(hosts_to_stop)))
def _kill_stale_workers():
    """Broadcast ``shutdown`` to workers that answer pings but should be stopped.

    Only acts when ``settings.CELERY_FLOWER_URL`` is set. The Flower
    ``/api/workers`` listing is requested (3 second timeout) and passed to
    ``parse_celery_workers``; hosts from its second return value that also
    appear in the parsed celery ``ping`` replies (10 second timeout) are
    sent the ``shutdown`` control command.
    """
    flower_url = getattr(settings, 'CELERY_FLOWER_URL', None)
    if flower_url:
        response = requests.get(
            flower_url + '/api/workers',
            params={'status': True},
            timeout=3,
        )
        # Only the "expected stopped" half of the result is needed here.
        _, should_be_stopped = parse_celery_workers(response.json())

        app = Celery()
        app.config_from_object(settings)
        pings = parse_celery_pings(app.control.ping(timeout=10))

        stale = [host for host in should_be_stopped if host in pings]
        if stale:
            app.control.broadcast('shutdown', destination=stale)
def _check_celery_workers():
    """Return a ServiceStatus describing whether celery workers match expectations.

    Without ``settings.CELERY_FLOWER_URL`` the status is always OK.
    Otherwise the Flower ``/api/workers`` listing (3 second timeout) is
    passed to ``parse_celery_workers`` and compared with up to 20 rounds of
    celery pings (1 second timeout each):

    * a host expected to be running is reported as down if it gave no
      truthy ping reply in *any* round;
    * a host expected to be stopped is reported as running only if it gave
      a truthy ping reply in *every* round that was performed.

    Returns a failing status with one line per problem, else an OK status.
    """
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    if celery_monitoring:
        all_workers = requests.get(
            celery_monitoring + '/api/workers',
            params={'status': True},
            timeout=3,
        ).json()
        bad_workers = []
        expected_running, expected_stopped = parse_celery_workers(all_workers)

        celery = Celery()
        celery.config_from_object(settings)

        expected_running = set(expected_running)
        expected_stopped = set(expected_stopped)

        responses_any = set()
        # Seeded from the first ping round. Starting from an empty set and
        # only intersecting would leave this empty forever, so the
        # "expected stopped but running" check below could never fire.
        responses_all = None

        # Retry because of https://github.com/celery/celery/issues/4758 (?)
        for _ in range(20):
            pings = {
                hostname
                for hostname, value in parse_celery_pings(
                    celery.control.ping(timeout=1)).items()
                if value
            }
            responses_any |= pings
            if responses_all is None:
                responses_all = set(pings)
            else:
                responses_all &= pings
            # Stop early only when the responders are exactly the expected
            # set; anything missing or unexpected triggers another round.
            if expected_running == responses_any:
                break

        if responses_all is None:
            # Unreachable with a non-empty retry range; kept as a guard.
            responses_all = set()

        for hostname in expected_running - responses_any:
            bad_workers.append('* {} celery worker down'.format(hostname))

        for hostname in expected_stopped & responses_all:
            bad_workers.append(
                '* {} celery worker is running when we expect it to be stopped.'
                .format(hostname))

        if bad_workers:
            return ServiceStatus(False, '\n'.join(bad_workers))

    return ServiceStatus(True, "OK")
def _kill_stale_workers():
    """Broadcast ``shutdown`` to workers that answer pings but should be stopped.

    Returns immediately unless ``settings.CELERY_FLOWER_URL`` is set. The
    Flower ``/api/workers`` listing is requested (3 second timeout) and
    passed to ``parse_celery_workers``; hosts from its second return value
    that also appear in the parsed celery ``ping`` replies (10 second
    timeout) are sent the ``shutdown`` control command.
    """
    monitoring_url = getattr(settings, 'CELERY_FLOWER_URL', None)
    if not monitoring_url:
        return

    flower_reply = requests.get(
        monitoring_url + '/api/workers',
        params={'status': True},
        timeout=3,
    )
    # Only the "expected stopped" half of the result is needed here.
    _, stopped_hosts = parse_celery_workers(flower_reply.json())

    app = Celery()
    app.config_from_object(settings)
    ping_replies = app.control.ping(timeout=10)
    pings = parse_celery_pings(ping_replies)

    still_running = []
    for host in stopped_hosts:
        if host in pings:
            still_running.append(host)

    if still_running:
        app.control.broadcast('shutdown', destination=still_running)