def _deregister_not_running_services():
    """Flag services of non-running containers as crashed and deregister them.

    Two passes share one snapshot of the running container ids:

    1. For every entry of ``_get_local_services()`` whose container is not
       running, the container's services are deregistered; entries that are
       not sub-services are additionally marked ``'crashed'``.
    2. For every key of ``get_local_services()`` whose trailing path segment
       (the container id) is not running, the key is marked ``'crashed'``
       and the container's services are deregistered.
    """
    try:
        ship = get_ship_name()
    except Exception:
        # Fall back to the ship IP when the name cannot be obtained.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
        # and SystemExit still propagate.
        ship = get_ship_ip()

    services = _get_local_services()
    running_containers_ids = _get_running_container_ids()

    for service_id, service_dict in services.items():
        container_id, is_subservice = _get_container_id_with_subservice(
            service_id)
        if container_id in running_containers_ids:
            continue
        if not is_subservice:
            update_container_status('crashed', ship=ship,
                                    service_name=service_dict['Service'],
                                    container_id=container_id)
        deregister_services(container_id)

    for service_key in get_local_services():
        # The container id is the last '/'-separated segment of the key.
        container_id = service_key.split('/')[-1]
        if container_id not in running_containers_ids:
            update_container_status('crashed', key=service_key)
            deregister_services(container_id)
def _get_local_running_containers():
    """Return the non-empty ``'params'`` entries of all local services.

    Each key from ``get_local_services()`` is looked up with ``kv.kv_get``
    and its ``'params'`` value is kept only when it is truthy.
    """
    stored_params = (
        kv.kv_get(service_key)['params']
        for service_key in get_local_services()
    )
    return [params for params in stored_params if params]
def main():
    """Dump the KV entries of all local services to a file.

    The target path comes from the parsed arguments. Unless ``args.force``
    is set, nothing is saved while recovery has not completed. Any
    exception raised while collecting or saving is logged and the process
    exits with status 1.
    """
    setup_sentry()
    args = _parse_args()
    target_path = args.saved_containers_path

    if not (args.force or _is_recovery_completed()):
        get_logger().info(
            'Recovery is not completed. Aborting saving running containers.')
        return

    try:
        wait_for_consul_ready()
        snapshot = {
            service_key: kv.kv_get(service_key)
            for service_key in get_local_services()
        }
        if not snapshot:
            get_logger().info(
                'Aborted saving container because list is empty.')
            return
        _save_containers_parameters_list_in_file(snapshot, target_path)
        get_logger().info(
            'Containers have been saved to {}.'.format(target_path))
    except Exception as e:
        get_logger().exception(e)
        sys.exit(1)
def _get_crashed_services():
    """Return the local service keys whose stored status marks a failure.

    A key is included when the ``'Status'`` field of its KV entry is
    ``'crashed'`` or ``'not-recovered'``.
    """
    failed_statuses = ('crashed', 'not-recovered')
    return [
        service_key
        for service_key in get_local_services()
        if kv.kv_get(service_key)['Status'] in failed_statuses
    ]
def POST(self):
    """Join this single-ship armada to the armada reachable at ``host``.

    Reads the ``host`` POST parameter, snapshots the local services' KV
    entries, reconfigures and restarts consul against the remote host, and
    then writes the ship name and the snapshot back.

    Returns:
        ``self.status_ok()`` on success, otherwise ``self.status_error(...)``
        when the parameter is invalid, the local armada has more than one
        ship, the remote datacenter cannot be read, or the consul restart
        does not succeed.
    """
    consul_host, error = self.get_post_parameter('host')
    if error:
        return self.status_error(error)

    # Capture the ship name and the local KV entries before consul is
    # reconfigured; they are written back after the restart below.
    ship = get_ship_name()
    local_services_data = {
        key: kv.kv_get(key)
        for key in get_local_services()
    }

    armada_size = _get_armada_size()
    if armada_size > 1:
        return self.status_error(
            'Currently only single ship armadas can join the others. '
            'Your armada has size: {0}.'.format(armada_size))

    try:
        agent_self_dict = consul_query(
            'agent/self', consul_address='{0}:8500'.format(consul_host))
        datacenter = agent_self_dict['Config']['Datacenter']
    except Exception:
        # Any failure to query or parse the remote agent is reported to the
        # client; ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt/SystemExit propagate.
        return self.status_error(
            'Could not read remote host datacenter address.')

    current_consul_mode = _get_current_consul_mode()
    if current_consul_mode == consul_config.ConsulMode.BOOTSTRAP:
        override_runtime_settings(
            consul_mode=consul_config.ConsulMode.CLIENT,
            ship_ips=[consul_host],
            datacenter=datacenter)
    else:
        override_runtime_settings(
            ship_ips=[consul_host] + get_other_ship_ips(),
            datacenter=datacenter)

    if _restart_consul():
        supervisor_server = xmlrpclib.Server('http://localhost:9001/RPC2')
        hermes_init_output = supervisor_server.supervisor.startProcessGroup(
            'hermes_init')
        get_logger().info('hermes_init start: %s', hermes_init_output)
        set_ship_name(ship)
        for key, data in local_services_data.items():
            kv.kv_set(key, data)
        return self.status_ok()
    return self.status_error('Waiting for armada restart timed out.')
def _add_running_services_at_startup():
    """Save to KV the agent-registered services that are not stored yet.

    After consul is ready, the services reported by ``agent/services`` are
    compared with the keys already returned by ``get_local_services()``;
    each missing one is saved with status ``'started'``. Any failure is
    logged and swallowed, so this function never raises after consul is
    ready.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        # Snapshot taken before the delay below; an empty/None result is
        # treated as "nothing saved yet". A set gives O(1) membership tests.
        saved_keys = set(get_local_services() or ())
        # NOTE(review): fixed 10 s delay kept from the original; presumably
        # gives services time to register with the agent - confirm.
        sleep(10)
        all_services = consul_query('agent/services')
        # The agent may or may not list its own 'consul' service; a missing
        # entry must not abort the whole pass (``del`` raised KeyError).
        all_services.pop('consul', None)
        for service_id, service_dict in all_services.items():
            # NOTE(review): ids containing ':' are skipped - presumably
            # sub-services of a container; confirm.
            if ':' in service_id:
                continue
            service_name = service_dict['Service']
            if service_name == 'armada':
                continue
            key = create_consul_services_key(ship, service_name, service_id)
            if key not in saved_keys:
                save_container(ship, service_id, 'started')
                get_logger().info(
                    'Added running service: {}'.format(service_id))
    except Exception:
        get_logger().exception('Unable to add running services.')
def _stop_service(self, container_id):
    """Stop a container and remove its service registration and KV key.

    If the container is not running, only its KV key (if any) is removed and
    its services are deregistered. Otherwise the in-container agent is
    stopped, the ``pre-stop`` hook is triggered, services are deregistered,
    and the container is stopped with up to three attempts.

    Raises:
        Exception: the last error raised by ``docker_api.stop`` when the
            container is still running after all attempts.
        RuntimeError: when the container is still running although no stop
            attempt raised.
    """
    service_list = get_local_services()
    try:
        key = fnmatch.filter(service_list, '*/{}'.format(container_id))[0]
    except (IndexError, TypeError):
        # No matching key, or the service list is not iterable.
        key = None

    if not is_container_running(container_id):
        if key:
            kv_remove(key)
        try:
            deregister_services(container_id)
        except Exception as e:
            # Best effort: a failed deregistration is only logged.
            get_logger().exception(e)
        return

    run_command_in_container('supervisorctl stop armada_agent', container_id)
    trigger_hook('pre-stop', container_id)
    docker_api = docker_client.api()
    last_exception = None
    try:
        deregister_services(container_id)
    except Exception as e:
        get_logger().exception(e)

    for _ in range(3):
        try:
            docker_api.stop(container_id)
        except Exception as e:
            get_logger().debug(e, exc_info=True)
            last_exception = e
        if not is_container_running(container_id):
            if key:
                kv_remove(key)
            break

    if is_container_running(container_id):
        get_logger().error('Could not stop container: %s', container_id)
        if last_exception is None:
            # ``raise None`` would surface as an unrelated TypeError; report
            # the real condition instead.
            raise RuntimeError(
                'Could not stop container: {}'.format(container_id))
        raise last_exception