def main():
    """Persist the records of this ship's containers.

    Every key listed under ``ships/<ship>/service/`` is read from the kv
    store.  The collected mapping is written back to the kv store and then to
    the file named by the ``saved_containers_path`` argument.  The file is
    skipped while recovery is not completed, unless ``force`` was requested.
    Any unexpected error is logged and ends the process with exit code 1.
    """
    setup_sentry()
    options = _parse_args()
    target_path = options.saved_containers_path
    try:
        wait_for_consul_ready()
        ship_name = get_ship_name()
        service_keys = kv.kv_list('ships/{}/service/'.format(ship_name))
        snapshot = {}
        for service_key in service_keys or ():
            snapshot[service_key] = kv.kv_get(service_key)

        if not snapshot:
            get_logger().info('Aborted saving container because of errors.')
            return

        # A failed kv store write is only logged; the file dump still follows.
        try:
            _save_containers_parameters_list_in_kv_store(snapshot)
            get_logger().info('Containers have been saved to kv store.')
        except Exception as error:
            get_logger().exception(error)

        if not (options.force or _is_recovery_completed()):
            get_logger().warning('Recovery is not completed. Aborting saving running containers.')
            return

        _save_containers_parameters_list_in_file(snapshot, target_path)
        get_logger().info('Containers have been saved to {}.'.format(target_path))
    except Exception as error:
        get_logger().exception(error)
        sys.exit(1)
def _deregister_not_running_services():
    """Flag services without a running container as crashed and deregister them.

    Two passes are made:

    1. Over the locally registered services: when a service's container is
       not among the running containers, its status is set to ``'crashed'``
       (main services only, not subservices) and the container's services
       are deregistered.
    2. Over the keys stored under ``ships/<ship>/service/``: the same is done
       for every key whose last path segment (the container id) is not
       running.

    The ship is identified by its name; if the name cannot be obtained the
    ship IP is used instead.
    """
    try:
        ship = get_ship_name()
    except Exception:
        # Best-effort fallback.  Only genuine errors trigger it, so that
        # KeyboardInterrupt/SystemExit are not silently swallowed.
        ship = get_ship_ip()

    services = _get_local_services()
    running_containers_ids = _get_running_container_ids()

    for service_id, service in services.items():
        container_id, is_subservice = _get_container_id_with_subservice(service_id)
        if container_id in running_containers_ids:
            continue
        if not is_subservice:
            kv.update_container_status('crashed', ship=ship, name=service['Service'],
                                       container_id=container_id)
        deregister_services(container_id)

    services_keys = kv.kv_list('ships/{}/service/'.format(ship)) or []
    for service_key in services_keys:
        container_id = service_key.split('/')[-1]
        if container_id not in running_containers_ids:
            kv.update_container_status('crashed', key=service_key)
            deregister_services(container_id)
def _get_local_running_containers():
    """Return the ``'params'`` entries of the containers stored for this ship.

    Keys are listed under ``ships/<ship>/service/``.  Records that are
    missing, are not dicts, or carry no (or empty) ``'params'`` are skipped
    instead of raising.

    Returns:
        list: the non-empty ``'params'`` values, in listing order.
    """
    result = []
    ship = get_ship_name()
    local_containers = kv.kv_list('ships/{}/service/'.format(ship)) or []
    for container in local_containers:
        container_record = kv.kv_get(container)
        # The record may be gone (or malformed) by the time it is read.
        if not isinstance(container_record, dict):
            continue
        container_parameters = container_record.get('params')
        if container_parameters:
            result.append(container_parameters)
    return result
def _save_runtime_settings():
    """Write the ship's current runtime settings to the runtime settings file.

    The commander flag, ship name, other ships' IPs, datacenter and dockyard
    aliases are serialized as key-sorted, 4-space indented JSON and written
    to ``consul_config.RUNTIME_SETTINGS_PATH``, replacing its content.
    """
    settings = {
        'is_commander': is_ship_commander(),
        'name': get_ship_name(),
        'ships': get_other_ship_ips(),
        'datacenter': get_current_datacenter(),
        'dockyards': alias.get_list(),
    }
    with open(consul_config.RUNTIME_SETTINGS_PATH, 'w') as settings_file:
        serialized = json.dumps(settings, sort_keys=True, indent=4)
        settings_file.write(serialized)
def POST(self):
    """Rename the current ship to the name given in the ``name`` POST parameter.

    The name is rejected when it is empty, equals the string ``'None'`` or is
    already used by one of the other ships.

    Returns:
        The handler's error status on a bad parameter or rejected name,
        otherwise its OK status.
    """
    requested_name, error = self.get_post_parameter('name')
    if error:
        return self.status_error(error)

    names_in_use = []
    for ship_ip in get_other_ship_ips():
        names_in_use.append(get_ship_name(ship_ip))
    already_used = requested_name in names_in_use

    if not (requested_name and requested_name != 'None' and not already_used):
        return self.status_error('Incorrect ship name: {}'.format(requested_name))

    set_ship_name(requested_name)
    return self.status_ok()
def _get_services_list(filter_microservice_name, filter_env, filter_app_id, filter_local):
    """Collect stored service records, optionally filtered.

    Records are read from ``containers_parameters_list/<ship>`` for the local
    ship only (``filter_local`` truthy) or for all ships.

    Args:
        filter_microservice_name: fnmatch pattern for the service-name segment
            of the record key, or a falsy value for no name filtering.
        filter_env: required ``env`` tag, or None to accept any.
        filter_app_id: required ``app_id`` tag, or None to accept any.
        filter_local: restrict the lookup to the current ship.

    Returns:
        dict: microservice id -> description dict (name, status, address,
        microservice_id, container_id, tags, start_timestamp).
    """
    ships = [get_ship_name()] if filter_local else get_ship_names()
    if not ships:
        return {}

    records_by_key = {}
    for ship_name in ships:
        stored = kv.kv_get('containers_parameters_list/{}'.format(ship_name))
        if stored and isinstance(stored, dict):
            records_by_key.update(stored)

    selected_keys = records_by_key.keys()
    matched = {}
    if not selected_keys:
        return matched
    if filter_microservice_name:
        key_pattern = 'ships/*/service/{}/*'.format(filter_microservice_name)
        selected_keys = fnmatch.filter(selected_keys, key_pattern)

    for record_key in selected_keys:
        record = records_by_key[record_key]
        name = record['ServiceName']
        status = record['Status']
        service_id = record['ServiceID']
        container_id = record['container_id']
        started_at = record['start_timestamp']

        # Tags are optional: a missing key is logged and leaves the tags
        # gathered so far untouched.
        tags = {}
        try:
            if record['params']['microservice_env']:
                tags['env'] = record['params']['microservice_env']
            if record['params']['microservice_app_id']:
                tags['app_id'] = record['params']['microservice_app_id']
        except KeyError as error:
            get_logger().warning(repr(error))

        env_ok = (filter_env is None) or (filter_env == tags.get('env'))
        app_id_ok = (filter_app_id is None) or (filter_app_id == tags.get('app_id'))
        if not (env_ok and app_id_ok):
            continue

        matched[service_id] = {
            'name': name,
            'status': status,
            'address': 'n/a',
            'microservice_id': service_id,
            'container_id': container_id,
            'tags': tags,
            'start_timestamp': started_at,
        }
    return matched
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    Waits for consul, reads the saved parameters from
    ``saved_containers_path`` and passes them to the dict or list loader
    depending on their type.  Failures are logged with a traceback and not
    propagated.

    Args:
        saved_containers_path: path of the file holding the saved parameters.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters_list(saved_containers_path)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt/SystemExit must
        # still propagate, hence no bare ``except:``.
        get_logger().exception('Unable to load from %s', saved_containers_path)
def _save_runtime_settings():
    """Write the ship's current runtime settings to the runtime settings file.

    The commander flag, ship name, other ships' IPs, datacenter and dockyard
    aliases are serialized as key-sorted, 4-space indented JSON and written
    to ``consul_config.RUNTIME_SETTINGS_PATH``, replacing its content.
    """
    settings = {
        'is_commander': is_ship_commander(),
        'name': get_ship_name(),
        'ships': get_other_ship_ips(),
        'datacenter': get_current_datacenter(),
        'dockyards': alias.get_list(),
    }
    with open(consul_config.RUNTIME_SETTINGS_PATH, 'w') as settings_file:
        serialized = json.dumps(settings, sort_keys=True, indent=4)
        settings_file.write(serialized)
def POST(self):
    """Rename the current ship to the name given in the ``name`` POST parameter.

    The name is rejected when it is empty, equals the string ``'None'`` or is
    already used by one of the other ships.

    Returns:
        The handler's error status on a bad parameter or rejected name,
        otherwise its OK status.
    """
    requested_name, error = self.get_post_parameter('name')
    if error:
        return self.status_error(error)

    names_in_use = []
    for ship_ip in get_other_ship_ips():
        names_in_use.append(get_ship_name(ship_ip))
    already_used = requested_name in names_in_use

    if not (requested_name and requested_name != 'None' and not already_used):
        return self.status_error('Incorrect ship name: {}'.format(requested_name))

    set_ship_name(requested_name)
    return self.status_ok()
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    Waits for consul, reads the saved parameters from
    ``saved_containers_path`` and passes them to the dict or list loader
    depending on their type.  Failures are logged with a traceback and not
    propagated.

    Args:
        saved_containers_path: path of the file holding the saved parameters.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters_list(saved_containers_path)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt/SystemExit must
        # still propagate, hence no bare ``except:``.
        get_logger().exception('Unable to load from %s', saved_containers_path)
def _get_crashed_services():
    """Return the kv keys of this ship's crashed or not-recovered services.

    Keys are listed under ``ships/<ship>/service/``.  A key is reported when
    its record's ``'Status'`` is ``'crashed'`` or ``'not-recovered'``.
    Records that are missing, are not dicts, or have no ``'Status'`` are
    skipped instead of raising.

    Returns:
        list: matching service keys, in listing order (empty if none).
    """
    ship = get_ship_name()
    services_list = kv.kv_list('ships/{}/service/'.format(ship))
    crashed_services = []
    for service in services_list or []:
        service_dict = kv.kv_get(service)
        # The record may be gone (or malformed) by the time it is read.
        if not isinstance(service_dict, dict):
            continue
        if service_dict.get('Status') in ('crashed', 'not-recovered'):
            crashed_services.append(service)
    return crashed_services
def recover_saved_containers_from_parameters(saved_containers):
    """Load the given saved containers and start recovery from the kv store.

    Loading errors are logged and do not prevent the recovery step.

    Args:
        saved_containers: saved parameters, either a dict or another
            collection (handled by the list loader).

    Returns:
        Whatever ``recover_containers_from_kv_store()`` returns.
    """
    wait_for_consul_ready()
    try:
        ship_name = get_ship_name()
        if isinstance(saved_containers, dict):
            loader = _load_from_dict
        else:
            loader = _load_from_list
        loader(saved_containers, ship_name)
    except Exception as error:
        get_logger().exception(error)
    return recover_containers_from_kv_store()
def recover_saved_containers_from_parameters(saved_containers):
    """Load the given saved containers and start recovery from the kv store.

    Loading errors have their traceback printed and do not prevent the
    recovery step.

    Args:
        saved_containers: saved parameters, either a dict or another
            collection (handled by the list loader).

    Returns:
        Whatever ``recover_containers_from_kv_store()`` returns.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Best-effort load; KeyboardInterrupt/SystemExit still propagate.
        traceback.print_exc()
    containers_to_be_recovered = recover_containers_from_kv_store()
    return containers_to_be_recovered
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    Waits for consul, reads the saved parameters from
    ``saved_containers_path`` and passes them to the dict or list loader
    depending on their type.  On failure the traceback is printed and an
    error is logged; nothing is propagated.

    Args:
        saved_containers_path: path of the file holding the saved parameters.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters_list(saved_containers_path)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Best-effort load; KeyboardInterrupt/SystemExit still propagate.
        traceback.print_exc()
        get_logger().error('Unable to load from {}.'.format(saved_containers_path))
def recover_saved_containers_from_parameters(saved_containers):
    """Load the given saved containers and start recovery from the kv store.

    The keys already stored under ``ships/<ship>/service/`` are passed to the
    dict loader together with the saved containers.  Loading errors have
    their traceback printed and do not prevent the recovery step.

    Args:
        saved_containers: saved parameters, either a dict or another
            collection (handled by the list loader).

    Returns:
        Whatever ``recover_containers_from_kv_store()`` returns.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, containers_saved_in_kv, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Best-effort load; KeyboardInterrupt/SystemExit still propagate.
        traceback.print_exc()
    containers_to_be_recovered = recover_containers_from_kv_store()
    return containers_to_be_recovered
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    Waits for consul, lists the keys already stored under
    ``ships/<ship>/service/``, reads the saved parameters from
    ``saved_containers_path``, registers the services running at startup and
    then passes the saved parameters to the dict or list loader depending on
    their type.  On failure the traceback is printed and an error is logged;
    nothing is propagated.

    Args:
        saved_containers_path: path of the file holding the saved parameters.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        saved_containers = _load_saved_containers_parameters_list(saved_containers_path)
        _add_running_services_at_startup(containers_saved_in_kv, ship)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, containers_saved_in_kv, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Best-effort load; KeyboardInterrupt/SystemExit still propagate.
        traceback.print_exc()
        get_logger().error('Unable to load from {}.'.format(saved_containers_path))
def _get_services_list(filter_microservice_name, filter_env, filter_app_id, filter_local):
    """Collect service descriptions stored under ``containers_parameters_list``.

    With ``filter_local`` truthy only the current ship's subtree is read.
    Each ship's entry is handed to ``_parse_single_ship`` together with the
    name/env/app_id filters, and the per-ship results are merged.

    Returns:
        dict: merged result of ``_parse_single_ship`` for every ship entry;
        empty when nothing is stored.
    """
    root_key = 'containers_parameters_list'
    lookup_key = '{}/{}'.format(root_key, get_ship_name()) if filter_local else root_key

    stored_by_ship = kv.kv_get_recurse(lookup_key)
    if not stored_by_ship:
        return {}

    merged = {}
    for ship_services in stored_by_ship.values():
        parsed = _parse_single_ship(ship_services, filter_microservice_name, filter_env, filter_app_id)
        merged.update(parsed)
    return merged
def _start_container(self, long_container_id):
    """Start a container, record it as started and return its endpoints.

    Args:
        long_container_id: full docker container id.

    Returns:
        dict: ``'<advertise ip>:<host port>'`` -> container port, one entry
        per container port that has a host binding.  Ports without a binding
        are skipped.
    """
    docker_api = docker_client.api(timeout=30)
    docker_api.start(long_container_id)

    agent_self_dict = consul_query('agent/self')
    service_ip = agent_self_dict['Config']['AdvertiseAddr']
    docker_inspect = docker_api.inspect_container(long_container_id)

    ship = get_ship_name()
    container_id = shorten_container_id(long_container_id)
    save_service(ship, container_id, status='started')

    service_endpoints = {}
    # Docker reports null for ``Ports`` itself, or for a single port that is
    # exposed but not published; indexing such a value would raise TypeError.
    port_bindings = docker_inspect['NetworkSettings']['Ports'] or {}
    for container_port, host_address in port_bindings.items():
        if not host_address:
            continue
        endpoint = '{0}:{1}'.format(service_ip, host_address[0]['HostPort'])
        service_endpoints[endpoint] = container_port
    return service_endpoints
def _start_container(self, long_container_id):
    """Start a container, record it as started and return its endpoints.

    Args:
        long_container_id: full docker container id.

    Returns:
        dict: ``'<advertise ip>:<host port>'`` -> container port, one entry
        per container port that has a host binding.  Ports without a binding
        are skipped.
    """
    docker_api = docker_client.api(timeout=30)
    docker_api.start(long_container_id)

    agent_self_dict = consul_query('agent/self')
    service_ip = agent_self_dict['Config']['AdvertiseAddr']
    docker_inspect = docker_api.inspect_container(long_container_id)

    ship = get_ship_name()
    container_id = shorten_container_id(long_container_id)
    kv.save_container(ship, container_id, status='started')

    service_endpoints = {}
    # Docker reports null for ``Ports`` itself, or for a single port that is
    # exposed but not published; indexing such a value would raise TypeError.
    port_bindings = docker_inspect['NetworkSettings']['Ports'] or {}
    for container_port, host_address in port_bindings.items():
        if not host_address:
            continue
        endpoint = '{0}:{1}'.format(service_ip, host_address[0]['HostPort'])
        service_endpoints[endpoint] = container_port
    return service_endpoints
def _get_services_list(filter_microservice_name, filter_env, filter_app_id, filter_local):
    """Collect service descriptions stored under ``containers_parameters_list``.

    With ``filter_local`` truthy only the current ship's subtree is read.
    Each ship's entry is handed to ``_parse_single_ship`` together with the
    name/env/app_id filters, and the per-ship results are merged.

    Returns:
        dict: merged result of ``_parse_single_ship`` for every ship entry;
        empty when nothing is stored.
    """
    root_key = 'containers_parameters_list'
    lookup_key = '{}/{}'.format(root_key, get_ship_name()) if filter_local else root_key

    stored_by_ship = kv.kv_get_recurse(lookup_key)
    if not stored_by_ship:
        return {}

    merged = {}
    for ship_services in stored_by_ship.values():
        parsed = _parse_single_ship(ship_services, filter_microservice_name, filter_env, filter_app_id)
        merged.update(parsed)
    return merged
def POST(self):
    """Join this single-ship armada to the armada of the ``host`` parameter.

    Steps:
      1. Snapshot this ship's records stored under ``ships/<ship>/service/``.
      2. Refuse when the local armada has more than one ship.
      3. Read the remote agent's datacenter from ``<host>:8500``.
      4. Override the runtime settings (switching to client mode when the
         local consul runs in bootstrap mode) and restart consul.
      5. After a successful restart: start the ``hermes_init`` process
         group, set the ship name again and write the snapshot back.

    Returns:
        The handler's OK status on success, otherwise an error status.
    """
    consul_host, error = self.get_post_parameter('host')
    if error:
        return self.status_error(error)

    ship = get_ship_name()
    local_services = kv.kv_list('ships/{}/service/'.format(ship)) or []
    local_services_data = {key: kv.kv_get(key) for key in local_services}

    armada_size = _get_armada_size()
    if armada_size > 1:
        return self.status_error(
            'Currently only single ship armadas can join the others. '
            'Your armada has size: {0}.'.format(armada_size))

    try:
        agent_self_dict = consul_query(
            'agent/self', consul_address='{0}:8500'.format(consul_host))
        datacenter = agent_self_dict['Config']['Datacenter']
    except Exception:
        # Any failure to query/parse the remote agent is reported to the
        # caller; KeyboardInterrupt/SystemExit are left to propagate.
        return self.status_error(
            'Could not read remote host datacenter address.')

    current_consul_mode = _get_current_consul_mode()
    if current_consul_mode == consul_config.ConsulMode.BOOTSTRAP:
        override_runtime_settings(
            consul_mode=consul_config.ConsulMode.CLIENT,
            ship_ips=[consul_host],
            datacenter=datacenter)
    else:
        override_runtime_settings(ship_ips=[consul_host] + get_other_ship_ips(),
                                  datacenter=datacenter)

    if _restart_consul():
        supervisor_server = xmlrpclib.Server('http://localhost:9001/RPC2')
        hermes_init_output = supervisor_server.supervisor.startProcessGroup(
            'hermes_init')
        get_logger().info('hermes_init start: %s', hermes_init_output)
        set_ship_name(ship)
        # Write the records captured before the restart back to the kv store.
        for key, data in local_services_data.items():
            kv.kv_set(key, data)
        return self.status_ok()
    return self.status_error('Waiting for armada restart timed out.')
def _stop_service(self, container_id):
    """Stop a container and remove its service record and registrations.

    If the container is not running, only its kv record (if any) is removed
    and its services are deregistered.  Otherwise the in-container agents are
    stopped, the services are deregistered and ``docker stop`` is attempted
    up to three times.

    Args:
        container_id: id of the container to stop.

    Raises:
        Exception: the last error raised by ``docker stop`` when the
            container is still running after all attempts, or a
            ``RuntimeError`` when no attempt raised.
    """
    ship = get_ship_name()
    service_list = kv_list('ships/{}/service/'.format(ship))
    try:
        key = fnmatch.filter(service_list, '*/{}'.format(container_id))[0]
    except (IndexError, TypeError):
        # No matching record, or no record list at all.
        key = None

    if not is_container_running(container_id):
        if key:
            kv_remove(key)
        try:
            deregister_services(container_id)
        except Exception as e:
            get_logger().exception(e)
        return

    run_command_in_container('supervisorctl stop armada_agent', container_id)

    # TODO: Compatibility with old microservice images. Should be removed in future armada version.
    run_command_in_container('supervisorctl stop register_in_service_discovery', container_id)

    docker_api = docker_client.api()
    last_exception = None
    try:
        deregister_services(container_id)
    except Exception as e:
        get_logger().exception(e)

    for _attempt in range(3):
        try:
            docker_api.stop(container_id)
        except Exception as e:
            get_logger().debug(e, exc_info=True)
            last_exception = e
        if not is_container_running(container_id):
            if key:
                kv_remove(key)
            break

    if is_container_running(container_id):
        get_logger().error('Could not stop container: %s', container_id)
        if last_exception is None:
            # ``docker stop`` never raised, so there is nothing to re-raise;
            # ``raise None`` would itself fail with a misleading TypeError.
            raise RuntimeError('Could not stop container: {}'.format(container_id))
        raise last_exception
def _add_running_services_at_startup():
    """Record services registered in the local agent but absent from the kv store.

    After waiting for consul (plus a fixed 10 second delay), every service
    reported by ``agent/services`` is saved as ``'started'`` unless it is the
    ``consul`` or ``armada`` service, has ``':'`` in its id, or already has a
    key under ``ships/<ship>/service/``.  Failures are logged and not
    propagated.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        sleep(10)
        all_services = consul_query('agent/services')
        # The agent does not always list a 'consul' service; a plain ``del``
        # would raise KeyError and abort the whole synchronisation.
        all_services.pop('consul', None)
        for service_id, service_dict in all_services.items():
            if ':' in service_id:
                continue
            if service_dict['Service'] == 'armada':
                continue
            key = 'ships/{}/service/{}/{}'.format(ship, service_dict['Service'], service_id)
            if not containers_saved_in_kv or key not in containers_saved_in_kv:
                kv.save_container(ship, service_id, 'started')
                get_logger().info('Added running service: {}'.format(service_id))
    except Exception:
        # Best-effort; KeyboardInterrupt/SystemExit still propagate.
        get_logger().exception('Unable to add running services.')
def _stop_service(self, container_id):
    """Stop a container and remove its service record and registrations.

    If the container is not running, only its kv record (if any) is removed
    and its services are deregistered.  Otherwise the in-container agents are
    stopped, the services are deregistered and ``docker stop`` is attempted
    up to three times.

    Args:
        container_id: id of the container to stop.

    Raises:
        Exception: the last error raised by ``docker stop`` when the
            container is still running after all attempts, or a
            ``RuntimeError`` when no attempt raised.
    """
    ship = get_ship_name()
    service_list = kv_list("ships/{}/service/".format(ship))
    try:
        key = fnmatch.filter(service_list, "*/{}".format(container_id))[0]
    except (IndexError, TypeError):
        # No matching record, or no record list at all.
        key = None

    if not is_container_running(container_id):
        if key:
            kv_remove(key)
        try:
            deregister_services(container_id)
        except Exception as e:
            get_logger().exception(e)
        return

    run_command_in_container("supervisorctl stop armada_agent", container_id)

    # TODO: Compatibility with old microservice images. Should be removed in future armada version.
    run_command_in_container("supervisorctl stop register_in_service_discovery", container_id)

    docker_api = docker_client.api()
    last_exception = None
    try:
        deregister_services(container_id)
    except Exception as e:
        get_logger().exception(e)

    for _attempt in range(3):
        try:
            docker_api.stop(container_id)
        except Exception as e:
            get_logger().debug(e, exc_info=True)
            last_exception = e
        if not is_container_running(container_id):
            if key:
                kv_remove(key)
            break

    if is_container_running(container_id):
        get_logger().error("Could not stop container: %s", container_id)
        if last_exception is None:
            # ``docker stop`` never raised, so there is nothing to re-raise;
            # ``raise None`` would itself fail with a misleading TypeError.
            raise RuntimeError("Could not stop container: {}".format(container_id))
        raise last_exception
def _deregister_not_running_services():
    """Flag services without a running container as crashed and deregister them.

    Two passes are made:

    1. Over the locally registered services: when a service's container is
       not among the running containers, its status is set to ``'crashed'``
       (main services only, not subservices) and the container's services
       are deregistered.
    2. Over the keys stored under ``ships/<ship>/service/``: the same is done
       for every key whose last path segment (the container id) is not
       running.

    The ship is identified by its name; if the name cannot be obtained the
    ship IP is used instead.
    """
    try:
        ship = get_ship_name()
    except Exception:
        # Best-effort fallback.  Only genuine errors trigger it, so that
        # KeyboardInterrupt/SystemExit are not silently swallowed.
        ship = get_ship_ip()

    services = _get_local_services()
    running_containers_ids = _get_running_container_ids()

    for service_id, service in services.items():
        container_id, is_subservice = _get_container_id_with_subservice(service_id)
        if container_id in running_containers_ids:
            continue
        if not is_subservice:
            kv.update_container_status('crashed', ship=ship, name=service['Service'],
                                       container_id=container_id)
        deregister_services(container_id)

    services_keys = kv.kv_list('ships/{}/service/'.format(ship)) or []
    for service_key in services_keys:
        container_id = service_key.split('/')[-1]
        if container_id not in running_containers_ids:
            kv.update_container_status('crashed', key=service_key)
            deregister_services(container_id)
def _add_running_services_at_startup():
    """Record services registered in the local agent but absent from the kv store.

    After waiting for consul (plus a fixed 10 second delay), every service
    reported by ``agent/services`` is saved as ``'started'`` unless it is the
    ``consul`` or ``armada`` service, has ``':'`` in its id, or already has a
    key under ``ships/<ship>/service/``.  Failures are logged and not
    propagated.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        sleep(10)
        all_services = consul_query('agent/services')
        # The agent does not always list a 'consul' service; a plain ``del``
        # would raise KeyError and abort the whole synchronisation.
        all_services.pop('consul', None)
        for service_id, service_dict in all_services.items():
            if ':' in service_id:
                continue
            if service_dict['Service'] == 'armada':
                continue
            key = 'ships/{}/service/{}/{}'.format(ship, service_dict['Service'], service_id)
            if not containers_saved_in_kv or key not in containers_saved_in_kv:
                kv.save_container(ship, service_id, 'started')
                get_logger().info(
                    'Added running service: {}'.format(service_id))
    except Exception:
        # Best-effort; KeyboardInterrupt/SystemExit still propagate.
        get_logger().exception('Unable to add running services.')
def POST(self):
    """Join this single-ship armada to the armada of the ``host`` parameter.

    Steps:
      1. Snapshot this ship's records stored under ``ships/<ship>/service/``.
      2. Refuse when the local armada has more than one ship.
      3. Read the remote agent's datacenter from ``<host>:8500``.
      4. Override the runtime settings (switching to client mode when the
         local consul runs in bootstrap mode) and restart consul.
      5. After a successful restart: start the ``hermes_init`` process
         group, set the ship name again and write the snapshot back.

    Returns:
        The handler's OK status on success, otherwise an error status.
    """
    consul_host, error = self.get_post_parameter('host')
    if error:
        return self.status_error(error)

    ship = get_ship_name()
    local_services = kv.kv_list('ships/{}/service/'.format(ship)) or []
    local_services_data = {key: kv.kv_get(key) for key in local_services}

    armada_size = _get_armada_size()
    if armada_size > 1:
        return self.status_error('Currently only single ship armadas can join the others. '
                                 'Your armada has size: {0}.'.format(armada_size))

    try:
        agent_self_dict = consul_query('agent/self', consul_address='{0}:8500'.format(consul_host))
        datacenter = agent_self_dict['Config']['Datacenter']
    except Exception:
        # Any failure to query/parse the remote agent is reported to the
        # caller; KeyboardInterrupt/SystemExit are left to propagate.
        return self.status_error('Could not read remote host datacenter address.')

    current_consul_mode = _get_current_consul_mode()
    if current_consul_mode == consul_config.ConsulMode.BOOTSTRAP:
        override_runtime_settings(consul_mode=consul_config.ConsulMode.CLIENT,
                                  ship_ips=[consul_host],
                                  datacenter=datacenter)
    else:
        override_runtime_settings(ship_ips=[consul_host] + get_other_ship_ips(),
                                  datacenter=datacenter)

    if _restart_consul():
        supervisor_server = xmlrpclib.Server('http://localhost:9001/RPC2')
        hermes_init_output = supervisor_server.supervisor.startProcessGroup('hermes_init')
        get_logger().info('hermes_init start: %s', hermes_init_output)
        set_ship_name(ship)
        # Write the records captured before the restart back to the kv store.
        for key, data in local_services_data.items():
            kv.kv_set(key, data)
        return self.status_ok()
    return self.status_error('Waiting for armada restart timed out.')
def GET(self):
    """Describe every ship listed in the consul catalog.

    For each catalog node the response carries the ship name, role, armada
    service address, status, version and microservice id, plus a flag telling
    whether the node is the current ship.  Values missing for the armada
    service fall back to the ship IP (address) or ``'?'`` (status); a role
    that cannot be determined is reported as ``'?'``.

    Returns:
        OK status with ``{'result': [...]}``, or an exception status when
        gathering the information fails.
    """
    try:
        catalog_nodes_dict = consul_query('catalog/nodes')
        result = []
        running_armada_services = _get_running_armada_services()
        ship_ip_to_armada = _create_ip_to_service(running_armada_services)
        current_ship_ip = get_ship_ip()
        for consul_node in catalog_nodes_dict:
            ship_ip = consul_node['Address']
            ship_name = get_ship_name(ship_ip)
            armada_service = ship_ip_to_armada.get(ship_ip, {})
            service_armada_address = armada_service.get('address', ship_ip)
            service_armada_status = armada_service.get('status', '?')
            service_armada_version = get_armada_version(service_armada_address)
            try:
                ship_role = get_ship_role(ship_ip)
            except Exception:
                # Unknown role is not fatal; KeyboardInterrupt/SystemExit
                # are no longer swallowed here.
                ship_role = '?'
            is_current = (ship_ip == current_ship_ip)
            armada_instance = {
                'name': ship_name,
                'role': ship_role,
                'address': service_armada_address,
                'status': service_armada_status,
                'version': service_armada_version,
                'microservice_id': armada_service.get('microservice_id'),
                'is_current': is_current
            }
            result.append(armada_instance)
    except Exception as e:
        return self.status_exception('Could not get armada info.', e)
    return self.status_ok({'result': result})
def GET(self):
    """Describe every ship listed in the consul catalog.

    For each catalog node the response carries the ship name, role, armada
    service address, status, version and microservice id, plus a flag telling
    whether the node is the current ship.  Values missing for the armada
    service fall back to the ship IP (address) or ``'?'`` (status); a role
    that cannot be determined is reported as ``'?'``.

    Returns:
        OK status with ``{'result': [...]}``, or an exception status when
        gathering the information fails.
    """
    try:
        catalog_nodes_dict = consul_query('catalog/nodes')
        result = []
        running_armada_services = _get_running_armada_services()
        ship_ip_to_armada = _create_ip_to_service(running_armada_services)
        current_ship_ip = get_ship_ip()
        for consul_node in catalog_nodes_dict:
            ship_ip = consul_node['Address']
            ship_name = get_ship_name(ship_ip)
            armada_service = ship_ip_to_armada.get(ship_ip, {})
            service_armada_address = armada_service.get('address', ship_ip)
            service_armada_status = armada_service.get('status', '?')
            service_armada_version = get_armada_version(service_armada_address)
            try:
                ship_role = get_ship_role(ship_ip)
            except Exception:
                # Unknown role is not fatal; KeyboardInterrupt/SystemExit
                # are no longer swallowed here.
                ship_role = '?'
            is_current = (ship_ip == current_ship_ip)
            armada_instance = {
                'name': ship_name,
                'role': ship_role,
                'address': service_armada_address,
                'status': service_armada_status,
                'version': service_armada_version,
                'microservice_id': armada_service.get('microservice_id'),
                'is_current': is_current
            }
            result.append(armada_instance)
    except Exception as e:
        return self.status_exception('Could not get armada info.', e)
    return self.status_ok({'result': result})
def GET(self):
    """Describe every ship listed in the consul catalog.

    For each catalog node the response carries the ship name, role, armada
    service address, status, version and microservice id, plus a flag telling
    whether the node is the current ship.  Values missing for the armada
    service fall back to the ship IP (address) or ``"?"`` (status); a role
    that cannot be determined is reported as ``"?"``.

    Returns:
        OK status with ``{"result": [...]}``, or an exception status when
        gathering the information fails.
    """
    try:
        catalog_nodes_dict = consul_query("catalog/nodes")
        result = []
        running_armada_services = _get_running_armada_services()
        ship_ip_to_armada = _create_ip_to_service(running_armada_services)
        current_ship_ip = get_ship_ip()
        for consul_node in catalog_nodes_dict:
            ship_ip = consul_node["Address"]
            ship_name = get_ship_name(ship_ip)
            armada_service = ship_ip_to_armada.get(ship_ip, {})
            service_armada_address = armada_service.get("address", ship_ip)
            service_armada_status = armada_service.get("status", "?")
            service_armada_version = get_armada_version(service_armada_address)
            try:
                ship_role = get_ship_role(ship_ip)
            except Exception:
                # Unknown role is not fatal; KeyboardInterrupt/SystemExit
                # are no longer swallowed here.
                ship_role = "?"
            is_current = ship_ip == current_ship_ip
            armada_instance = {
                "name": ship_name,
                "role": ship_role,
                "address": service_armada_address,
                "status": service_armada_status,
                "version": service_armada_version,
                "microservice_id": armada_service.get("microservice_id"),
                "is_current": is_current,
            }
            result.append(armada_instance)
    except Exception as e:
        return self.status_exception("Could not get armada info.", e)
    return self.status_ok({"result": result})
def _save_containers_parameters_list_in_kv_store(containers_parameters_list):
    """Store the containers' parameters in the kv store under this ship's key.

    The key is ``containers_parameters_list/<ship name>``.

    Args:
        containers_parameters_list: the value to store.
    """
    key_template = 'containers_parameters_list/{ship_name}'
    kv_key = key_template.format(ship_name=get_ship_name())
    kv.kv_set(kv_key, containers_parameters_list)
def GET(self):
    """Return the current ship's name as reported by ``get_ship_name()``."""
    current_ship_name = get_ship_name()
    return current_ship_name
def _save_containers_parameters_list_in_kv_store(containers_parameters_list):
    """Store the containers' parameters in the kv store under this ship's key.

    The key is ``containers_parameters_list/<ship name>``.

    Args:
        containers_parameters_list: the value to store.
    """
    key_template = 'containers_parameters_list/{ship_name}'
    kv_key = key_template.format(ship_name=get_ship_name())
    kv.set(kv_key, containers_parameters_list)
def _get_services_list(filter_microservice_name, filter_env, filter_app_id, filter_local):
    """Collect stored service records, optionally filtered.

    Records are read from ``containers_parameters_list/<ship>`` for the local
    ship only (``filter_local`` truthy) or for all ships.

    Args:
        filter_microservice_name: fnmatch pattern for the service-name segment
            of the record key, or a falsy value for no name filtering.
        filter_env: required ``env`` tag, or None to accept any.
        filter_app_id: required ``app_id`` tag, or None to accept any.
        filter_local: restrict the lookup to the current ship.

    Returns:
        dict: microservice id -> description dict (name, status, address,
        microservice_id, container_id, tags, start_timestamp,
        single_active_instance).
    """
    ships = [get_ship_name()] if filter_local else get_ship_names()
    if not ships:
        return {}

    records_by_key = {}
    for ship_name in ships:
        stored = kv.kv_get('containers_parameters_list/{}'.format(ship_name))
        if stored and isinstance(stored, dict):
            records_by_key.update(stored)

    selected_keys = records_by_key.keys()
    matched = {}
    if not selected_keys:
        return matched
    if filter_microservice_name:
        key_pattern = 'ships/*/service/{}/*'.format(filter_microservice_name)
        selected_keys = fnmatch.filter(selected_keys, key_pattern)

    for record_key in selected_keys:
        record = records_by_key[record_key]
        name = record['ServiceName']
        status = record['Status']
        service_id = record['ServiceID']
        container_id = record['container_id']
        started_at = record['start_timestamp']
        single_instance = record.get('single_active_instance', False)

        # Tags are optional: a missing key is logged and leaves the tags
        # gathered so far untouched.
        tags = {}
        try:
            if record['params']['microservice_env']:
                tags['env'] = record['params']['microservice_env']
            if record['params']['microservice_app_id']:
                tags['app_id'] = record['params']['microservice_app_id']
        except KeyError as error:
            get_logger().warning(repr(error))

        env_ok = (filter_env is None) or (filter_env == tags.get('env'))
        app_id_ok = (filter_app_id is None) or (filter_app_id == tags.get('app_id'))
        if not (env_ok and app_id_ok):
            continue

        matched[service_id] = {
            'name': name,
            'status': status,
            'address': 'n/a',
            'microservice_id': service_id,
            'container_id': container_id,
            'tags': tags,
            'start_timestamp': started_at,
            'single_active_instance': single_instance,
        }
    return matched