Ejemplo n.º 1
0
def _deregister_not_running_services():
    try:
        ship = get_ship_name()
    except:
        ship = get_ship_ip()
    services = _get_local_services()
    running_containers_ids = _get_running_container_ids()
    for service_id in services.keys():
        container_id, is_subservice = _get_container_id_with_subservice(
            service_id)
        if container_id in running_containers_ids:
            continue
        if not is_subservice:
            name = services[service_id]['Service']
            update_container_status('crashed',
                                    ship=ship,
                                    service_name=name,
                                    container_id=container_id)
        deregister_services(container_id)

    for service_key in get_local_services():
        container_id = service_key.split('/')[-1]
        if container_id not in running_containers_ids:
            update_container_status('crashed', key=service_key)
            deregister_services(container_id)
def _get_local_running_containers():
    result = []
    for container in get_local_services():
        container_parameters = kv.kv_get(container)['params']
        if container_parameters:
            result.append(container_parameters)
    return result
Ejemplo n.º 3
0
def main():
    setup_sentry()
    args = _parse_args()
    saved_containers_path = args.saved_containers_path

    if not args.force and not _is_recovery_completed():
        get_logger().info(
            'Recovery is not completed. Aborting saving running containers.')
        return

    try:
        wait_for_consul_ready()
        saved_containers = get_local_services()
        containers_parameters_dict = {}

        for container in saved_containers:
            container_dict = kv.kv_get(container)
            containers_parameters_dict[container] = container_dict

        if not containers_parameters_dict:
            get_logger().info(
                'Aborted saving container because list is empty.')
            return

        _save_containers_parameters_list_in_file(containers_parameters_dict,
                                                 saved_containers_path)
        get_logger().info(
            'Containers have been saved to {}.'.format(saved_containers_path))

    except Exception as e:
        get_logger().exception(e)
        sys.exit(1)
def _get_crashed_services():
    services_list = get_local_services()
    crashed_services = []

    for service in services_list:
        service_dict = kv.kv_get(service)
        microservice_status = service_dict['Status']
        if microservice_status in ['crashed', 'not-recovered']:
            crashed_services.append(service)

    return crashed_services
Ejemplo n.º 5
0
    def POST(self):
        consul_host, error = self.get_post_parameter('host')
        if error:
            return self.status_error(error)
        ship = get_ship_name()
        local_services_data = {
            key: kv.kv_get(key)
            for key in get_local_services()
        }

        armada_size = _get_armada_size()
        if armada_size > 1:
            return self.status_error(
                'Currently only single ship armadas can join the others. '
                'Your armada has size: {0}.'.format(armada_size))

        try:
            agent_self_dict = consul_query(
                'agent/self', consul_address='{0}:8500'.format(consul_host))
            datacenter = agent_self_dict['Config']['Datacenter']
        except:
            return self.status_error(
                'Could not read remote host datacenter address.')

        current_consul_mode = _get_current_consul_mode()
        if current_consul_mode == consul_config.ConsulMode.BOOTSTRAP:
            override_runtime_settings(
                consul_mode=consul_config.ConsulMode.CLIENT,
                ship_ips=[consul_host],
                datacenter=datacenter)
        else:
            override_runtime_settings(ship_ips=[consul_host] +
                                      get_other_ship_ips(),
                                      datacenter=datacenter)

        if _restart_consul():
            supervisor_server = xmlrpclib.Server('http://localhost:9001/RPC2')
            hermes_init_output = supervisor_server.supervisor.startProcessGroup(
                'hermes_init')
            get_logger().info('hermes_init start: %s', hermes_init_output)
            set_ship_name(ship)
            for key, data in local_services_data.items():
                kv.kv_set(key, data)
            return self.status_ok()
        return self.status_error('Waiting for armada restart timed out.')
def _add_running_services_at_startup():
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = get_local_services()
        sleep(10)
        all_services = consul_query('agent/services')
        del all_services['consul']
        for service_id, service_dict in all_services.items():
            if ':' in service_id:
                continue
            if service_dict['Service'] == 'armada':
                continue
            key = create_consul_services_key(ship, service_dict['Service'],
                                             service_id)
            if not containers_saved_in_kv or key not in containers_saved_in_kv:
                save_container(ship, service_id, 'started')
                get_logger().info(
                    'Added running service: {}'.format(service_id))
    except:
        get_logger().exception('Unable to add running services.')
Ejemplo n.º 7
0
    def _stop_service(self, container_id):
        service_list = get_local_services()
        try:
            key = fnmatch.filter(service_list, '*/{}'.format(container_id))[0]
        except (IndexError, TypeError):
            key = None

        if not is_container_running(container_id):
            if key:
                kv_remove(key)
            try:
                deregister_services(container_id)
            except Exception as e:
                get_logger().exception(e)
        else:
            run_command_in_container('supervisorctl stop armada_agent', container_id)
            trigger_hook('pre-stop', container_id)

            docker_api = docker_client.api()
            last_exception = None
            try:
                deregister_services(container_id)
            except Exception as e:
                get_logger().exception(e)
            for i in range(3):
                try:
                    docker_api.stop(container_id)
                except Exception as e:
                    get_logger().debug(e, exc_info=True)
                    last_exception = e
                if not is_container_running(container_id):
                    if key:
                        kv_remove(key)
                    break
            if is_container_running(container_id):
                get_logger().error('Could not stop container: %s', container_id)
                raise last_exception