Esempio n. 1
0
def command_diagnose(args):
    """Run a diagnostic script for a microservice through ``armada ssh``.

    ``logs.sh`` is used when ``args.logs`` is truthy, ``diagnose.sh``
    otherwise. When the command exits with a non-zero code and exactly one
    container matches ``args.microservice_name``, the container's restart
    parameters are printed for the 'recovering', 'crashed' and
    'not-recovered' statuses; for the latter two ``docker logs`` is run for
    the container as well.
    """
    microservice_name = args.microservice_name
    script = "logs.sh" if args.logs else "diagnose.sh"

    diagnostic_command = (
        "armada ssh -i {microservice_name} "
        "bash < /opt/armada/armada_command/diagnostic_scripts/{script}"
    ).format(microservice_name=microservice_name, script=script)
    if subprocess.call(diagnostic_command, shell=True) == 0:
        return

    instances = get_matched_containers(microservice_name)
    if instances is None or len(instances) != 1:
        return

    instance = instances[0]
    status = instance['Status']
    is_recovering = status == 'recovering'
    if not is_recovering and status not in ['crashed', 'not-recovered']:
        return

    # Both handled kinds of status start with the same parameter dump.
    params = instance['params']
    print('RESTART_CONTAINER_PARAMETERS:')
    print(json.dumps(params, indent=4, sort_keys=True))
    if is_recovering:
        return

    print('')
    container_id = instance['container_id']
    print('Docker logs of container_id: {}'.format(container_id))
    subprocess.call("docker logs {}".format(container_id), shell=True)
Esempio n. 2
0
def command_diagnose(args):
    """Pipe a diagnostic script into the microservice's container.

    Runs ``diagnose.sh`` (or ``logs.sh`` when ``args.logs`` is truthy) via
    ``armada ssh -i``. If that fails (non-zero exit code) and a single
    matching container is found, prints its restart parameters when its
    status is 'recovering', 'crashed' or 'not-recovered', followed by the
    output of ``docker logs`` for the last two statuses.
    """

    def show_restart_parameters(container):
        # Read the parameters first so nothing is printed if they are absent.
        restart_params = container['params']
        print('RESTART_CONTAINER_PARAMETERS:')
        print(json.dumps(restart_params, indent=4, sort_keys=True))

    microservice_name = args.microservice_name
    script = "logs.sh" if args.logs else "diagnose.sh"
    command_template = ("armada ssh -i {microservice_name} "
                        "bash < /opt/armada/armada_command/diagnostic_scripts/{script}")
    remote_command = command_template.format(
        microservice_name=microservice_name, script=script)

    if subprocess.call(remote_command, shell=True) != 0:
        instances = get_matched_containers(microservice_name)
        if instances is not None and len(instances) == 1:
            instance = instances[0]
            status = instance['Status']
            if status == 'recovering':
                show_restart_parameters(instance)
            elif status in ['crashed', 'not-recovered']:
                show_restart_parameters(instance)
                print('')
                container_id = instance['container_id']
                print('Docker logs of container_id: {}'.format(container_id))
                subprocess.call("docker logs {}".format(container_id), shell=True)
Esempio n. 3
0
def _recover_container(container_parameters):
    """Post ``container_parameters`` to the armada API ``run`` endpoint.

    Logs the request and its outcome. Returns True when the response's
    ``status`` equals ``'ok'`` and False otherwise.
    """
    get_logger().info('Recovering: %s ...\n', json.dumps(container_parameters))
    recovery_result = armada_api.post('run', container_parameters)

    recovered = recovery_result.get('status') == 'ok'
    if recovered:
        get_logger().info('Recovered container: %s', json.dumps(recovery_result))
    else:
        get_logger().error('Could not recover container: %s', json.dumps(recovery_result))
    return recovered
Esempio n. 4
0
def _recover_container(container_parameters):
    """Try to start a container again from its saved parameters.

    The parameters are sent to the armada API ``run`` endpoint; the attempt
    and its result are logged.

    Returns:
        True if the API response has ``status == 'ok'``, False otherwise.
    """
    get_logger().info('Recovering: %s ...\n', json.dumps(container_parameters))
    recovery_result = armada_api.post('run', container_parameters)

    if recovery_result.get('status') != 'ok':
        get_logger().error('Could not recover container: %s', json.dumps(recovery_result))
        return False

    get_logger().info('Recovered container: %s', json.dumps(recovery_result))
    return True
Esempio n. 5
0
def recover_containers_from_kv_store():
    """Try to recover every crashed service, retrying failed ones.

    Each service returned by ``_get_crashed_services()`` is first marked as
    'recovering'. Up to ``RECOVERY_RETRY_LIMIT`` passes are then made; in
    each pass the stored ``params`` of every pending service are handed to
    ``_recover_container()``. Recovered services are removed from the
    key-value store, the others are retried in the next pass. Services still
    failing after the last pass are marked as 'not-recovered'.

    Returns:
        The list of services that could not be recovered (empty when all
        recoveries succeeded).
    """
    services_to_be_recovered = _get_crashed_services()

    for service in services_to_be_recovered:
        kv.update_container_status('recovering', key=service)

    recovery_retry_count = 0
    while services_to_be_recovered and recovery_retry_count < RECOVERY_RETRY_LIMIT:
        get_logger().info("Recovering containers: %s", json.dumps(services_to_be_recovered))
        services_not_recovered = []

        for service in services_to_be_recovered:
            service_parameters = kv.kv_get(service)['params']
            if not _recover_container(service_parameters):
                services_not_recovered.append(service)
            else:
                kv.kv_remove(service)

        recovery_retry_count += 1
        # Wait only when another attempt is actually going to happen: there
        # is no point in delaying the caller after everything has recovered
        # or after the final retry.
        if services_not_recovered and recovery_retry_count < RECOVERY_RETRY_LIMIT:
            sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        services_to_be_recovered = services_not_recovered

    for service in services_to_be_recovered:
        kv.update_container_status('not-recovered', key=service)

    return services_to_be_recovered
def recover_containers_from_kv_store():
    """Recover crashed services, backing off exponentially between passes.

    All services from ``_get_crashed_services()`` are marked 'recovering'
    and then retried for at most ``RECOVERY_RETRY_LIMIT`` passes. After a
    pass that left failures, the function sleeps for
    ``START_DELAY_BETWEEN_RECOVER_RETRY * 2**pass_index`` capped at
    ``MAX_DELAY_BETWEEN_RECOVER_RETRY``. Services that never recover are
    marked 'not-recovered'.

    Returns:
        The services that are still not recovered.
    """
    pending = _get_crashed_services()

    for service in pending:
        update_container_status('recovering', key=service)

    attempt = 0
    while pending and attempt < RECOVERY_RETRY_LIMIT:
        get_logger().info("Recovering containers: %s", json.dumps(pending))
        failed = []

        for service in pending:
            stored_parameters = kv.kv_get(service)['params']
            if _recover_container(stored_parameters):
                kv.kv_remove(service)
            else:
                failed.append(service)

        if failed:
            backoff = START_DELAY_BETWEEN_RECOVER_RETRY * 2**attempt
            sleep(min(backoff, MAX_DELAY_BETWEEN_RECOVER_RETRY))
        pending = failed
        attempt += 1

    for service in pending:
        update_container_status('not-recovered', key=service)

    return pending
Esempio n. 7
0
def get_consul_config(consul_mode, ship_ips, datacenter, ship_external_ip, ship_name):
    """Build the Consul agent configuration and return it as a JSON string.

    The agent is configured as a server unless ``consul_mode`` is
    ``ConsulMode.CLIENT``; ``bootstrap_expect`` is set to 1 for
    ``ConsulMode.BOOTSTRAP``. Three watches are registered: two that run the
    ``armada_backend.runtime_settings`` module and one, on the
    ``ships/<ship_name>/`` key prefix, that runs
    ``armada_backend.save_running_containers``.
    """
    config = {
        'server': consul_mode != ConsulMode.CLIENT,
        'start_join': ship_ips,
        'datacenter': str(datacenter),
        'node_name': 'ship-{0}'.format(ship_external_ip),
        'advertise_addr': str(ship_external_ip),
        'client_addr': '0.0.0.0',
        'data_dir': '/var/opt/consul-{datacenter}-{consul_mode}'.format(
            datacenter=datacenter, consul_mode=consul_mode),
        'leave_on_terminate': True,
        'performance': {'raft_multiplier': 1},
    }
    if consul_mode == ConsulMode.BOOTSTRAP:
        config['bootstrap_expect'] = 1

    # Watch handlers are shell commands run with armada on PYTHONPATH.
    env_pythonpath = 'PYTHONPATH=/opt/armada-docker:$PYTHONPATH'
    runtime_settings_handler = '{env_pythonpath} python -m armada_backend.runtime_settings'.format(
        env_pythonpath=env_pythonpath)
    running_containers_handler = (
        '{env_pythonpath} python -m armada_backend.save_running_containers '
        '{running_containers_parameters_path} '
        '>> /tmp/save_running_containers.out 2>&1'
    ).format(
        env_pythonpath=env_pythonpath,
        running_containers_parameters_path=RUNNING_CONTAINERS_PARAMETERS_PATH,
    )

    watches = []
    watches.append({'type': 'keyprefix', 'prefix': 'dockyard/', 'handler': runtime_settings_handler})
    watches.append({'type': 'nodes', 'handler': runtime_settings_handler})
    watches.append({'type': 'keyprefix', 'prefix': 'ships/{}/'.format(ship_name),
                    'handler': running_containers_handler})
    config['watches'] = watches

    return json.dumps(config, sort_keys=True, indent=4)
Esempio n. 8
0
 def GET(self, image_name):
     """Respond with the docker image listing for ``image_name``.

     The listing returned by the docker API is JSON-encoded and placed under
     the ``image_info`` key of an OK response; any exception is turned into
     an error response.
     """
     try:
         matching_images = docker_client.api().images(image_name)
         encoded_images = json.dumps(matching_images)
         return self.status_ok({'image_info': encoded_images})
     except Exception as e:
         return self.status_exception("Cannot get info about image.", e)
Esempio n. 9
0
 def GET(self, image_name):
     """Return information about a docker image as an OK response.

     ``image_info`` in the response holds the JSON-encoded result of the
     docker API ``images()`` call for ``image_name``. Failures are reported
     through ``status_exception``.
     """
     try:
         api = docker_client.api()
         response_body = {'image_info': json.dumps(api.images(image_name))}
         return self.status_ok(response_body)
     except Exception as e:
         return self.status_exception("Cannot get info about image.", e)
Esempio n. 10
0
def _restart_consul():
    """Deregister this node from the Consul catalog and make the agent leave.

    The node name is read from the local agent (``agent/self``), the node is
    deregistered through ``catalog/deregister`` and ``consul leave`` is
    executed.

    Returns:
        Whatever ``wait_for_consul_ready()`` returns.

    Raises:
        subprocess.CalledProcessError: if ``consul leave`` exits with a
            non-zero code.
    """
    # Services will be registered again by their script 'register_in_service_discovery'.
    agent_self_dict = consul_query('agent/self')
    node_name = agent_self_dict['Config']['NodeName']
    # consul_put() JSON-encodes `data` itself, so it must receive the plain
    # dict. Passing an already serialized string would be encoded a second
    # time and reach Consul as a JSON string instead of a JSON object.
    consul_put('catalog/deregister', data={'Node': node_name})

    check_call(['consul', 'leave'])
    return wait_for_consul_ready()
Esempio n. 11
0
def _save_runtime_settings():
    """Write the ship's current runtime settings to the settings file.

    Collects the commander flag, ship name, other ships' IPs, datacenter and
    dockyard aliases, and stores them as indented, key-sorted JSON in
    ``consul_config.RUNTIME_SETTINGS_PATH``.
    """
    consul_settings = dict(
        is_commander=is_ship_commander(),
        name=get_ship_name(),
        ships=get_other_ship_ips(),
        datacenter=get_current_datacenter(),
        dockyards=alias.get_list(),
    )

    with open(consul_config.RUNTIME_SETTINGS_PATH, 'w') as settings_file:
        serialized = json.dumps(consul_settings, sort_keys=True, indent=4)
        settings_file.write(serialized)
Esempio n. 12
0
def _save_runtime_settings():
    """Dump runtime settings of this ship as JSON.

    The settings (``is_commander``, ``name``, ``ships``, ``datacenter``,
    ``dockyards``) are gathered from the corresponding helper functions and
    written, key-sorted with 4-space indentation, to
    ``consul_config.RUNTIME_SETTINGS_PATH``.
    """
    settings = {}
    settings['is_commander'] = is_ship_commander()
    settings['name'] = get_ship_name()
    settings['ships'] = get_other_ship_ips()
    settings['datacenter'] = get_current_datacenter()
    settings['dockyards'] = alias.get_list()

    with open(consul_config.RUNTIME_SETTINGS_PATH, 'w') as output:
        output.write(json.dumps(settings, sort_keys=True, indent=4))
Esempio n. 13
0
def print_result_from_armada_api(result):
    """Print an armada API result, exiting with code 1 on failure.

    For ``status == 'ok'`` the remaining keys (if any) are printed as JSON.
    For ``status == 'error'`` the ``error`` entry is printed with
    ``print_err``; for any other status the whole result is. Both failure
    cases end with ``sys.exit(1)``.
    """
    if result['status'] != 'ok':
        if result['status'] == 'error':
            print_err(result.get('error'))
        else:
            print_err(result)
        sys.exit(1)
        return

    # Work on a filtered copy so the caller's result is left untouched.
    payload = {key: value for key, value in result.items() if key != 'status'}
    if payload:
        print(json.dumps(payload))
Esempio n. 14
0
def print_result_from_armada_api(result):
    """Report the outcome of an armada API call on the console.

    A successful result (``status == 'ok'``) is printed as JSON without its
    ``status`` key, unless nothing else is left. Otherwise the error (or the
    whole result for an unknown status) goes to ``print_err`` and the
    process exits with code 1.
    """
    status = result['status']
    if status == 'ok':
        details = dict(result)
        details.pop('status')
        if details:
            print(json.dumps(details))
        return

    message = result.get('error') if status == 'error' else result
    print_err(message)
    sys.exit(1)
Esempio n. 15
0
def _fetch_hermes_from_couriers(courier_addresses):
    """Ask each courier to update this container's hermes directory.

    Every courier receives a POST to ``/update_hermes`` carrying this
    container's SSH address and ``HERMES_DIRECTORY``. A failed request, an
    HTTP error status or a response body other than ``ok`` is logged and
    does not stop processing of the remaining couriers.
    """
    my_ssh_address = get_container_ssh_address(socket.gethostname())
    for courier_address in courier_addresses:
        courier_url = 'http://{courier_address}/update_hermes'.format(
            courier_address=courier_address)
        try:
            request_body = json.dumps({'ssh': my_ssh_address, 'path': HERMES_DIRECTORY})
            response = requests.post(courier_url, request_body)
            response.raise_for_status()
            answered_ok = response.text.strip() == 'ok'
            if not answered_ok:
                raise Exception('Error response from courier:\n{}'.format(response.text))
        except Exception as e:
            logger = get_logger()
            logger.error('Fetching all sources from courier %s failed:', courier_address)
            get_logger().exception(e)
Esempio n. 16
0
def override_runtime_settings(consul_mode=None, ship_name=None, ship_ips=None, datacenter=None):
    """Write the given runtime-setting overrides to the override file.

    Only arguments that are not None are stored. ``consul_mode`` is stored
    as ``is_commander`` (True unless it equals ``ConsulMode.CLIENT``). The
    result is written as indented, key-sorted JSON to
    ``consul_config.OVERRIDE_RUNTIME_SETTINGS_PATH``.
    """
    consul_settings = {}
    if consul_mode is not None:
        consul_settings['is_commander'] = consul_mode != consul_config.ConsulMode.CLIENT

    plain_overrides = (
        ('name', ship_name),
        ('ships', ship_ips),
        ('datacenter', datacenter),
    )
    for key, value in plain_overrides:
        if value is not None:
            consul_settings[key] = value

    with open(consul_config.OVERRIDE_RUNTIME_SETTINGS_PATH, 'w') as override_file:
        override_file.write(json.dumps(consul_settings, sort_keys=True, indent=4))
Esempio n. 17
0
def override_runtime_settings(consul_mode=None, ship_name=None, ship_ips=None, datacenter=None):
    """Persist overrides for selected runtime settings.

    Arguments left as None are skipped. ``consul_mode`` becomes the
    ``is_commander`` flag (False only for ``ConsulMode.CLIENT``);
    ``ship_name``, ``ship_ips`` and ``datacenter`` are stored under
    ``name``, ``ships`` and ``datacenter``. The overrides are saved as JSON
    in ``consul_config.OVERRIDE_RUNTIME_SETTINGS_PATH``.
    """
    overrides = {}

    def store(key, value):
        overrides[key] = value

    if consul_mode is not None:
        store('is_commander', consul_mode != consul_config.ConsulMode.CLIENT)
    if ship_name is not None:
        store('name', ship_name)
    if ship_ips is not None:
        store('ships', ship_ips)
    if datacenter is not None:
        store('datacenter', datacenter)

    with open(consul_config.OVERRIDE_RUNTIME_SETTINGS_PATH, 'w') as settings_file:
        serialized = json.dumps(overrides, sort_keys=True, indent=4)
        settings_file.write(serialized)
Esempio n. 18
0
 def on_get(self, req, resp, image_name_or_address, image_name=None):
     """Respond with docker image information for the requested image.

     When ``image_name`` is omitted, ``image_name_or_address`` is the image
     name and no dockyard address is used; otherwise it is the dockyard
     address. The docker API listing for the resulting image path is
     JSON-encoded into ``image_info`` of an OK response; any exception is
     reported through ``status_exception``.
     """
     if image_name is None:
         dockyard_address, image_name = None, image_name_or_address
     else:
         dockyard_address = image_name_or_address
     image = LocalArmadaImage(dockyard_address, image_name)
     try:
         matching_images = docker_client.api().images(image.image_path)
         body = {'image_info': json.dumps(matching_images)}
         return self.status_ok(resp, body)
     except Exception as e:
         return self.status_exception(resp, "Cannot get info about image.", e)
Esempio n. 19
0
def main():
    """Entry point: recover saved containers when needed.

    Registers already running services, then, if ``--force`` was given or
    ``_check_if_we_should_recover`` says so, loads the saved containers into
    the key-value store and recovers them. Exits with code 1 when some
    containers could not be recovered. ``RECOVERY_COMPLETED_PATH`` is
    written in every case, including failures.
    """
    setup_sentry()
    try:
        args = _parse_args()
        _add_running_services_at_startup()

        saved_containers_path = args.saved_containers_path
        if not args.force and not _check_if_we_should_recover(saved_containers_path):
            return

        _load_containers_to_kv_store(args.saved_containers_path)
        not_recovered = recover_containers_from_kv_store()
        if not_recovered:
            get_logger().error("Containers not recovered: %s", json.dumps(not_recovered))
            sys.exit(1)
        get_logger().info("All containers recovered :)")
    finally:
        # Written even on errors so the completion marker always exists.
        with open(RECOVERY_COMPLETED_PATH, 'w') as recovery_completed_file:
            recovery_completed_file.write('1')
Esempio n. 20
0
def get_consul_config(consul_mode, ship_ips, datacenter, ship_external_ip,
                      ship_name):
    """Return the Consul agent configuration serialized as JSON.

    ``server`` is True for every mode except ``ConsulMode.CLIENT`` and
    ``bootstrap_expect`` is 1 for ``ConsulMode.BOOTSTRAP``. The ``watches``
    section runs ``armada_backend.runtime_settings`` on ``dockyard/`` key
    changes and on node changes, and
    ``armada_backend.save_running_containers`` on changes under
    ``ships/<ship_name>/``.
    """

    def keyprefix_watch(prefix, handler):
        return {'type': 'keyprefix', 'prefix': prefix, 'handler': handler}

    env_pythonpath = 'PYTHONPATH=/opt/armada-docker:$PYTHONPATH'
    save_runtime_settings_cmd = (
        '{env_pythonpath} python -m armada_backend.runtime_settings'
    ).format(env_pythonpath=env_pythonpath)
    save_running_containers_cmd = (
        '{env_pythonpath} python -m armada_backend.save_running_containers '
        '{running_containers_parameters_path} '
        '>> /tmp/save_running_containers.out 2>&1'
    ).format(
        env_pythonpath=env_pythonpath,
        running_containers_parameters_path=RUNNING_CONTAINERS_PARAMETERS_PATH)

    data_dir = '/var/opt/consul-{datacenter}-{consul_mode}'.format(
        datacenter=datacenter, consul_mode=consul_mode)

    config = dict(
        server=(consul_mode != ConsulMode.CLIENT),
        start_join=ship_ips,
        datacenter=str(datacenter),
        node_name='ship-{0}'.format(ship_external_ip),
        advertise_addr=str(ship_external_ip),
        client_addr='0.0.0.0',
        data_dir=data_dir,
        leave_on_terminate=True,
        performance={'raft_multiplier': 1},
        watches=[
            keyprefix_watch('dockyard/', save_runtime_settings_cmd),
            {'type': 'nodes', 'handler': save_runtime_settings_cmd},
            keyprefix_watch('ships/{}/'.format(ship_name),
                            save_running_containers_cmd),
        ],
    )
    if consul_mode == ConsulMode.BOOTSTRAP:
        config['bootstrap_expect'] = 1

    return json.dumps(config, sort_keys=True, indent=4)
Esempio n. 21
0
    def _create_service(self,
                        image_path=None,
                        microservice_name=None,
                        microservice_env=None,
                        microservice_app_id=None,
                        dockyard_user=None,
                        dockyard_password=None,
                        ports=None,
                        environment=None,
                        volumes=None,
                        run_command=None,
                        resource_limits=None,
                        configs=None,
                        **kwargs):
        """Prepare the environment and volumes, then create the container.

        Fills in defaults for the optional arguments, derives the
        microservice name/env/app id (explicit argument first, then the
        matching ``MICROSERVICE_*`` entry of ``environment``; the name
        finally falls back to the image name), adds armada-specific
        environment variables, mounts the docker socket and any hermes
        volumes, and delegates to ``self._create_container``.

        ``environment`` and ``volumes`` are modified in place when the
        caller passes non-empty dicts.

        Args:
            image_path: Image to run; required. The image name is taken from
                ``split_image_path(image_path)[1]``.
            run_command: Command stored base64-encoded in
                ``ARMADA_RUN_COMMAND``; required.
            **kwargs: Unrecognized keys; only logged as a warning.

        Returns:
            The value returned by ``self._create_container`` (named
            ``long_container_id`` here).

        Raises:
            ValueError: if ``image_path`` or ``run_command`` is empty.
        """
        # Check required fields in received JSON:
        if not image_path:
            raise ValueError('Field image_path cannot be empty.')
        if not run_command:
            raise ValueError('Field run_command cannot be empty.')

        if kwargs:
            get_logger().warning(
                'JSON data sent to API contains unrecognized keys: %s',
                list(kwargs.keys()))

        # Set default values:
        environment = environment or {}
        ports = ports or {}
        volumes = volumes or {}
        resource_limits = resource_limits or {}
        configs = configs or []
        image_name = split_image_path(image_path)[1]
        microservice_name = microservice_name or environment.get(
            'MICROSERVICE_NAME') or image_name
        microservice_env = microservice_env or environment.get(
            'MICROSERVICE_ENV')
        microservice_app_id = microservice_app_id or environment.get(
            'MICROSERVICE_APP_ID')

        # Collect the parameters that are later serialized into the
        # RESTART_CONTAINER_PARAMETERS environment variable.
        # NOTE(review): 'environment' below is the same dict object that is
        # mutated further down, so ARMADA_RUN_COMMAND, IMAGE_NAME and
        # MICROSERVICE_NAME are already present when it is serialized.
        # Confirm this is intended.
        restart_parameters = {
            'image_path': image_path,
            'microservice_name': microservice_name,
            'microservice_env': microservice_env,
            'microservice_app_id': microservice_app_id,
            'dockyard_user': dockyard_user,
            'dockyard_password': dockyard_password,
            'ports': ports,
            'environment': environment,
            'volumes': volumes,
            'run_command': run_command,
            'resource_limits': resource_limits,
            'configs': configs,
        }

        # With ARMADA_DEVELOP set, the restart parameters keep only the part
        # of image_path after the first '/'.
        dev = environment.get('ARMADA_DEVELOP')
        if dev:
            restart_parameters['image_path'] = image_path.split('/', 1)[-1]

        # Update environment variables with armada-specific values:
        # NOTE(review): on Python 3 b64encode() returns bytes; because of the
        # aliasing noted above, json.dumps() below would then meet a bytes
        # value inside restart_parameters['environment']. Verify which
        # Python version this code targets.
        environment['ARMADA_RUN_COMMAND'] = base64.b64encode(
            run_command.encode())
        environment['IMAGE_NAME'] = image_name
        environment['MICROSERVICE_NAME'] = microservice_name
        environment['RESTART_CONTAINER_PARAMETERS'] = base64.b64encode(
            json.dumps(restart_parameters, sort_keys=True).encode())

        if microservice_env:
            environment['MICROSERVICE_ENV'] = microservice_env
        if microservice_app_id:
            environment['MICROSERVICE_APP_ID'] = microservice_app_id
        config_path, hermes_volumes = process_hermes(microservice_name,
                                                     image_name,
                                                     microservice_env,
                                                     microservice_app_id,
                                                     configs)
        if config_path:
            environment['CONFIG_PATH'] = config_path

        # Mount the docker socket at the same path inside the container.
        volumes[docker_client.
                DOCKER_SOCKET_PATH] = docker_client.DOCKER_SOCKET_PATH
        volumes.update(hermes_volumes or {})
        long_container_id = self._create_container(image_path, ports,
                                                   environment, volumes,
                                                   dockyard_user,
                                                   dockyard_password,
                                                   resource_limits)
        return long_container_id
Esempio n. 22
0
def _multiset_difference(a, b):
    """Return the items of ``a`` left after removing the items of ``b``.

    Items are compared by their key-sorted JSON encoding and multiplicity is
    respected: each occurrence in ``b`` cancels one equal occurrence in
    ``a``. The returned items are decoded back from JSON.
    """

    def canonical_counts(items):
        return Counter(json.dumps(item, sort_keys=True) for item in items)

    remaining = canonical_counts(a) - canonical_counts(b)
    return [
        json.loads(encoded)
        for encoded, count in remaining.items()
        for _ in range(count)
    ]
def _multiset_difference(a, b):
    """Compute the multiset difference ``a - b`` of JSON-serializable items.

    Each item is identified by its JSON dump with sorted keys, so equal
    dicts match regardless of key order. Items of ``a`` that are not
    cancelled by an equal item of ``b`` are returned as freshly decoded
    objects.
    """
    encode = lambda item: json.dumps(item, sort_keys=True)
    difference = Counter(map(encode, a)) - Counter(map(encode, b))
    return list(map(json.loads, difference.elements()))
Esempio n. 24
0
 def status_ok(self, extra_result=None):
     """Build a JSON success response.

     Sets the ``Content-Type`` header to ``application/json`` and returns
     the entries of ``extra_result`` plus ``'status': 'ok'`` as indented,
     key-sorted JSON.

     Args:
         extra_result: Optional dict of additional response fields. It is
             not modified.
     """
     # Copy first so that adding 'status' does not alter the caller's dict.
     result = dict(extra_result or {})
     result['status'] = 'ok'
     web.header('Content-Type', 'application/json')
     return json.dumps(result, indent=4, sort_keys=True)
Esempio n. 25
0
def consul_put(query, data=None, consul_address=None):
    """Send a PUT request with a JSON-encoded body to the Consul API.

    A falsy ``data`` is replaced by an empty dict. The request uses
    ``CONSUL_TIMEOUT_IN_SECONDS`` as timeout; the ``requests`` response is
    returned.
    """
    payload = data if data else {}
    url = __get_consul_url(query, consul_address)
    body = json.dumps(payload)
    return requests.put(url, data=body, timeout=CONSUL_TIMEOUT_IN_SECONDS)
Esempio n. 26
0
def _create_response_with_error(error_msg=None):
    """Return a JSON error response; a falsy message becomes an empty string."""
    response = {'status': 'error', 'error': error_msg if error_msg else ''}
    return json.dumps(response)
Esempio n. 27
0
def consul_put(query, data, consul_address=None):
    """PUT ``data``, encoded as JSON, to the Consul endpoint for ``query``.

    Returns the ``requests`` response; the request times out after
    ``CONSUL_TIMEOUT_IN_SECONDS``.
    """
    request_kwargs = {
        'data': json.dumps(data),
        'timeout': CONSUL_TIMEOUT_IN_SECONDS,
    }
    return requests.put(__get_consul_url(query, consul_address), **request_kwargs)