def recover_containers_from_kv_store():
    """Try to recover every crashed service recorded in the kv store.

    Each service key returned by ``_get_crashed_services()`` is first marked
    'recovering'. Recovery is then attempted in rounds, at most
    ``RECOVERY_RETRY_LIMIT`` of them: a service whose ``_recover_container()``
    call succeeds has its key removed from the kv store, the remaining ones
    are retried in the next round. Services still failing once the rounds
    are over are marked 'not-recovered'.

    Returns:
        The service keys that could not be recovered (empty when every
        service was recovered).
    """
    services_to_be_recovered = _get_crashed_services()

    for service in services_to_be_recovered:
        kv.update_service_status('recovering', key=service)

    recovery_retry_count = 0
    while services_to_be_recovered and recovery_retry_count < RECOVERY_RETRY_LIMIT:
        get_logger().info("Recovering containers: %s", json.dumps(services_to_be_recovered))
        services_not_recovered = []

        for service in services_to_be_recovered:
            # The parameters used for recovery are stored under the service key.
            service_parameters = kv.kv_get(service)['params']
            if _recover_container(service_parameters):
                kv.kv_remove(service)
            else:
                services_not_recovered.append(service)

        services_to_be_recovered = services_not_recovered
        recovery_retry_count += 1

        # Wait only when another round is really going to run: there is no
        # point in delaying the caller once everything has been recovered or
        # the retry budget is exhausted.
        if services_to_be_recovered and recovery_retry_count < RECOVERY_RETRY_LIMIT:
            sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)

    for service in services_to_be_recovered:
        kv.update_service_status('not-recovered', key=service)

    return services_to_be_recovered
Ejemplo n.º 2
0
def recover_containers_from_kv_store():
    """Try to recover crashed services, re-reading the crashed list each round.

    The service keys returned by ``_get_crashed_services()`` are first marked
    'recovering'. Recovery then runs in rounds, at most
    ``RECOVERY_RETRY_LIMIT`` of them. In each round every listed service is
    passed to ``_recover_container()``; on success its key is removed from
    the kv store. A service that fails during the last allowed round is
    marked 'not-recovered'. After each round the function sleeps for
    ``DELAY_BETWEEN_RECOVER_RETRY_SECONDS`` and then asks
    ``_get_crashed_services()`` again for the list to work on.

    Returns:
        The result of the most recent ``_get_crashed_services()`` call, i.e.
        whatever is still reported as crashed when the rounds stop.
    """
    services_to_be_recovered = _get_crashed_services()

    for service in services_to_be_recovered:
        kv.update_service_status('recovering', key=service)

    recovery_retry_count = 0
    while services_to_be_recovered and recovery_retry_count < RECOVERY_RETRY_LIMIT:
        # Lazy %-style arguments: the logger formats the message itself.
        get_logger().info("Recovering containers: %s", json.dumps(services_to_be_recovered))
        is_last_round = recovery_retry_count == (RECOVERY_RETRY_LIMIT - 1)

        for service in services_to_be_recovered:
            # The parameters used for recovery are stored under the service key.
            service_parameters = kv.kv_get(service)['params']
            if _recover_container(service_parameters):
                kv.kv_remove(service)
            elif is_last_round:
                # No further attempt will be made for this service.
                kv.update_service_status('not-recovered', key=service)

        # The delay always runs because the crashed list is re-read right
        # after it, including after the final round (it becomes the result).
        sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        services_to_be_recovered = _get_crashed_services()
        recovery_retry_count += 1

    return services_to_be_recovered
Ejemplo n.º 3
0
def _deregister_not_running_services():
    """Flag as crashed and deregister services whose container is not running.

    Two passes are made against one snapshot of the running container ids:

    1. Over the services returned by ``_get_local_services()``: a service
       whose container is not running is deregistered; unless it is a
       subservice, its status is first set to 'crashed'.
    2. Over the kv keys listed under ``ships/<ship>/service/``: the last path
       segment of each key is taken as the container id, and entries whose
       container is not running are set to 'crashed' and deregistered.
    """
    try:
        ship = get_ship_name()
    except Exception:
        # Fall back to the ship's IP when the name cannot be obtained.
        # Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        ship = get_ship_ip()
    services = _get_local_services()
    running_containers_ids = _get_running_container_ids()
    for service_id in services.keys():
        container_id, is_subservice = _get_container_id_with_subservice(service_id)
        if container_id in running_containers_ids:
            continue
        if not is_subservice:
            # Only the main service entry gets its status updated in the kv store.
            name = services[service_id]['Service']
            kv.update_service_status('crashed', ship=ship, name=name, container_id=container_id)
        deregister_services(container_id)

    # kv_list may return a falsy value when nothing is stored under the prefix.
    services_ids = kv.kv_list('ships/{}/service/'.format(ship)) or []
    for service_id in services_ids:
        container_id = service_id.split('/')[-1]
        if container_id not in running_containers_ids:
            kv.update_service_status('crashed', key=service_id)
            deregister_services(container_id)
Ejemplo n.º 4
0
def _deregister_not_running_services():
    """Flag as crashed and deregister services whose container is not running.

    Two passes are made, each against its own freshly fetched list of
    running container ids:

    1. Over the services returned by ``_get_local_services()``: a service
       whose container is not running is deregistered; unless it is a
       subservice, its status is first set to 'crashed'.
    2. Over the kv keys listed under ``ships/<ship>/service/``: the last path
       segment of each key is taken as the container id, and entries whose
       container is not running are set to 'crashed' and deregistered.
    """
    try:
        ship = get_ship_name()
    except Exception:
        # Fall back to the ship's IP when the name cannot be obtained.
        # Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        ship = get_ship_ip()
    services = _get_local_services()
    running_containers_ids = _get_running_container_ids()
    for service_id in services.keys():
        container_id, is_subservice = _get_container_id_with_subservice(service_id)
        if container_id in running_containers_ids:
            continue
        if not is_subservice:
            # Only the main service entry gets its status updated in the kv store.
            name = services[service_id]['Service']
            kv.update_service_status('crashed', ship=ship, name=name, container_id=container_id)
        deregister_services(container_id)

    # kv_list may return a falsy value when nothing is stored under the prefix.
    services_ids = kv.kv_list('ships/{}/service/'.format(ship)) or []
    # The running ids are fetched again instead of reusing the snapshot above.
    # NOTE(review): presumably to account for containers that changed state
    # during the first pass -- confirm before merging the two lookups.
    containers_ids = _get_running_container_ids()
    for service_id in services_ids:
        container_id = service_id.split('/')[-1]
        if container_id not in containers_ids:
            kv.update_service_status('crashed', key=service_id)
            deregister_services(container_id)