def _add_running_services_at_startup():
    """Save locally registered services that have no kv-store entry yet.

    After consul is ready the function sleeps 10 seconds, queries
    ``agent/services`` and calls ``save_container`` with status ``'started'``
    for every service whose key is not among the locally stored ones.
    The ``consul`` entry, service ids containing ``':'`` and services named
    ``armada`` are skipped. Exceptions raised after the consul wait are
    logged and not propagated.
    """
    wait_for_consul_ready()
    try:
        ship_ip, ship_name = get_ship_ip_and_name()
        known_keys = get_local_services_from_kv_store()
        # Fixed pause before reading the agent's service list
        # (presumably lets services finish registering - confirm).
        sleep(10)
        registered = consul_query('agent/services')
        if 'consul' in registered:
            del registered['consul']
        for service_id, service_dict in six.iteritems(registered):
            if ':' in service_id or service_dict['Service'] == 'armada':
                continue
            key = create_consul_services_key(ship_name, service_dict['Service'], service_id)
            if known_keys and key in known_keys:
                continue
            save_container(ship_name, service_id, 'started', ship_ip=ship_ip)
            get_logger().info('Added running service: {}'.format(service_id))
    except Exception:
        get_logger().exception('Unable to add running services.')
def main():
    """Save parameters of local containers to a file and to the kv store.

    Does nothing (apart from printing a notice) when recovery is not
    completed and ``--force`` was not given. Otherwise collects parameters
    of every local container, writes them to ``args.saved_containers_path``
    and then tries to store them in the kv store. Exits with status 1 when
    an unexpected error occurs.

    Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed by the error handlers.
    """
    args = _parse_args()
    if not args.force and not _is_recovery_completed():
        print('Recovery is not completed. Aborting saving running containers.')
        return
    saved_containers_path = args.saved_containers_path
    try:
        wait_for_consul_ready()
        containers_ids = get_local_containers_ids()
        containers_parameters_list = []
        errors_count = 0
        for container_id in containers_ids:
            try:
                container_parameters = get_container_parameters(container_id)
                if container_parameters:
                    containers_parameters_list.append(container_parameters)
            except Exception:
                # A single failing container must not stop the whole dump.
                errors_count += 1
                print_err('ERROR on getting container parameters for {}:'.format(container_id))
                traceback.print_exc()
        # Don't overwrite saved containers' list if it would become empty because of errors.
        if containers_parameters_list or not errors_count:
            _save_containers_parameters_list_in_file(containers_parameters_list, saved_containers_path)
            print('Containers have been saved to {}.'.format(saved_containers_path))
            try:
                _save_containers_parameters_list_in_kv_store(containers_parameters_list)
                print('Containers have been saved to kv store.')
            except Exception:
                # The file copy already exists; a kv failure is only reported.
                traceback.print_exc()
        else:
            print('Aborted saving container because of errors.')
    except Exception:
        traceback.print_exc()
        sys.exit(1)
def main():
    """Copy this ship's service entries from the kv store into a file.

    Lists keys under ``ships/<ship>/service/``, reads each one with
    ``kv.kv_get`` and, when anything was found, passes the resulting dict to
    the kv-store saver (failures are only logged) and then - unless recovery
    is incomplete and ``--force`` is absent - to the file saver. Any other
    exception is logged and the process exits with status 1.
    """
    setup_sentry()
    args = _parse_args()
    target_path = args.saved_containers_path
    try:
        wait_for_consul_ready()
        ship = get_ship_name()
        service_keys = kv.kv_list('ships/{}/service/'.format(ship))
        containers_parameters_dict = {}
        for service_key in service_keys or ():
            containers_parameters_dict[service_key] = kv.kv_get(service_key)

        if not containers_parameters_dict:
            get_logger().info('Aborted saving container because of errors.')
            return

        try:
            _save_containers_parameters_list_in_kv_store(containers_parameters_dict)
            get_logger().info('Containers have been saved to kv store.')
        except Exception as e:
            get_logger().exception(e)

        # The file is only rewritten once recovery finished (or when forced).
        if not (args.force or _is_recovery_completed()):
            get_logger().warning('Recovery is not completed. Aborting saving running containers.')
            return
        _save_containers_parameters_list_in_file(containers_parameters_dict, target_path)
        get_logger().info('Containers have been saved to {}.'.format(target_path))
    except Exception as e:
        get_logger().exception(e)
        sys.exit(1)
def _load_from_list(saved_containers, ship):
    """Store saved containers that are not running as ``'crashed'``.

    The multiset difference between ``saved_containers`` and the locally
    running containers is computed; each remaining entry is logged and saved
    through ``kv.save_container`` under a freshly generated id.
    """
    wait_for_consul_ready()
    missing = _multiset_difference(saved_containers, _get_local_running_containers())
    for params in missing:
        get_logger().info('Added service: {}'.format(params))
        kv.save_container(ship, _generate_id(), 'crashed', params=params)
def main():
    """Write kv-store entries of the local services to the saved-containers file.

    Returns early (with an info log) when recovery is not completed and
    ``--force`` is absent, or when no entries were collected. Unexpected
    exceptions are logged and the process exits with status 1.
    """
    setup_sentry()
    args = _parse_args()
    target_path = args.saved_containers_path
    if not (args.force or _is_recovery_completed()):
        get_logger().info(
            'Recovery is not completed. Aborting saving running containers.')
        return
    try:
        wait_for_consul_ready()
        containers_parameters_dict = {}
        for service_key in get_local_services():
            containers_parameters_dict[service_key] = kv.kv_get(service_key)
        if containers_parameters_dict:
            _save_containers_parameters_list_in_file(containers_parameters_dict,
                                                     target_path)
            get_logger().info(
                'Containers have been saved to {}.'.format(target_path))
        else:
            get_logger().info(
                'Aborted saving container because list is empty.')
    except Exception as e:
        get_logger().exception(e)
        sys.exit(1)
def main():
    """Dump everything under ``services/<ship>`` in the kv store to a file.

    Returns early (with an info log) when recovery is not completed and
    ``--force`` is absent, or when the recursive kv read yields nothing.
    Unexpected exceptions are logged and the process exits with status 1.
    """
    setup_sentry()
    args = _parse_args()
    target_path = args.saved_containers_path
    if not (args.force or _is_recovery_completed()):
        get_logger().info(
            'Recovery is not completed. Aborting saving running containers.')
        return
    try:
        wait_for_consul_ready()
        prefix = 'services/{}'.format(get_ship_name())
        containers_parameters = kv.kv_get_recurse(prefix, strip_keys=False)
        if containers_parameters:
            _save_containers_parameters_list_in_file(containers_parameters,
                                                     target_path)
            get_logger().info(
                'Containers have been saved to {}.'.format(target_path))
        else:
            get_logger().info(
                'Aborted saving container because list is empty.')
    except Exception as e:
        get_logger().exception(e)
        sys.exit(1)
def main():
    """Save sorted parameters of local containers to a file and the kv store.

    Logs a warning and returns when recovery is not completed and ``--force``
    was not given. Otherwise collects parameters of every local container,
    sorts them, writes them to ``args.saved_containers_path`` and then tries
    to store them in the kv store. Exits with status 1 on unexpected errors.

    Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed by the error handlers.
    """
    args = _parse_args()
    if not args.force and not _is_recovery_completed():
        get_logger().warning('Recovery is not completed. Aborting saving running containers.')
        return
    saved_containers_path = args.saved_containers_path
    try:
        wait_for_consul_ready()
        containers_ids = get_local_containers_ids()
        containers_parameters_list = []
        errors_count = 0
        for container_id in containers_ids:
            try:
                container_parameters = get_container_parameters(container_id)
                if container_parameters:
                    containers_parameters_list.append(container_parameters)
            except Exception:
                # A single failing container must not stop the whole dump.
                errors_count += 1
                get_logger().error('ERROR on getting container parameters for {}:'.format(container_id))
                traceback.print_exc()
        # NOTE(review): sorting assumes the entries are mutually orderable - confirm on Python 3.
        containers_parameters_list.sort()
        # Don't overwrite saved containers' list if it would become empty because of errors.
        if containers_parameters_list or not errors_count:
            _save_containers_parameters_list_in_file(containers_parameters_list, saved_containers_path)
            get_logger().info('Containers have been saved to {}.'.format(saved_containers_path))
            try:
                _save_containers_parameters_list_in_kv_store(containers_parameters_list)
                get_logger().info('Containers have been saved to kv store.')
            except Exception:
                # The file copy already exists; a kv failure is only reported.
                traceback.print_exc()
        else:
            get_logger().info('Aborted saving container because of errors.')
    except Exception:
        traceback.print_exc()
        sys.exit(1)
def _load_from_list(saved_containers, ship_name, ship_ip):
    """Store saved containers that are not running as ``'crashed'``.

    The multiset difference between ``saved_containers`` and the locally
    running containers is computed; each remaining entry is logged and saved
    through ``save_container`` under a freshly generated id, together with
    the ship's ip.
    """
    wait_for_consul_ready()
    missing = _multiset_difference(saved_containers, _get_local_running_containers())
    for params in missing:
        get_logger().info('Added service: {}'.format(params))
        save_container(ship_name, _generate_id(), 'crashed',
                       params=params, ship_ip=ship_ip)
def _load_from_list(saved_containers, ship):
    """Store saved containers that are not running as ``'crashed'`` services.

    Each entry of the multiset difference between ``saved_containers`` and
    the locally running containers is saved through ``kv.save_service``,
    using its zero-based position (as a string) as the service id.
    """
    wait_for_consul_ready()
    missing = _multiset_difference(saved_containers, _get_local_running_containers())
    for position, params in enumerate(missing):
        kv.save_service(ship, str(position), 'crashed', params=params)
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    Failures after the consul wait are logged (with traceback) and not
    propagated. Only ``Exception`` subclasses are handled, so
    ``KeyboardInterrupt`` and ``SystemExit`` still reach the caller.
    """
    wait_for_consul_ready()
    try:
        ship_ip, ship_name = get_ship_ip_and_name()
        saved_containers = _load_saved_containers_parameters(saved_containers_path)
        _load_from_dict(saved_containers, ship_name, ship_ip)
    except Exception:
        get_logger().exception('Unable to load from %s', saved_containers_path)
def recover_saved_containers_from_parameters(saved_containers):
    """Load ``saved_containers`` into the kv store, then run recovery.

    A failure while loading is logged and does not prevent the recovery
    step. Returns whatever ``recover_containers_from_kv_store`` returns.
    """
    wait_for_consul_ready()
    try:
        _load_from_dict(saved_containers, get_ship_name())
    except Exception as error:
        get_logger().exception(error)
    return recover_containers_from_kv_store()
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    The loaded data is handed to ``_load_from_dict`` when it is a dict and
    to ``_load_from_list`` otherwise. Failures after the consul wait are
    logged (with traceback) and not propagated. Only ``Exception``
    subclasses are handled, so ``KeyboardInterrupt`` and ``SystemExit``
    still reach the caller.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters_list(saved_containers_path)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        get_logger().exception('Unable to load from %s', saved_containers_path)
def _recover_saved_containers_from_path(saved_containers_path):
    """Run recovery from the kv store and report whether it fully succeeded.

    ``saved_containers_path`` is only used in the failure log message.

    Returns:
        ``True`` when ``recover_containers_from_kv_store`` reports nothing
        left unrecovered; ``False`` when containers remain or an exception
        was raised (both cases are logged).

    Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` still reach the caller.
    """
    wait_for_consul_ready()
    try:
        not_recovered = recover_containers_from_kv_store()
    except Exception:
        get_logger().exception('Unable to recover from %s.', saved_containers_path)
        return False
    if not_recovered:
        get_logger().error('Following containers were not recovered: %s', not_recovered)
        return False
    return True
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    The loaded data is handed to ``_load_from_dict`` when it is a dict and
    to ``_load_from_list`` otherwise. Failures after the consul wait are
    logged (with traceback) and not propagated. Only ``Exception``
    subclasses are handled, so ``KeyboardInterrupt`` and ``SystemExit``
    still reach the caller.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters_list(
            saved_containers_path)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        get_logger().exception('Unable to load from %s', saved_containers_path)
def recover_saved_containers_from_parameters(saved_containers):
    """Load ``saved_containers`` into the kv store, then run recovery.

    Dict input goes through ``_load_from_dict``, anything else through
    ``_load_from_list``. A failure while loading is logged and does not
    prevent the recovery step. Returns whatever
    ``recover_containers_from_kv_store`` returns.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        loader = _load_from_dict if isinstance(saved_containers, dict) else _load_from_list
        loader(saved_containers, ship)
    except Exception as error:
        get_logger().exception(error)
    return recover_containers_from_kv_store()
def _recover_saved_containers_from_path(saved_containers_path):
    """Run recovery from the kv store and report whether it fully succeeded.

    ``saved_containers_path`` is only used in the failure log message.

    Returns:
        ``True`` when ``recover_containers_from_kv_store`` reports nothing
        left unrecovered; ``False`` when containers remain or an exception
        was raised (both cases are logged).

    Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` still reach the caller.
    """
    wait_for_consul_ready()
    try:
        not_recovered = recover_containers_from_kv_store()
    except Exception:
        traceback.print_exc()
        get_logger().error('Unable to recover from {}.'.format(saved_containers_path))
        return False
    if not_recovered:
        get_logger().error('Following containers were not recovered: {}'.format(not_recovered))
        return False
    return True
def recover_saved_containers_from_parameters(saved_containers):
    """Load ``saved_containers`` into the kv store, then run recovery.

    Dict input goes through ``_load_from_dict``, anything else through
    ``_load_from_list``. A failure while loading is printed and does not
    prevent the recovery step. Only ``Exception`` subclasses are handled,
    so ``KeyboardInterrupt`` and ``SystemExit`` still reach the caller.

    Returns:
        The result of ``recover_containers_from_kv_store``.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        traceback.print_exc()
    return recover_containers_from_kv_store()
def _load_containers_to_kv_store(saved_containers_path):
    """Register running services and load saved containers into the kv store.

    Lists the keys under ``ships/<ship>/service/``, loads the saved
    parameters from ``saved_containers_path``, adds running services that
    are missing from the listed keys, and finally loads the saved data via
    ``_load_from_dict`` (dict input) or ``_load_from_list`` (anything else).
    Failures after the consul wait are reported and not propagated. Only
    ``Exception`` subclasses are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` still reach the caller.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        saved_containers = _load_saved_containers_parameters_list(saved_containers_path)
        _add_running_services_at_startup(containers_saved_in_kv, ship)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, containers_saved_in_kv, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        traceback.print_exc()
        get_logger().error('Unable to load from {}.'.format(saved_containers_path))
def recover_saved_containers(saved_containers):
    """Try to recover saved containers that are not running locally.

    Repeats up to ``RECOVERY_RETRY_LIMIT`` times: attempt recovery of every
    missing container, sleep ``DELAY_BETWEEN_RECOVER_RETRY_SECONDS`` and
    recompute what is still missing.

    Returns:
        The containers still missing after the last round (the multiset
        difference between ``saved_containers`` and the running ones).
    """
    wait_for_consul_ready()

    def _still_missing():
        # Saved containers without a running counterpart.
        return _multiset_difference(saved_containers, _get_local_running_containers())

    pending = _still_missing()
    attempts = 0
    while pending and attempts < RECOVERY_RETRY_LIMIT:
        print_err("Recovering containers: {}".format(json.dumps(pending)))
        for params in pending:
            _recover_container(params)
        sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        pending = _still_missing()
        attempts += 1
    return pending
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    The loaded data is handed to ``_load_from_dict`` when it is a dict and
    to ``_load_from_list`` otherwise. Failures after the consul wait are
    reported and not propagated. Only ``Exception`` subclasses are handled,
    so ``KeyboardInterrupt`` and ``SystemExit`` still reach the caller.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters_list(
            saved_containers_path)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        traceback.print_exc()
        get_logger().error(
            'Unable to load from {}.'.format(saved_containers_path))
def recover_saved_containers_from_parameters(saved_containers):
    """Load ``saved_containers`` into the kv store, then run recovery.

    Dict input goes through ``_load_from_dict`` together with the keys
    listed under ``ships/<ship>/service/``; anything else goes through
    ``_load_from_list``. A failure while loading is printed and does not
    prevent the recovery step. Only ``Exception`` subclasses are handled,
    so ``KeyboardInterrupt`` and ``SystemExit`` still reach the caller.

    Returns:
        The result of ``recover_containers_from_kv_store``.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, containers_saved_in_kv, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        traceback.print_exc()
    return recover_containers_from_kv_store()
def _add_running_services_at_startup(containers_saved_in_kv, ship):
    """Save locally registered services that have no kv-store entry yet.

    Args:
        containers_saved_in_kv: Collection of kv keys already stored for the
            ship; may be empty or ``None``.
        ship: Ship name used to build the ``ships/<ship>/service/...`` key.

    Every service returned by ``agent/services`` whose key is absent from
    ``containers_saved_in_kv`` is saved with status ``'started'``. The
    ``consul`` entry, service ids containing ``':'`` and services named
    ``armada`` are skipped.
    """
    wait_for_consul_ready()
    # wait for registering services
    sleep(10)
    all_services = consul_query('agent/services')
    # The 'consul' entry is not guaranteed to be present; an unconditional
    # ``del`` would raise KeyError and abort the whole registration.
    if 'consul' in all_services:
        del all_services['consul']
    for service_id, service_dict in all_services.items():
        if ':' in service_id or service_dict['Service'] == 'armada':
            continue
        key = 'ships/{}/service/{}/{}'.format(ship, service_dict['Service'], service_id)
        if not containers_saved_in_kv or key not in containers_saved_in_kv:
            kv.save_service(ship, service_id, 'started')
def _recover_saved_containers_from_path(saved_containers_path):
    """Run recovery from the kv store and report whether it fully succeeded.

    ``saved_containers_path`` is only used in the failure log message.

    Returns:
        ``True`` when ``recover_containers_from_kv_store`` reports nothing
        left unrecovered; ``False`` when containers remain or an exception
        was raised (both cases are logged).

    Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` still reach the caller.
    """
    wait_for_consul_ready()
    try:
        not_recovered = recover_containers_from_kv_store()
    except Exception:
        traceback.print_exc()
        get_logger().error(
            'Unable to recover from {}.'.format(saved_containers_path))
        return False
    if not_recovered:
        get_logger().error(
            'Following containers were not recovered: {}'.format(
                not_recovered))
        return False
    return True
def recover_saved_containers(saved_containers):
    """Try to recover saved containers that are not running locally.

    Repeats up to ``RECOVERY_RETRY_LIMIT`` times: attempt recovery of every
    missing container, sleep ``DELAY_BETWEEN_RECOVER_RETRY_SECONDS`` and
    recompute what is still missing.

    Returns:
        The containers still missing after the last round (the multiset
        difference between ``saved_containers`` and the running ones).
    """
    wait_for_consul_ready()
    remaining = _multiset_difference(
        saved_containers, _get_local_running_containers())
    rounds_done = 0
    while remaining and rounds_done < RECOVERY_RETRY_LIMIT:
        message = "Recovering containers: {}".format(json.dumps(remaining))
        print_err(message)
        for parameters in remaining:
            _recover_container(parameters)
        # Pause before checking which containers came up.
        sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        remaining = _multiset_difference(
            saved_containers, _get_local_running_containers())
        rounds_done += 1
    return remaining
def _add_running_services_at_startup():
    """Save locally registered services that have no kv-store entry yet.

    After consul is ready the function lists the keys under
    ``ships/<ship>/service/``, sleeps 10 seconds, queries ``agent/services``
    and saves every service whose key is not listed with status
    ``'started'``. The ``consul`` entry, service ids containing ``':'`` and
    services named ``armada`` are skipped.

    Exceptions raised after the consul wait are logged and not propagated.
    Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` still reach the caller.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        sleep(10)
        all_services = consul_query('agent/services')
        # The 'consul' entry is not guaranteed to be present; an
        # unconditional ``del`` would raise KeyError and skip every service.
        if 'consul' in all_services:
            del all_services['consul']
        for service_id, service_dict in all_services.items():
            if ':' in service_id or service_dict['Service'] == 'armada':
                continue
            key = 'ships/{}/service/{}/{}'.format(ship, service_dict['Service'], service_id)
            if not containers_saved_in_kv or key not in containers_saved_in_kv:
                kv.save_container(ship, service_id, 'started')
                get_logger().info('Added running service: {}'.format(service_id))
    except Exception:
        get_logger().exception('Unable to add running services.')
def recover_saved_containers(saved_containers):
    """Try to recover saved containers that are not running locally.

    Each round (at most ``RECOVERY_RETRY_LIMIT``) marks every pending
    container as ``'recovering'`` in the kv store, attempts recovery,
    removes the kv entry of each recovered container and, on the last
    round, marks failures as ``'not-recovered'``. Identical parameter sets
    are told apart by a per-set index starting at 0.

    Returns:
        The multiset difference between the containers that failed in the
        last round and the containers running afterwards.
    """
    wait_for_consul_ready()
    running_containers = _get_local_running_containers()
    containers_to_be_recovered = _multiset_difference(saved_containers, running_containers)
    recovery_retry_count = 0
    while containers_to_be_recovered and recovery_retry_count < RECOVERY_RETRY_LIMIT:
        get_logger().info("Recovering containers: {}".format(
            json.dumps(containers_to_be_recovered)))
        containers_not_recovered = []
        # Serialise with sorted keys so equal parameter dicts collapse into one counter key.
        counter_to_be_recovered = Counter(
            json.dumps(x, sort_keys=True) for x in containers_to_be_recovered)
        to_be_recovered = []
        for serialized, occurrences in counter_to_be_recovered.items():
            name = json.loads(serialized)['microservice_name']
            for index in range(occurrences):
                to_be_recovered.append((serialized, index))
                kv.save_service(name, index, 'recovering', json.loads(serialized))
        for serialized, index in to_be_recovered:
            container_parameters = json.loads(serialized)
            name = container_parameters['microservice_name']
            if _recover_container(container_parameters):
                kv.kv_remove('service/{}/{}'.format(name, index))
                continue
            containers_not_recovered.append(container_parameters)
            if recovery_retry_count == (RECOVERY_RETRY_LIMIT - 1):
                # container_parameters is already decoded; running it through
                # json.loads a second time raised TypeError on the last retry.
                kv.save_service(name, index, 'not-recovered', container_parameters)
        sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        running_containers = _get_local_running_containers()
        containers_to_be_recovered = _multiset_difference(
            containers_not_recovered, running_containers)
        recovery_retry_count += 1
    return containers_to_be_recovered
def _add_running_services_at_startup():
    """Save locally registered services that have no kv-store entry yet.

    After consul is ready the function lists the keys under
    ``ships/<ship>/service/``, sleeps 10 seconds, queries ``agent/services``
    and saves every service whose key is not listed with status
    ``'started'``. The ``consul`` entry, service ids containing ``':'`` and
    services named ``armada`` are skipped.

    Exceptions raised after the consul wait are logged and not propagated.
    Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` still reach the caller.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        sleep(10)
        all_services = consul_query('agent/services')
        # The 'consul' entry is not guaranteed to be present; an
        # unconditional ``del`` would raise KeyError and skip every service.
        if 'consul' in all_services:
            del all_services['consul']
        for service_id, service_dict in all_services.items():
            if ':' in service_id or service_dict['Service'] == 'armada':
                continue
            key = 'ships/{}/service/{}/{}'.format(ship, service_dict['Service'], service_id)
            if not containers_saved_in_kv or key not in containers_saved_in_kv:
                kv.save_container(ship, service_id, 'started')
                get_logger().info(
                    'Added running service: {}'.format(service_id))
    except Exception:
        get_logger().exception('Unable to add running services.')
def recover_saved_containers(saved_containers):
    """Try to recover saved containers that are not running locally.

    Each round (at most ``RECOVERY_RETRY_LIMIT``) marks every pending
    container as ``'recovering'`` in the kv store, attempts recovery,
    removes the kv entry of each recovered container and, on the last
    round, marks failures as ``'not-recovered'``. Identical parameter sets
    are told apart by a per-set index starting at 0.

    Returns:
        The multiset difference between the containers that failed in the
        last round and the containers running afterwards.
    """
    wait_for_consul_ready()
    running_containers = _get_local_running_containers()
    containers_to_be_recovered = _multiset_difference(saved_containers, running_containers)
    recovery_retry_count = 0
    while containers_to_be_recovered and recovery_retry_count < RECOVERY_RETRY_LIMIT:
        get_logger().info("Recovering containers: {}".format(json.dumps(containers_to_be_recovered)))
        containers_not_recovered = []
        # Serialise with sorted keys so equal parameter dicts collapse into one counter key.
        counter_to_be_recovered = Counter(json.dumps(x, sort_keys=True) for x in containers_to_be_recovered)
        to_be_recovered = []
        for serialized, occurrences in counter_to_be_recovered.items():
            name = json.loads(serialized)['microservice_name']
            for index in range(occurrences):
                to_be_recovered.append((serialized, index))
                kv.save_service(name, index, 'recovering', json.loads(serialized))
        for serialized, index in to_be_recovered:
            container_parameters = json.loads(serialized)
            name = container_parameters['microservice_name']
            if _recover_container(container_parameters):
                kv.kv_remove('service/{}/{}'.format(name, index))
                continue
            containers_not_recovered.append(container_parameters)
            if recovery_retry_count == (RECOVERY_RETRY_LIMIT - 1):
                # container_parameters is already decoded; running it through
                # json.loads a second time raised TypeError on the last retry.
                kv.save_service(name, index, 'not-recovered', container_parameters)
        sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        running_containers = _get_local_running_containers()
        containers_to_be_recovered = _multiset_difference(containers_not_recovered, running_containers)
        recovery_retry_count += 1
    return containers_to_be_recovered