def reload_tyr_safe_all(safe=True, reverse=False):
    """
    Reload tyr on all servers, in a safe way if load balancers are available

    :param safe: forwarded to reload_tyr_safe for every server, after being
        passed through get_bool_from_cli
    :param reverse: when true, the servers of env.roledefs['tyr'] are visited
        from last to first; also passed through get_bool_from_cli
    """
    safe = get_bool_from_cli(safe)
    # Task arguments typed on the fab command line arrive as strings, so a raw
    # 'False' would be truthy: normalize `reverse` the same way as `safe`.
    reverse = get_bool_from_cli(reverse)
    servers = env.roledefs['tyr']
    if reverse:
        servers = servers[::-1]
    for server in servers:
        execute(reload_tyr_safe, server, safe)
def redeploy_all_krakens(create=True):
    """
    Call redeploy_kraken on every instance found in env.instances.

    :param create: passed through get_bool_from_cli, then forwarded as-is to
        redeploy_kraken for each instance
    """
    create = get_bool_from_cli(create)
    every_instance = env.instances.values()
    for current in every_instance:
        redeploy_kraken(current, create)
def reload_jormun_safe_all(safe=True):
    """
    Run reload_jormun_safe on each server listed in env.roledefs['ws'].

    :param safe: passed through get_bool_from_cli, then forwarded to
        reload_jormun_safe for each server
    """
    safe = get_bool_from_cli(safe)
    ws_servers = env.roledefs['ws']
    for ws_server in ws_servers:
        execute(reload_jormun_safe, ws_server, safe)
def rollback_instance(instance, test=True):
    """
    Use this only if something goes wrong during deployment of an instance

    Runs, in order: swap_data_nav (force=True), set_kraken_binary (old=True)
    and restart_kraken on the instance.

    :param instance: resolved with get_real_instance
    :param test: passed through get_bool_from_cli; when true restart_kraken is
        given wait=env.KRAKEN_RESTART_SCHEME, otherwise wait='no_test'
    """
    test = get_bool_from_cli(test)
    instance = get_real_instance(instance)
    execute(swap_data_nav, instance, force=True)
    execute(set_kraken_binary, instance, old=True)
    if test:
        restart_mode = env.KRAKEN_RESTART_SCHEME
    else:
        restart_mode = 'no_test'
    execute(restart_kraken, instance, wait=restart_mode)
def test_all_krakens(wait=False):
    """
    Call test_kraken on every instance of env.instances.

    Each call uses fail_if_error=False and loaded_is_ok=True.

    :param wait: passed through get_bool_from_cli, then forwarded to test_kraken
    """
    wait = get_bool_from_cli(wait)
    options = dict(fail_if_error=False, wait=wait, loaded_is_ok=True)
    for current in env.instances.values():
        test_kraken(current, **options)
def restart_all_krakens(wait=True):
    """
    Start 'apache2' through start_or_stop_with_delay, then call restart_kraken
    for each instance of env.instances (by name).

    :param wait: passed through get_bool_from_cli, then forwarded to restart_kraken
    """
    wait = get_bool_from_cli(wait)
    apache_delay = env.APACHE_START_DELAY * 1000
    start_or_stop_with_delay('apache2', apache_delay, 500,
                             only_once=env.APACHE_START_ONLY_ONCE)
    for current in env.instances.values():
        restart_kraken(current.name, wait=wait)
def test_kraken(instance, fail_if_error=True, wait=False, loaded_is_ok=None):
    """
    Test kraken with '?instance='

    Sends a request to the kraken monitor URL built from env.host and prints a
    colored diagnostic about the returned status.

    :param instance: resolved with get_real_instance
    :param fail_if_error: forwarded to _test_kraken; also selects the ERROR
        wording when the instance is not running, and triggers exit(1) when
        loaded data was expected but is missing
    :param wait: passed through get_bool_from_cli; when true the request is
        retried until a result with a true 'loaded' entry is obtained
    :param loaded_is_ok: whether loaded data is the expected state; when None
        it takes the value of `wait`
    :return: True when result['loaded'] is true, False in every other
        returning path
    """
    instance = get_real_instance(instance)
    wait = get_bool_from_cli(wait)
    name = instance.name

    # env.host makes the request go to the kraken monitor of the current host
    url = 'http://{}:{}/{}/?instance={}'.format(
        env.host, env.kraken_monitor_port, env.kraken_monitor_location_dir, name)
    request = Request(url)

    if not wait:
        result = _test_kraken(request, fail_if_error)
    else:
        # keep asking until there is a result and the instance is 'loaded'
        try:
            retrier = Retrying(stop_max_delay=env.KRAKEN_RESTART_DELAY * 1000,
                               wait_fixed=1000,
                               retry_on_result=lambda x: x is None or not x['loaded'])
            result = retrier.call(_test_kraken, request, fail_if_error)
        except Exception as e:
            print(red("ERROR: could not reach {}, too many retries ! ({})".format(name, e)))
            result = {'status': False}

    status = result['status']
    if status == 'no_data':
        print(yellow("WARNING: instance {} has no loaded data".format(name)))
        return False
    if status != 'running':
        if fail_if_error:
            message = red("ERROR: Instance {} is not running ! ({})".format(name, result))
        else:
            message = yellow("WARNING: Instance {} is not running ! ({})".format(name, result))
        print(message)
        return False

    if not result['is_connected_to_rabbitmq']:
        print(yellow("WARNING: Instance {} is not connected to rabbitmq".format(name)))
        return False

    if loaded_is_ok is None:
        loaded_is_ok = wait

    if loaded_is_ok:
        if result['loaded']:
            print(green("OK: instance {} has correct values: {}".format(name, result)))
            return True
        elif fail_if_error:
            print(red("CRITICAL: instance {} has no loaded data".format(name)))
            exit(1)
        else:
            print(yellow("WARNING: instance {} has no loaded data".format(name)))
            return False
    else:
        if result['loaded']:
            print(yellow("WARNING: instance {} has loaded data".format(name)))
            return True
        print(green("OK: instance {} has correct values: {}".format(name, result)))
        return False
def upgrade_kraken(kraken_wait=True, up_confs=True):
    """
    Upgrade and restart all kraken instances

    :param kraken_wait: passed through get_bool_from_cli, then forwarded as
        `wait` to kraken.restart_all_krakens
    :param up_confs: passed through get_bool_from_cli; when true the monitor
        configuration and the configuration of every instance of env.instances
        are updated before the last restart
    """
    kraken_wait = get_bool_from_cli(kraken_wait)
    # Task arguments typed on the fab command line arrive as strings, so a raw
    # 'False' would be truthy: normalize `up_confs` like `kraken_wait`.
    up_confs = get_bool_from_cli(up_confs)
    execute(kraken.upgrade_engine_packages)
    execute(kraken.upgrade_monitor_kraken_packages)
    execute(kraken.restart_all_krakens, wait=kraken_wait)
    if up_confs:
        execute(kraken.update_monitor_configuration)
        for instance in env.instances.values():
            execute(kraken.update_eng_instance_conf, instance)
    execute(kraken.restart_all_krakens, wait=kraken_wait)
def reload_tyr_safe(server, safe=True):
    """
    Reload apache2 on one tyr server.

    When env.use_load_balancer and `safe` are both true, the server is disabled
    in the load balancer before the reload and enabled again afterwards.

    :param server: host string used for the remote command and for the load
        balancer calls
    :param safe: passed through get_bool_from_cli
    """
    safe = get_bool_from_cli(safe)
    with settings(host_string=server):
        through_lb = env.use_load_balancer and safe
        if through_lb:
            load_balancer.disable_node(server)
        sudo("service apache2 reload")
        if through_lb:
            load_balancer.enable_node(server)
def reload_tyr_safe(server, safe=True):
    """
    Call restart_apache for one tyr server.

    When env.use_load_balancer and `safe` are both true, the server is disabled
    in the load balancer before the restart and enabled again afterwards.

    :param server: host string used for the remote command and for the load
        balancer calls
    :param safe: passed through get_bool_from_cli
    """
    safe = get_bool_from_cli(safe)
    with settings(host_string=server):
        through_lb = env.use_load_balancer and safe
        if through_lb:
            load_balancer.disable_node(server)
        restart_apache()
        if through_lb:
            load_balancer.enable_node(server)
def update_all_instances(kraken_wait=True):
    """
    Run update_instance on every instance of env.instances, then restart all
    krakens.

    The original note says an instance that does not exist yet gets deployed.
    TODO: we could detect the deleted instances to remove them

    :param kraken_wait: passed through get_bool_from_cli, then forwarded as
        `wait` to kraken.restart_all_krakens
    """
    kraken_wait = get_bool_from_cli(kraken_wait)
    print(blue('creating all instances'))
    every_instance = env.instances.values()
    for current in every_instance:
        execute(update_instance, current)
    execute(kraken.restart_all_krakens, wait=kraken_wait)
def reload_jormun_safe(server, safe=True):
    """
    Reload apache2 on one jormungandr server, then pause for one second.

    When env.use_load_balancer and `safe` are both true, the server is disabled
    in the load balancer before the reload and enabled again after the pause.

    :param server: host string used for the remote command and for the load
        balancer calls
    :param safe: passed through get_bool_from_cli
    """
    safe = get_bool_from_cli(safe)
    with settings(host_string=server):
        through_lb = env.use_load_balancer and safe
        if through_lb:
            load_balancer.disable_node(server)
        sudo("service apache2 reload")
        sleep(1)
        if through_lb:
            load_balancer.enable_node(server)
def upgrade_kraken(kraken_wait=True, up_confs=True, supervision=False):
    """
    Upgrade and restart all kraken instances

    :param kraken_wait: passed through get_bool_from_cli, then forwarded as
        `wait` to kraken.restart_all_krakens
    :param up_confs: passed through get_bool_from_cli; when true the monitor
        configuration and the configuration of every instance of env.instances
        are updated before the restart
    :param supervision: passed through get_bool_from_cli; when true
        supervision_downtime(step='kraken') is called first
    """
    # Task arguments typed on the fab command line arrive as strings, so a raw
    # 'False' would be truthy: every boolean flag is normalized before use.
    kraken_wait = get_bool_from_cli(kraken_wait)
    up_confs = get_bool_from_cli(up_confs)
    supervision = get_bool_from_cli(supervision)
    if supervision:
        supervision_downtime(step='kraken')
    execute(kraken.upgrade_engine_packages)
    execute(kraken.upgrade_monitor_kraken_packages)
    if up_confs:
        execute(kraken.update_monitor_configuration)
        for instance in env.instances.values():
            execute(kraken.update_eng_instance_conf, instance)
    execute(kraken.restart_all_krakens, wait=kraken_wait)
def redeploy_kraken(instance, create=True):
    """
    Redistributes an existing kraken on eng engines.

    Call this task when the zmq_server parameter of add_instance is changed.
    Use create=False if krakens mapping is reduced (this avoids restarting them).
    Use create=True if krakens are displaced or mapping is expanded.

    :param instance: resolved with get_real_instance
    :param create: passed through get_bool_from_cli; when true
        create_eng_instance runs before remove_kraken_instance
    """
    instance = get_real_instance(instance)
    create = get_bool_from_cli(create)
    removal_options = dict(purge_logs=True, apply_on='reverse')
    if create:
        execute(create_eng_instance, instance)
    execute(remove_kraken_instance, instance, **removal_options)
def upgrade_all(bina=True, up_tyr=True, up_confs=True, kraken_wait=True):
    """
    Upgrade all navitia packages, databases and launch rebinarisation of all instances

    All four flags are passed through get_bool_from_cli.

    :param bina: when true, tyr.launch_rebinarization_upgrade is executed
    :param up_tyr: when true, upgrade_tyr is executed
    :param up_confs: forwarded to upgrade_tyr, upgrade_kraken and upgrade_jormungandr
    :param kraken_wait: forwarded to upgrade_kraken
    """
    bina = get_bool_from_cli(bina)
    up_tyr = get_bool_from_cli(up_tyr)
    up_confs = get_bool_from_cli(up_confs)
    kraken_wait = get_bool_from_cli(kraken_wait)

    if env.use_load_balancer:
        get_adc_credentials()

    kraken_options = dict(kraken_wait=kraken_wait, up_confs=up_confs)

    with utils.send_mail():
        execute(check_last_dataset)
        if up_tyr:
            execute(upgrade_tyr, up_confs=up_confs)
        execute(upgrade_monitor_kraken_packages)
        if bina:
            execute(tyr.launch_rebinarization_upgrade)

        if not env.use_load_balancer:
            execute(upgrade_kraken, **kraken_options)
            execute(upgrade_jormungandr, up_confs=up_confs)
        else:
            jormun_options = dict(reload=False, up_confs=up_confs)

            # kraken/jormun upgrade on the first hosts set
            env.roledefs['eng'] = env.eng_hosts_1
            env.roledefs['ws'] = env.ws_hosts_1
            execute(switch_to_first_phase,
                    env.eng_hosts_1, env.ws_hosts_1, env.ws_hosts_2)
            execute(upgrade_kraken, **kraken_options)
            execute(upgrade_jormungandr, **jormun_options)

            # kraken/jormun upgrade on the remaining hosts
            env.roledefs['eng'] = env.eng_hosts_2
            env.roledefs['ws'] = env.ws_hosts_2
            execute(switch_to_second_phase,
                    env.eng_hosts_1, env.eng_hosts_2, env.ws_hosts_1, env.ws_hosts_2)
            execute(upgrade_kraken, **kraken_options)
            execute(upgrade_jormungandr, **jormun_options)

            execute(enable_all_nodes, env.eng_hosts, env.ws_hosts_1, env.ws_hosts_2)
            # put the complete host lists back in the role definitions
            env.roledefs['eng'] = env.eng_hosts
            env.roledefs['ws'] = env.ws_hosts
def restart_kraken(instance, test=True, wait=True):
    """
    Restart a kraken instance on a given server
    To let us not restart all kraken servers in the farm

    The instance is restarted, through restart_kraken_on_host, on every host
    that is both in instance.kraken_engines and in env.roledefs['eng'].
    Instances listed in env.excluded_instances are skipped with a message.

    :param instance: resolved with get_real_instance
    :param test: passed through get_bool_from_cli; when true test_kraken is
        called after the restarts
    :param wait: passed through get_bool_from_cli, then forwarded to test_kraken
    """
    instance = get_real_instance(instance)
    # Task arguments typed on the fab command line arrive as strings, so a raw
    # 'False' would be truthy: normalize `test` the same way as `wait`.
    test = get_bool_from_cli(test)
    wait = get_bool_from_cli(wait)
    if instance.name not in env.excluded_instances:
        target_hosts = set(instance.kraken_engines).intersection(env.roledefs['eng'])
        for host in target_hosts:
            restart_kraken_on_host(instance, host)
        if test:
            test_kraken(instance, fail_if_error=False, wait=wait)
    else:
        print(yellow("{} has no data, not testing it".format(instance.name)))
def restart_kraken(instance, test=True, wait=True):
    """
    Restart a kraken instance on a given server
    To let us not restart all kraken servers in the farm

    The 'kraken_<name>' service is stopped and then started again through
    start_or_stop_with_delay. Instances listed in env.excluded_instances are
    skipped with a message.

    :param instance: resolved with get_real_instance
    :param test: passed through get_bool_from_cli; when true test_kraken is
        called after the restart
    :param wait: passed through get_bool_from_cli, then forwarded to test_kraken
    """
    instance = get_real_instance(instance)
    # Task arguments typed on the fab command line arrive as strings, so a raw
    # 'False' would be truthy: normalize `test` the same way as `wait`.
    test = get_bool_from_cli(test)
    wait = get_bool_from_cli(wait)
    if instance.name not in env.excluded_instances:
        kraken = 'kraken_' + instance.name
        # stop first (start=False), then start again
        start_or_stop_with_delay(kraken, 4000, 500, start=False, only_once=True)
        start_or_stop_with_delay(kraken, 4000, 500, only_once=env.KRAKEN_START_ONLY_ONCE)
        if test:
            test_kraken(instance.name, fail_if_error=False, wait=wait)
    else:
        print(yellow("{} has no data, not testing it".format(instance.name)))
def upgrade_all(bina=True, up_tyr=True, up_confs=True, kraken_wait=True):
    """
    Upgrade all navitia packages, databases and launch rebinarisation of all instances

    All four flags are passed through get_bool_from_cli.

    :param bina: when true, tyr.launch_rebinarization_upgrade is executed
    :param up_tyr: when true, upgrade_tyr is executed
    :param up_confs: forwarded to upgrade_tyr, upgrade_kraken and upgrade_jormungandr
    :param kraken_wait: forwarded to upgrade_kraken
    """
    bina = get_bool_from_cli(bina)
    up_tyr = get_bool_from_cli(up_tyr)
    up_confs = get_bool_from_cli(up_confs)
    kraken_wait = get_bool_from_cli(kraken_wait)

    if env.use_load_balancer:
        get_adc_credentials()

    kraken_options = dict(kraken_wait=kraken_wait, up_confs=up_confs)

    with utils.send_mail():
        execute(check_last_dataset)
        if up_tyr:
            execute(upgrade_tyr, up_confs=up_confs)
        execute(upgrade_monitor_kraken_packages)
        if bina:
            execute(tyr.launch_rebinarization_upgrade)

        if not env.use_load_balancer:
            execute(upgrade_kraken, **kraken_options)
            execute(upgrade_jormungandr, up_confs=up_confs)
        else:
            jormun_options = dict(reload=False, up_confs=up_confs)

            # kraken/jormun upgrade on the first hosts set
            env.roledefs['eng'] = env.eng_hosts_1
            env.roledefs['ws'] = env.ws_hosts_1
            execute(switch_to_first_phase,
                    env.eng_hosts_1, env.ws_hosts_1, env.ws_hosts_2)
            execute(upgrade_kraken, **kraken_options)
            execute(upgrade_jormungandr, **jormun_options)

            # kraken/jormun upgrade on the remaining hosts
            env.roledefs['eng'] = env.eng_hosts_2
            env.roledefs['ws'] = env.ws_hosts_2
            execute(switch_to_second_phase,
                    env.eng_hosts_1, env.eng_hosts_2, env.ws_hosts_1, env.ws_hosts_2)
            execute(upgrade_kraken, **kraken_options)
            execute(upgrade_jormungandr, **jormun_options)

            execute(enable_all_nodes, env.eng_hosts, env.ws_hosts)
            # put the complete host lists back in the role definitions
            env.roledefs['eng'] = env.eng_hosts
            env.roledefs['ws'] = env.ws_hosts
def update_instance(instance, reload_jormun=True):
    """
    Update all configuration of one instance and restart all services.
    Does not deploy any packages.

    :param instance: resolved with get_real_instance
    :param reload_jormun: passed through get_bool_from_cli; when true
        jormungandr.reload_jormun_safe_all is executed at the end
    """
    instance = get_real_instance(instance)
    reload_jormun = get_bool_from_cli(reload_jormun)
    print(blue('updating {}'.format(instance.name)))

    # the instance status is computed before anything else since it is needed later on
    execute(compute_instance_status, instance)

    execute(tyr.create_tyr_instance, instance)
    execute(db.postgis_initdb, instance.db_name)
    execute(tyr.update_ed_db, instance.name)
    execute(jormungandr.deploy_jormungandr_instance_conf, instance)
    execute(kraken.create_eng_instance, instance)
    execute(tyr.deploy_default_synonyms, instance)
    execute(db.create_privileges_instance_db, instance)

    if not reload_jormun:
        return
    execute(jormungandr.reload_jormun_safe_all)
def reload_jormun_safe(server, safe=True):
    """
    Restart the web layer of one jormungandr server.

    restart_uwsgi('jormungandr') is used when env.uwsgi_enable is true,
    restart_apache() otherwise. When env.use_load_balancer and `safe` are both
    true, the server is disabled in the load balancer before the restart and
    enabled again afterwards.

    :param server: host string used for the remote command and for the load
        balancer calls
    :param safe: passed through get_bool_from_cli
    """
    safe = get_bool_from_cli(safe)
    with settings(host_string=server):
        through_lb = env.use_load_balancer and safe
        if through_lb:
            load_balancer.disable_node(server)
        # uWSGI is restarted when it is enabled, apache otherwise
        if not env.uwsgi_enable:
            restart_apache()
        else:
            restart_uwsgi('jormungandr')
        if through_lb:
            load_balancer.enable_node(server)
def restart_all_krakens(wait=True):
    """
    Execute require_monitor_kraken_started, then call restart_kraken for each
    instance of env.instances.

    :param wait: passed through get_bool_from_cli, then forwarded to restart_kraken
    """
    wait = get_bool_from_cli(wait)
    execute(require_monitor_kraken_started)
    every_instance = env.instances.values()
    for current in every_instance:
        restart_kraken(current, wait=wait)
def upgrade_all(up_tyr=True, up_confs=True, kraken_wait=True, check_version=True,
                send_mail='no', manual_lb=False, check_dead=True):
    """
    Upgrade all navitia packages, databases and launch rebinarisation of all instances

    Every boolean flag is passed through get_bool_from_cli.

    :param up_tyr: when true, tyr_beat is stopped, tyr is upgraded and the
        rebinarization is launched; tyr_beat is started again at the end
    :param up_confs: forwarded to upgrade_tyr, upgrade_kraken and upgrade_jormungandr
    :param kraken_wait: forwarded to upgrade_kraken
    :param check_version: when true, compare_version_candidate_installed is
        executed before the upgrade (host_name='tyr') and again after the tyr step
    :param send_mail: 'start', 'end' or 'all' select which broadcast emails are
        sent; any other value sends none
    :param manual_lb: when true and env.use_load_balancer is set, the operator
        is prompted to switch nodes by hand instead of the automatic
        switch_to_*_phase / enable_all_nodes tasks
    :param check_dead: when true, check_dead_instances is executed after the
        kraken upgrade of the first hosts set
    """
    check_version = get_bool_from_cli(check_version)
    up_tyr = get_bool_from_cli(up_tyr)
    up_confs = get_bool_from_cli(up_confs)
    kraken_wait = get_bool_from_cli(kraken_wait)
    # Task arguments typed on the fab command line arrive as strings, so a raw
    # 'False' would be truthy: these two flags need the same normalization.
    manual_lb = get_bool_from_cli(manual_lb)
    check_dead = get_bool_from_cli(check_dead)

    if check_version:
        execute(compare_version_candidate_installed, host_name='tyr')
    if env.use_load_balancer:
        if manual_lb:
            print(yellow("WARNING : you are in MANUAL mode :\n"
                         "Check frequently for message asking you to switch nodes manually"))
        else:
            # check credential NOW
            _adc_connection(check=True)
    execute(check_last_dataset)
    if send_mail in ('start', 'all'):
        broadcast_email('start')

    time_dict = TimeCollector()
    time_dict.register_start('total_deploy')

    if up_tyr:
        execute(tyr.stop_tyr_beat)
        execute(upgrade_tyr, up_confs=up_confs, pilot_tyr_beat=False)
        time_dict.register_start('bina')
        execute(tyr.launch_rebinarization_upgrade, pilot_tyr_beat=False)
        time_dict.register_end('bina')

    if check_version:
        execute(compare_version_candidate_installed)
    execute(kraken.swap_all_data_nav)

    if env.use_load_balancer:
        # Upgrade kraken/jormun on first hosts set
        env.roledefs['eng'] = env.eng_hosts_1
        env.roledefs['ws'] = env.ws_hosts_1
        if manual_lb:
            raw_input(yellow("Please disable ENG1/WS1 and enable ENG2-4/WS2-4"))
        else:
            execute(switch_to_first_phase, env.eng_hosts_1, env.ws_hosts_1, env.ws_hosts_2)
        # the 'kraken' timer spans both hosts sets
        time_dict.register_start('kraken')
        execute(upgrade_kraken, kraken_wait=kraken_wait, up_confs=up_confs, supervision=True)
        if check_dead:
            execute(check_dead_instances)
        execute(upgrade_jormungandr, reload=False, up_confs=up_confs)

        # Upgrade kraken/jormun on remaining hosts
        env.roledefs['eng'] = env.eng_hosts_2
        env.roledefs['ws'] = env.ws_hosts_2
        if manual_lb:
            raw_input(yellow("Please enable ENG1/WS1 and disable ENG2-4/WS2-4"))
        else:
            execute(switch_to_second_phase, env.eng_hosts_1, env.eng_hosts_2,
                    env.ws_hosts_1, env.ws_hosts_2)
        execute(upgrade_kraken, kraken_wait=kraken_wait, up_confs=up_confs)
        time_dict.register_end('kraken')
        execute(upgrade_jormungandr, reload=False, up_confs=up_confs)
        if not manual_lb:
            execute(enable_all_nodes, env.eng_hosts, env.ws_hosts_1, env.ws_hosts_2)
        # restore the complete host lists in the role definitions
        env.roledefs['eng'] = env.eng_hosts
        env.roledefs['ws'] = env.ws_hosts
    else:
        time_dict.register_start('kraken')
        execute(upgrade_kraken, kraken_wait=kraken_wait, up_confs=up_confs, supervision=True)
        time_dict.register_end('kraken')
        execute(upgrade_jormungandr, up_confs=up_confs)

    if up_tyr:
        execute(tyr.start_tyr_beat)
    time_dict.register_end('total_deploy')

    if send_mail in ('end', 'all'):
        warn_dict = jormungandr.check_kraken_jormun_after_deploy()
        status = show_dead_kraken_status(warn_dict, show=True)
        status += show_time_deploy(time_dict)
        broadcast_email('end', status)

    if env.use_load_balancer and manual_lb:
        print(yellow("Please enable ENG1-4/WS1-4"))
def test_all_krakens(wait=False):
    """
    Call test_kraken, by instance name, on every instance of env.instances.

    Each call uses fail_if_error=False and loaded_is_ok=True.

    :param wait: passed through get_bool_from_cli, then forwarded to test_kraken
    """
    wait = get_bool_from_cli(wait)
    options = dict(fail_if_error=False, wait=wait, loaded_is_ok=True)
    for current in env.instances.values():
        test_kraken(current.name, **options)
def test_kraken(instance, fail_if_error=True, wait=False, loaded_is_ok=None, hosts=None):
    """Test kraken with '?instance='

    Queries the kraken monitor URL of each host for the instance and prints a
    colored diagnostic for every host.

    :param instance: resolved with get_real_instance
    :param fail_if_error: forwarded to _test_kraken; when true, abort() is
        called if loaded data was expected but is missing, or if the result
        lacks an expected key
    :param wait: passed through get_bool_from_cli; when true the request is
        retried until a result with a true 'loaded' entry is obtained
    :param loaded_is_ok: whether loaded data is the expected state; when None
        it takes the value of `wait`
    :param hosts: host strings of the form 'user@host'; defaults to
        instance.kraken_engines
    :return: True/False when exactly one host is tested, False after a KeyError
        on the result; otherwise falls off the loop (None)
    """
    instance = get_real_instance(instance)
    wait = get_bool_from_cli(wait)
    # keep only the part after '@' of each host string
    # NOTE(review): a host string without '@' raises IndexError here - confirm
    # that callers always pass 'user@host'
    hosts = [h.split('@')[1] for h in hosts or instance.kraken_engines]
    # a value is returned from inside the loop only when a single host is tested
    will_return = len(hosts) == 1
    for host in hosts:
        request = 'http://{}:{}/{}/?instance={}'.format(
            host, env.kraken_monitor_port, env.kraken_monitor_location_dir,
            instance.name)
        if wait:
            # we wait until we get a response and the instance is 'loaded'
            # (delays are presumably in milliseconds - TODO confirm against the
            # Retrying implementation in use)
            try:
                result = Retrying(stop_max_delay=env.KRAKEN_RESTART_DELAY * 1000,
                                  wait_fixed=1000,
                                  retry_on_result=lambda x: x is None or not x['loaded']) \
                    .call(_test_kraken, request, fail_if_error)
            except Exception as ex:
                print(
                    red("ERROR: could not reach {}, too many retries ! ({})".
                        format(instance.name, ex)))
                # placeholder result: its status is not 'running', so the
                # checks below report the instance as not running
                result = {'status': False}
        else:
            result = _test_kraken(request, fail_if_error)
        try:
            # NOTE(review): with several hosts (will_return is False) the
            # branches below do not stop at the first diagnostic; execution
            # falls through to the following checks - confirm this is intended
            if result['status'] != 'running':
                if result['status'] == 'no_data':
                    print(
                        yellow(
                            "WARNING: instance {} has no loaded data".format(
                                instance.name)))
                    if will_return:
                        return False
                if fail_if_error:
                    print(
                        red("ERROR: Instance {} is not running ! ({})".format(
                            instance.name, result)))
                    if will_return:
                        return False
                print(
                    yellow("WARNING: Instance {} is not running ! ({})".format(
                        instance.name, result)))
                if will_return:
                    return False
            if not result['is_connected_to_rabbitmq']:
                print(
                    yellow("WARNING: Instance {} is not connected to rabbitmq".
                           format(instance.name)))
                if will_return:
                    return False
            # by default, loaded data is expected only when we waited for it
            if loaded_is_ok is None:
                loaded_is_ok = wait
            if not loaded_is_ok:
                if result['loaded']:
                    print(
                        yellow("WARNING: instance {} has loaded data".format(
                            instance.name)))
                    if will_return:
                        return True
                else:
                    print(
                        green("OK: instance {} has correct values: {}".format(
                            instance.name, result)))
                    if will_return:
                        return False
            else:
                if result['loaded']:
                    print(
                        green("OK: instance {} has correct values: {}".format(
                            instance.name, result)))
                    if will_return:
                        return True
                elif fail_if_error:
                    abort(
                        red("CRITICAL: instance {} has no loaded data".format(
                            instance.name)))
                else:
                    print(
                        yellow(
                            "WARNING: instance {} has no loaded data".format(
                                instance.name)))
                    if will_return:
                        return False
        except KeyError:
            # the result lacks one of the keys read above
            print(
                red("CRITICAL: instance {} does not return a correct result".
                    format(instance.name)))
            print(result)
            if fail_if_error:
                abort('')
            return False
def upgrade_all(up_tyr=True, up_confs=True, check_version=True, send_mail='no',
                manual_lb=False, check_dead=True, check_bina=True):
    """
    Upgrade all navitia packages, databases and launch rebinarisation of all instances

    Every boolean flag is passed through get_bool_from_cli.

    :param up_tyr: when true, update_tyr_step is executed
    :param up_confs: forwarded to upgrade_kraken and upgrade_jormungandr
    :param check_version: when true, compare_version_candidate_installed is
        executed before the upgrade (host_name='tyr') and again after the tyr step
    :param send_mail: 'start', 'end' or 'all' select which broadcast emails are
        sent; any other value sends none
    :param manual_lb: when true and env.use_load_balancer is set, the operator
        is prompted to switch nodes by hand instead of the automatic
        switch_to_*_phase / enable_all_nodes tasks
    :param check_dead: when true, check_dead_instances is executed after the
        kraken upgrade of the first hosts set
    :param check_bina: forwarded to update_tyr_step
    """
    up_tyr = get_bool_from_cli(up_tyr)
    up_confs = get_bool_from_cli(up_confs)
    check_version = get_bool_from_cli(check_version)
    check_dead = get_bool_from_cli(check_dead)
    check_bina = get_bool_from_cli(check_bina)
    # Task arguments typed on the fab command line arrive as strings, so a raw
    # 'False' would be truthy: `manual_lb` needs the same normalization.
    manual_lb = get_bool_from_cli(manual_lb)

    if check_version:
        execute(compare_version_candidate_installed, host_name='tyr')
    if env.use_load_balancer:
        if manual_lb:
            print(yellow("WARNING : you are in MANUAL mode :\n"
                         "Check frequently for message asking you to switch nodes manually"))
        else:
            # check credential NOW
            _adc_connection(check=True)
    execute(check_last_dataset)
    if send_mail in ('start', 'all'):
        broadcast_email('start')

    time_dict = TimeCollector()
    time_dict.register_start('total_deploy')

    if up_tyr:
        execute(update_tyr_step, time_dict, only_bina=False, check_bina=check_bina)

    if check_version:
        execute(compare_version_candidate_installed)
    execute(kraken.swap_all_data_nav)

    if env.use_load_balancer:
        # Upgrade kraken/jormun on first hosts set
        env.roledefs['eng'] = env.eng_hosts_1
        env.roledefs['ws'] = env.ws_hosts_1
        if manual_lb:
            raw_input(yellow("Please disable ENG1,3/WS1,5,6 and enable ENG2,4/WS2-4"))
        else:
            execute(switch_to_first_phase, env.eng_hosts_1, env.ws_hosts_1, env.ws_hosts_2)
        # the 'kraken' timer spans both hosts sets
        time_dict.register_start('kraken')
        execute(upgrade_kraken, wait=env.KRAKEN_RESTART_SCHEME, up_confs=up_confs, supervision=True)
        if check_dead:
            execute(check_dead_instances)
        execute(upgrade_jormungandr, reload=False, up_confs=up_confs)

        # check first hosts set before upgrading the second one
        for server in env.roledefs['ws']:
            instance = random.choice(env.instances.values())
            execute(jormungandr.test_jormungandr, get_host_addr(server), instance=instance.name)

        # Upgrade kraken/jormun on remaining hosts
        env.roledefs['eng'] = env.eng_hosts_2
        env.roledefs['ws'] = env.ws_hosts_2
        if manual_lb:
            raw_input(yellow("Please enable ENG1,3/WS1,5,6 and disable ENG2,4/WS2-4"))
        else:
            execute(switch_to_second_phase, env.eng_hosts_1, env.eng_hosts_2,
                    env.ws_hosts_1, env.ws_hosts_2)
        execute(upgrade_jormungandr, reload=False, up_confs=up_confs)
        if manual_lb:
            raw_input(yellow("Please enable WS1-6"))
        else:
            execute(switch_to_third_phase, env.ws_hosts_2)
        env.roledefs['ws'] = env.ws_hosts
        execute(upgrade_kraken, wait=env.KRAKEN_RESTART_SCHEME, up_confs=up_confs)
        time_dict.register_end('kraken')
        if not manual_lb:
            execute(enable_all_nodes, env.eng_hosts, env.ws_hosts_1, env.ws_hosts_2)
        env.roledefs['eng'] = env.eng_hosts
    else:
        time_dict.register_start('kraken')
        execute(upgrade_kraken, wait=env.KRAKEN_RESTART_SCHEME, up_confs=up_confs, supervision=True)
        time_dict.register_end('kraken')
        execute(upgrade_jormungandr, up_confs=up_confs)

    # check deployment OK
    for server in env.roledefs['ws']:
        instance = random.choice(env.instances.values())
        execute(jormungandr.test_jormungandr, get_host_addr(server), instance=instance.name)

    # start tyr_beat even if up_tyr is False
    execute(tyr.start_tyr_beat)
    time_dict.register_end('total_deploy')

    if send_mail in ('end', 'all'):
        warn_dict = jormungandr.check_kraken_jormun_after_deploy()
        status = show_dead_kraken_status(warn_dict, show=True)
        status += show_time_deploy(time_dict)
        broadcast_email('end', status)

    if env.use_load_balancer and manual_lb:
        print(yellow("Please enable ENG1-4/WS1-4"))