def test_partition_master_outgoing():
    """Partition the leading master with ``incoming=False, outgoing=True``.

    The partition is held for 20 seconds, the master is reconnected, and
    the service health is checked afterwards.
    """
    leader_ip = shakedown.master_leader_ip()
    shakedown.partition_master(leader_ip, incoming=False, outgoing=True)
    try:
        # Keep the partition in place for a while before healing it.
        time.sleep(20)
    finally:
        # Always undo the partition, even if the wait is interrupted, so a
        # failed run does not leave the master cut off for later tests.
        shakedown.reconnect_master(leader_ip)
    check_health()
def test_partition_master_both_ways():
    """Partition the leading master using the helper's default directions.

    The partition is held for 20 seconds, the master is reconnected, and
    the service health is checked afterwards.
    """
    leader_ip = shakedown.master_leader_ip()
    shakedown.partition_master(leader_ip)
    try:
        # Keep the partition in place for a while before healing it.
        time.sleep(20)
    finally:
        # Always undo the partition, even if the wait is interrupted, so a
        # failed run does not leave the master cut off for later tests.
        shakedown.reconnect_master(leader_ip)
    check_health()
def test_partition(install_framework):
    """Partition the agent of one node, reconnect it, and check health.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    node_host = get_node_host()

    # Cut the agent off and heal it straight away.
    shakedown.partition_agent(node_host)
    shakedown.reconnect_agent(node_host)

    check_health()
def test_kill_all_task_in_node():
    """Kill the 'CassandraDaemon' task on every service host, then recover.

    After the kills, ``recover_failed_agents`` is run over the same hosts
    and the service health is checked.
    """
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    for ip in service_ips:
        kill_task_with_pattern('CassandraDaemon', ip)

    recover_failed_agents(service_ips)
    check_health()
def test_config_update_then_master_killed():
    """Bump the CPU config by -0.1 and kill 'mesos-master' on the leader.

    Both actions are handed to ``run_planned_operation`` as callables.
    Afterwards the leader is verified to have changed and health is checked.
    """
    leader_ip = shakedown.master_leader_ip()

    def lower_cpu_count():
        return bump_cpu_count_config(-0.1)

    def kill_leading_master():
        return kill_task_with_pattern('mesos-master', leader_ip)

    run_planned_operation(lower_cpu_count, kill_leading_master)

    verify_leader_changed(leader_ip)
    check_health()
def test_cleanup_then_kill_all_task_in_node(install_framework):
    """Run cleanup, killing 'CassandraDaemon' on every service host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_every_daemon():
        return [
            kill_task_with_pattern('CassandraDaemon', ip)
            for ip in service_ips
        ]

    run_planned_operation(run_cleanup, kill_every_daemon)
    check_health()
def test_config_update_then_scheduler_died(install_framework):
    """Bump the CPU config, killing the scheduler process on its host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    scheduler_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern(
            'cassandra.scheduler.Main', scheduler_host)

    run_planned_operation(bump_cpu_count_config, kill_scheduler)
    check_health()
def test_cleanup_then_scheduler_died(install_framework):
    """Run cleanup, killing the scheduler process on its host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    scheduler_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern(
            'cassandra.scheduler.Main', scheduler_host)

    run_planned_operation(run_cleanup, kill_scheduler)
    check_health()
def test_all_executors_killed():
    """Kill 'cassandra.executor.Main' on every service host, then recover.

    After the kills, ``recover_failed_agents`` is run over the same hosts
    and the service health is checked.
    """
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    for ip in service_ips:
        kill_task_with_pattern('cassandra.executor.Main', ip)

    recover_failed_agents(service_ips)
    check_health()
def test_repair_then_scheduler_died():
    """Run repair, killing the scheduler process on its host."""
    scheduler_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern(
            'cassandra.scheduler.Main', scheduler_host)

    run_planned_operation(run_repair, kill_scheduler)
    check_health()
def test_config_update_then_kill_task_in_node(install_framework):
    """Bump the CPU config, killing 'CassandraDaemon' on one node host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    node_host = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', node_host)

    run_planned_operation(bump_cpu_count_config, kill_daemon)
    check_health()
def test_nodes_decrease_by_one_should_fail():
    """Lower ``NODES`` by one, expect the plan to reach ERROR, then revert.

    Steps:
      1. Verify the current plan reports COMPLETE.
      2. Fetch the '/cassandra' Marathon app, decrement ``NODES`` in its
         env and push the update with ``force=True``.
      3. Expect a plan with status ERROR whose "Deploy" phase has 3 steps.
      4. Restore ``NODES`` and expect a COMPLETE plan with 3 "Deploy" steps.
    """
    # Only the verification matters here; the returned plan is not needed.
    infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)

    mc = dcos.marathon.create_client()
    app = mc.get_app('/cassandra')
    app = infinity_commons.strip_meta(app)

    oe = app['env']
    env_node_count = int(oe['NODES']) - 1
    oe['NODES'] = str(env_node_count)
    app['env'] = oe
    print("Updated node count: {}".format(app['env']['NODES']))
    print(mc.update_app(app_id='/cassandra', payload=app, force=True))
    check_health()

    plan = infinity_commons.get_and_verify_plan(
        lambda p: (
            p['status'] == infinity_commons.PlanState.ERROR.value and
            len(infinity_commons.filter_phase(p, "Deploy")['steps']) == 3))
    print(plan)
    assert plan['status'] == infinity_commons.PlanState.ERROR.value

    # Revert
    oe = app['env']
    env_node_count = int(oe['NODES']) + 1
    oe['NODES'] = str(env_node_count)
    app['env'] = oe
    print("Reverted node count: {}".format(app['env']['NODES']))
    print(mc.update_app(app_id='/cassandra', payload=app, force=True))
    check_health()

    plan = infinity_commons.get_and_verify_plan(
        lambda p: (
            (p['status'] == infinity_commons.PlanState.COMPLETE.value) and
            (len(infinity_commons.filter_phase(p, "Deploy")['steps']) == 3)))
    print(plan)
    assert plan['status'] == infinity_commons.PlanState.COMPLETE.value
def test_change_disk_should_fail():
    """Lower ``CASSANDRA_DISK_MB`` by one, expect ERROR, then revert.

    Steps:
      1. Verify the current plan reports COMPLETE.
      2. Fetch the '/cassandra' Marathon app, decrement
         ``CASSANDRA_DISK_MB`` in its env and push it with ``force=True``.
      3. Expect a plan with status ERROR.
      4. Restore the value and expect a plan with status COMPLETE.
    """
    # Only the verification matters here; the returned plan is not needed.
    infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)

    mc = dcos.marathon.create_client()
    app = mc.get_app('/cassandra')
    app = infinity_commons.strip_meta(app)

    oe = app['env']
    disk = int(oe['CASSANDRA_DISK_MB']) - 1
    oe['CASSANDRA_DISK_MB'] = str(disk)
    app['env'] = oe
    print("Updated CASSANDRA_DISK_MB: {}".format(
        app['env']['CASSANDRA_DISK_MB']))
    print(mc.update_app(app_id='/cassandra', payload=app, force=True))
    check_health()

    plan = infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.ERROR.value)
    print(plan)
    assert plan['status'] == infinity_commons.PlanState.ERROR.value

    # Revert
    oe = app['env']
    disk = int(oe['CASSANDRA_DISK_MB']) + 1
    oe['CASSANDRA_DISK_MB'] = str(disk)
    app['env'] = oe
    print("Reverted CASSANDRA_DISK_MB: {}".format(
        app['env']['CASSANDRA_DISK_MB']))
    print(mc.update_app(app_id='/cassandra', payload=app, force=True))
    check_health()

    plan = infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)
    print(plan)
    assert plan['status'] == infinity_commons.PlanState.COMPLETE.value
def test_config_update_then_executor_killed(install_framework):
    """Bump the CPU config, killing the executor process on one node host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    run_planned_operation(bump_cpu_count_config, kill_executor)
    check_health()
def test_cleanup_then_executor_killed(install_framework):
    """Run cleanup, killing the executor process on one node host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    run_planned_operation(run_cleanup, kill_executor)
    check_health()
def install_framework():
    """Generator that reinstalls the package around a single ``yield``.

    Before yielding: uninstall, install ``PACKAGE_NAME`` and wait, then
    check health. After the generator is resumed: uninstall again.
    NOTE(review): looks like a pytest yield-fixture; no decorator is
    visible in this view -- confirm.
    """
    # Setup: start from a clean slate, then install and verify.
    uninstall()
    shakedown.install_package_and_wait(PACKAGE_NAME)
    check_health()

    yield

    # Teardown: remove the package again.
    uninstall()
def test_partition():
    """Partition the agent of one node, reconnect it, and check health."""
    node_host = get_node_host()

    # Cut the agent off and heal it straight away.
    shakedown.partition_agent(node_host)
    shakedown.reconnect_agent(node_host)

    check_health()
def test_cleanup_then_all_executors_killed(install_framework):
    """Run cleanup, killing the executor process on every service host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    # Pass the ``run_cleanup`` callable itself, as the other cleanup tests
    # do. Calling it here would run cleanup before run_planned_operation
    # starts and hand over its return value instead of the operation.
    run_planned_operation(
        run_cleanup,
        lambda: [
            kill_task_with_pattern('cassandra.executor.Main', h)
            for h in hosts
        ])
    check_health()
def test_repair_then_master_killed():
    """Run repair, killing a task matching 'mesos-master'.

    No host is passed to ``kill_task_with_pattern``, so whatever default
    target that helper uses applies.
    """
    def kill_master():
        return kill_task_with_pattern('mesos-master')

    run_planned_operation(run_repair, kill_master)
    check_health()
def test_repair_then_zk_killed():
    """Run repair, killing a task matching 'zookeeper'.

    No host is passed to ``kill_task_with_pattern``, so whatever default
    target that helper uses applies.
    """
    def kill_zookeeper():
        return kill_task_with_pattern('zookeeper')

    run_planned_operation(run_repair, kill_zookeeper)
    check_health()
def install_framework():
    """Generator that installs the package around a single ``yield``.

    Before yielding: install ``PACKAGE_NAME`` and wait, then check health.
    After the generator is resumed: uninstall.
    NOTE(review): looks like a pytest yield-fixture; no decorator is
    visible in this view -- confirm.
    """
    # Setup: install and verify.
    shakedown.install_package_and_wait(PACKAGE_NAME)
    check_health()

    yield

    # Teardown: remove the package again.
    uninstall()
def test_nodes_increase_by_one():
    """Raise ``NODES`` by one and expect the plan to complete with 4 steps.

    Steps:
      1. Verify the current plan reports COMPLETE.
      2. Fetch the '/cassandra' Marathon app, increment ``NODES`` in its
         env and push the update with ``force=True``.
      3. Expect a COMPLETE plan whose "Deploy" phase has 4 steps, with the
         step at index ``env_node_count - 1`` also COMPLETE.
      4. Uninstall and install again, then check health.
    """
    # Only the verification matters here; the returned plan is not needed.
    infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)

    mc = dcos.marathon.create_client()
    app = mc.get_app('/cassandra')
    app = infinity_commons.strip_meta(app)

    oe = app['env']
    env_node_count = int(oe['NODES']) + 1
    oe['NODES'] = str(env_node_count)
    app['env'] = oe
    print("Updated node count: {}".format(app['env']['NODES']))
    print(mc.update_app(app_id='/cassandra', payload=app, force=True))
    check_health()

    plan = infinity_commons.get_and_verify_plan(lambda p: (
        p['status'] == infinity_commons.PlanState.COMPLETE.value and
        len(infinity_commons.filter_phase(p, "Deploy")['steps']) == 4 and
        (infinity_commons.filter_phase(p, "Deploy")['steps'][
            env_node_count - 1]['status'] ==
         infinity_commons.PlanState.COMPLETE.value)))
    print(plan)
    assert plan['status'] == infinity_commons.PlanState.COMPLETE.value

    # reinstall after increase:
    uninstall()
    install()
    check_health()
def test_change_disk_should_fail():
    """Lower ``CASSANDRA_DISK_MB`` by one, expect ERROR, then revert.

    Steps:
      1. Verify the current plan reports COMPLETE.
      2. Fetch the '/cassandra' Marathon app, decrement
         ``CASSANDRA_DISK_MB`` in its env and push it with ``force=True``.
      3. Expect a plan with status ERROR.
      4. Restore the value and expect a plan with status COMPLETE.
    """
    # Only the verification matters here; the returned plan is not needed.
    infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)

    mc = dcos.marathon.create_client()
    app = mc.get_app('/cassandra')
    app = infinity_commons.strip_meta(app)

    oe = app['env']
    disk = int(oe['CASSANDRA_DISK_MB']) - 1
    oe['CASSANDRA_DISK_MB'] = str(disk)
    app['env'] = oe
    print("Updated CASSANDRA_DISK_MB: {}".format(
        app['env']['CASSANDRA_DISK_MB']))
    print(mc.update_app(app_id='/cassandra', payload=app, force=True))
    check_health()

    plan = infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.ERROR.value)
    print(plan)
    assert plan['status'] == infinity_commons.PlanState.ERROR.value

    # Revert
    oe = app['env']
    disk = int(oe['CASSANDRA_DISK_MB']) + 1
    oe['CASSANDRA_DISK_MB'] = str(disk)
    app['env'] = oe
    print("Reverted CASSANDRA_DISK_MB: {}".format(
        app['env']['CASSANDRA_DISK_MB']))
    print(mc.update_app(app_id='/cassandra', payload=app, force=True))
    check_health()

    plan = infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)
    print(plan)
    assert plan['status'] == infinity_commons.PlanState.COMPLETE.value
def test_upgrade_downgrade():
    """Install one package version, switch to the other, then switch back.

    The version read first is labelled "test"; the version read after
    ``remove_repo`` is labelled "master". The master version is installed
    and its plan must be COMPLETE. The service is then destroyed and
    reinstalled at the test version, and again at the master version,
    with a post-version-change health check after each switch.
    """
    repo_name, repo_url = get_test_repo_info()

    # Version visible while the test repo is still registered.
    test_version = get_pkg_version()
    print('Found test version: {}'.format(test_version))

    # Version visible once the test repo has been removed.
    remove_repo(repo_name, test_version)
    master_version = get_pkg_version()
    print('Found master version: {}'.format(master_version))

    print('Installing master version')
    install(package_version=master_version)
    check_health()

    complete_value = infinity_commons.PlanState.COMPLETE.value
    plan = infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)
    assert plan['status'] == complete_value

    # TODO: write some data

    print('Upgrading to test version')
    destroy_service()
    add_repo(repo_name, repo_url, master_version)
    install(package_version=test_version)
    check_post_version_change_health()

    print('Downgrading to master version')
    destroy_service()
    install(package_version=master_version)
    check_post_version_change_health()
def test_cleanup_then_kill_task_in_node(install_framework):
    """Run cleanup, killing 'CassandraDaemon' on one node host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    node_host = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', node_host)

    run_planned_operation(run_cleanup, kill_daemon)
    check_health()
def test_repair_then_zk_killed():
    """Run repair, killing 'zookeeper' on the leading master's host.

    A third callable, which verifies that the leader changed, is also
    handed to ``run_planned_operation``.
    """
    leader_ip = shakedown.master_leader_ip()

    def kill_zookeeper():
        return kill_task_with_pattern('zookeeper', leader_ip)

    def expect_new_leader():
        return verify_leader_changed(leader_ip)

    run_planned_operation(run_repair, kill_zookeeper, expect_new_leader)
    check_health()
def test_cleanup_then_scheduler_died(install_framework):
    """Run cleanup, killing the scheduler process on its host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    scheduler_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern(
            'cassandra.scheduler.Main', scheduler_host)

    run_planned_operation(run_cleanup, kill_scheduler)
    check_health()
def test_cleanup_then_master_killed():
    """Run cleanup, killing 'mesos-master' on the leading master's host.

    Afterwards the leader is verified to have changed and health is checked.
    """
    leader_ip = shakedown.master_leader_ip()

    def kill_leading_master():
        return kill_task_with_pattern('mesos-master', leader_ip)

    run_planned_operation(run_cleanup, kill_leading_master)

    verify_leader_changed(leader_ip)
    check_health()
def test_config_update_then_all_executors_killed():
    """Bump the CPU config, killing the executor on every service host.

    A third callable, which runs ``recover_failed_agents`` over the same
    hosts, is also handed to ``run_planned_operation``.
    """
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_every_executor():
        return [
            kill_task_with_pattern('cassandra.executor.Main', ip)
            for ip in service_ips
        ]

    def recover_agents():
        return recover_failed_agents(service_ips)

    run_planned_operation(
        bump_cpu_count_config, kill_every_executor, recover_agents)
    check_health()
def test_cleanup_then_executor_killed(install_framework):
    """Run cleanup, killing the executor process on one node host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    run_planned_operation(run_cleanup, kill_executor)
    check_health()
def test_cleanup_then_kill_all_task_in_node(install_framework):
    """Run cleanup, killing 'CassandraDaemon' on every service host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_every_daemon():
        return [
            kill_task_with_pattern('CassandraDaemon', ip)
            for ip in service_ips
        ]

    run_planned_operation(run_cleanup, kill_every_daemon)
    check_health()
def test_config_update_then_zk_killed():
    """Bump the CPU config, killing 'zookeeper' on the leading master's host.

    A third callable, which verifies that the leader changed, is also
    handed to ``run_planned_operation``.
    """
    leader_ip = shakedown.master_leader_ip()

    def kill_zookeeper():
        return kill_task_with_pattern('zookeeper', leader_ip)

    def expect_new_leader():
        return verify_leader_changed(leader_ip)

    run_planned_operation(
        bump_cpu_count_config, kill_zookeeper, expect_new_leader)
    check_health()
def test_config_update_then_executor_killed():
    """Bump the CPU config by -0.1, killing the executor on one node host.

    A third callable, which runs ``recover_failed_agents`` on that single
    host, is also handed to ``run_planned_operation``.
    """
    node_host = get_node_host()

    def lower_cpu_count():
        return bump_cpu_count_config(-0.1)

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    def recover_agent():
        return recover_failed_agents([node_host])

    run_planned_operation(lower_cpu_count, kill_executor, recover_agent)
    check_health()
def test_repair_then_scheduler_died():
    """Run repair, killing the scheduler process on its host."""
    scheduler_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern(
            'cassandra.scheduler.Main', scheduler_host)

    run_planned_operation(run_repair, kill_scheduler)
    check_health()
def test_config_update_then_executor_killed():
    """Bump the CPU config, killing the executor process on one node host."""
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    run_planned_operation(bump_cpu_count_config, kill_executor)
    check_health()
def test_repair_then_executor_killed():
    """Run repair, killing the executor process on one node host."""
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    run_planned_operation(run_repair, kill_executor)
    check_health()
def test_config_update_then_kill_all_task_in_node():
    """Bump the CPU config, killing 'CassandraDaemon' on every service host."""
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_every_daemon():
        return [
            kill_task_with_pattern('CassandraDaemon', ip)
            for ip in service_ips
        ]

    run_planned_operation(bump_cpu_count_config, kill_every_daemon)
    check_health()
def test_config_update_then_scheduler_died():
    """Bump the CPU config, killing the scheduler process on its host."""
    scheduler_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern(
            'cassandra.scheduler.Main', scheduler_host)

    run_planned_operation(bump_cpu_count_config, kill_scheduler)
    check_health()
def test_config_update_then_kill_task_in_node():
    """Bump the CPU config, killing 'CassandraDaemon' on one node host."""
    node_host = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', node_host)

    run_planned_operation(bump_cpu_count_config, kill_daemon)
    check_health()
def test_partition():
    """Partition the agent of one node, reconnect it, and check health.

    ``_block_on_adminrouter`` is called after the node host is looked up
    and before the partition is applied.
    """
    node_host = get_node_host()
    _block_on_adminrouter()

    # Cut the agent off and heal it straight away.
    shakedown.partition_agent(node_host)
    shakedown.reconnect_agent(node_host)

    check_health()
def test_repair_then_kill_all_task_in_node():
    """Run repair, killing 'CassandraDaemon' on every service host."""
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_every_daemon():
        return [
            kill_task_with_pattern('CassandraDaemon', ip)
            for ip in service_ips
        ]

    run_planned_operation(run_repair, kill_every_daemon)
    check_health()
def test_repair_then_kill_task_in_node():
    """Run repair, killing 'CassandraDaemon' on one node host."""
    node_host = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', node_host)

    run_planned_operation(run_repair, kill_daemon)
    check_health()
def test_all_partition():
    """Partition every service host's agent, reconnect them all, check health.

    All hosts are partitioned first; reconnection starts only after the
    last partition call has returned.
    """
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    # Phase 1: cut every agent off.
    for ip in service_ips:
        shakedown.partition_agent(ip)

    # Phase 2: heal every agent.
    for ip in service_ips:
        shakedown.reconnect_agent(ip)

    check_health()
def test_repair_then_kill_task_in_node():
    """Run repair, killing 'CassandraDaemon' on one node host.

    A third callable, which runs ``recover_failed_agents`` over all
    service hosts, is also handed to ``run_planned_operation``.
    """
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)
    node_host = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', node_host)

    def recover_agents():
        return recover_failed_agents(service_ips)

    run_planned_operation(run_repair, kill_daemon, recover_agents)
    check_health()
def test_cleanup_then_executor_killed():
    """Run cleanup, killing the executor process on one node host.

    A third callable, which runs ``recover_failed_agents`` on that single
    host, is also handed to ``run_planned_operation``.
    """
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    def recover_agent():
        return recover_failed_agents([node_host])

    run_planned_operation(run_cleanup, kill_executor, recover_agent)
    check_health()
def test_cleanup_then_all_executors_killed(install_framework):
    """Run cleanup, killing the executor process on every service host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    # Pass the ``run_cleanup`` callable itself, as the other cleanup tests
    # do. Calling it here would run cleanup before run_planned_operation
    # starts and hand over its return value instead of the operation.
    run_planned_operation(
        run_cleanup,
        lambda: [
            kill_task_with_pattern('cassandra.executor.Main', h)
            for h in hosts
        ]
    )
    check_health()
def test_repair_then_partition():
    """Run repair, partitioning and reconnecting one node's agent."""
    node_host = get_node_host()

    def partition_then_reconnect():
        # Cut the agent off and heal it straight away.
        shakedown.partition_agent(node_host)
        shakedown.reconnect_agent(node_host)

    run_planned_operation(run_repair, partition_then_reconnect)
    check_health()
def test_repair_then_all_executors_killed():
    """Run repair, killing the executor process on every service host."""
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_every_executor():
        return [
            kill_task_with_pattern('cassandra.executor.Main', ip)
            for ip in service_ips
        ]

    run_planned_operation(run_repair, kill_every_executor)
    check_health()
def test_config_update_then_all_executors_killed():
    """Bump the CPU config, killing the executor on every service host."""
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_every_executor():
        return [
            kill_task_with_pattern('cassandra.executor.Main', ip)
            for ip in service_ips
        ]

    run_planned_operation(bump_cpu_count_config, kill_every_executor)
    check_health()
def test_config_update_then_partition():
    """Bump the CPU config, partitioning and reconnecting one node's agent."""
    node_host = get_node_host()

    def partition_then_reconnect():
        # Cut the agent off and heal it straight away.
        shakedown.partition_agent(node_host)
        shakedown.reconnect_agent(node_host)

    run_planned_operation(bump_cpu_count_config, partition_then_reconnect)
    check_health()
def test_cleanup_then_partition(install_framework):
    """Run cleanup, partitioning and reconnecting one node's agent.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    node_host = get_node_host()

    def partition_then_reconnect():
        # Cut the agent off and heal it straight away.
        shakedown.partition_agent(node_host)
        shakedown.reconnect_agent(node_host)

    run_planned_operation(run_cleanup, partition_then_reconnect)
    check_health()
def test_cleanup_then_all_partition(install_framework):
    """Run cleanup, partitioning then reconnecting every service host.

    ``install_framework`` is not used in the body; presumably it is a
    fixture requested for its setup/teardown -- confirm against the file.
    """
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def partition_then_reconnect_all():
        # Cut every agent off before healing any of them.
        for ip in service_ips:
            shakedown.partition_agent(ip)
        for ip in service_ips:
            shakedown.reconnect_agent(ip)

    run_planned_operation(run_cleanup, partition_then_reconnect_all)
    check_health()
def test_repair_then_all_partition():
    """Run repair, partitioning then reconnecting every service host."""
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def partition_then_reconnect_all():
        # Cut every agent off before healing any of them.
        for ip in service_ips:
            shakedown.partition_agent(ip)
        for ip in service_ips:
            shakedown.reconnect_agent(ip)

    run_planned_operation(run_repair, partition_then_reconnect_all)
    check_health()
def test_config_update_then_all_partition():
    """Bump the CPU config, partitioning then reconnecting every service host."""
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def partition_then_reconnect_all():
        # Cut every agent off before healing any of them.
        for ip in service_ips:
            shakedown.partition_agent(ip)
        for ip in service_ips:
            shakedown.reconnect_agent(ip)

    run_planned_operation(bump_cpu_count_config, partition_then_reconnect_all)
    check_health()