def test_kill_all_journalnodes():
    """Restart every journal pod and verify the service recovers.

    Journal tasks must be relaunched (new task ids) while data tasks stay
    untouched. Name nodes are expected to fail and restart during journal
    recovery, so their task ids are deliberately not checked.
    """
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    journal_ids = sdk_tasks.get_task_ids(foldered_name, 'journal')
    # Reuse the already-computed foldered_name instead of recomputing it.
    data_ids = sdk_tasks.get_task_ids(foldered_name, 'data')

    for journal_pod in config.get_pod_type_instances("journal", foldered_name):
        sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'pod restart {}'.format(journal_pod))

    config.expect_recovery(service_name=foldered_name)

    # name nodes fail and restart, so don't check those
    sdk_tasks.check_tasks_updated(foldered_name, 'journal', journal_ids)
    sdk_tasks.check_tasks_not_updated(foldered_name, 'data', data_ids)
def test_zones_not_referenced_in_placement_constraints():
    """Without a zone placement constraint, brokers must not report a rack.

    Installs the service fresh (no placement constraint), then verifies every
    broker's 'rack' field is absent/None, and uninstalls afterwards.
    """
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
    sdk_install.install(
        config.PACKAGE_NAME,
        foldered_name,
        config.DEFAULT_BROKER_COUNT,
        additional_options={
            "service": {
                "name": foldered_name
            }
        })

    test_utils.broker_count_check(
        config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    broker_ids = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, foldered_name, 'broker list', json=True)

    for broker_id in broker_ids:
        broker_info = sdk_cmd.svc_cli(
            config.PACKAGE_NAME, foldered_name,
            'broker get {}'.format(broker_id), json=True)

        # Use 'is None' (identity) rather than '== None' per PEP 8.
        assert broker_info.get('rack') is None

    sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
def test_zones_referenced_in_placement_constraints():
    """With a '@zone GROUP_BY' constraint, every broker must report a valid zone as its rack."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    sdk_install.uninstall(config.PACKAGE_NAME, service_name)
    sdk_install.install(
        config.PACKAGE_NAME,
        service_name,
        config.DEFAULT_BROKER_COUNT,
        additional_options={
            "service": {
                "name": service_name,
                "placement_constraint": "[[\"@zone\", \"GROUP_BY\"]]"
            }
        })

    test_utils.broker_count_check(
        config.DEFAULT_BROKER_COUNT, service_name=service_name)

    for broker_id in sdk_cmd.svc_cli(
            config.PACKAGE_NAME, service_name, 'broker list', json=True):
        # Each broker's rack should have been populated from its zone.
        broker_info = sdk_cmd.svc_cli(
            config.PACKAGE_NAME, service_name,
            'broker get {}'.format(broker_id), json=True)
        assert sdk_fault_domain.is_valid_zone(broker_info.get('rack'))

    sdk_install.uninstall(config.PACKAGE_NAME, service_name)
def _set_world_count(service_name, count):
    # Update the scheduler's WORLD_COUNT envvar to trigger a scale operation.
    marathon_config = sdk_marathon.get_config(service_name)
    marathon_config["env"]["WORLD_COUNT"] = str(count)
    sdk_marathon.update_app(marathon_config)


def _check_custom_decommission_step(service_name):
    # Wait for the decommission plan to finish, then verify its very first
    # step is the custom one declared by the CUSTOM_DECOMMISSION scenario.
    sdk_plan.wait_for_completed_plan(service_name, "decommission")
    decommission_plan = sdk_plan.get_decommission_plan(service_name)
    log.info(sdk_plan.plan_string("decommission", decommission_plan))
    custom_step_name = decommission_plan["phases"][0]["steps"][0]["name"]
    assert "custom_decommission_step" == custom_step_name


def test_custom_decommission():
    """Scale world down twice and verify the custom decommission step runs each time."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    # scale down: triggers the first decommission
    _set_world_count(foldered_name, 1)
    _check_custom_decommission_step(foldered_name)

    # scale back up
    _set_world_count(foldered_name, 2)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Let's decommission again!
    _set_world_count(foldered_name, 1)
    sdk_plan.wait_for_completed_deployment(foldered_name)
    _check_custom_decommission_step(foldered_name)
def test_custom_zookeeper():
    """Point the service at a custom ZK path and verify brokers re-register there.

    The custom path lives under the service's own 'dcos-service-' znode so it
    is automatically cleaned up on uninstall. A topic created against the
    default ZK must not be visible after the switch.
    """
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    broker_ids = sdk_tasks.get_task_ids(foldered_name, '{}-'.format(config.DEFAULT_POD_TYPE))

    # create a topic against the default zk:
    test_utils.create_topic(config.DEFAULT_TOPIC_NAME, service_name=foldered_name)

    marathon_config = sdk_marathon.get_config(foldered_name)
    # should be using default path when this envvar is empty/unset:
    assert marathon_config['env']['KAFKA_ZOOKEEPER_URI'] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that it's automatically cleaned up in uninstall:
    zk_path = 'master.mesos:2181/{}/CUSTOMPATH'.format(sdk_utils.get_zk_path(foldered_name))
    marathon_config['env']['KAFKA_ZOOKEEPER_URI'] = zk_path
    sdk_marathon.update_app(foldered_name, marathon_config)

    # brokers are restarted to pick up the new ZK location
    sdk_tasks.check_tasks_updated(foldered_name, '{}-'.format(config.DEFAULT_POD_TYPE), broker_ids)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # wait for brokers to finish registering
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    # the advertised zookeeper endpoint must now be the custom path
    zookeeper = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'endpoints zookeeper')
    assert zookeeper.rstrip('\n') == zk_path

    # topic created earlier against default zk should no longer be present:
    topic_list_info = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'topic list', json=True)
    test_utils.assert_topic_lists_are_equal_without_automatic_topics([], topic_list_info)
def test_modify_app_config_rollback():
    """Bump an env setting, roll it back, and verify only journal tasks cycle.

    Data tasks must remain untouched through both the update and the rollback,
    and the env value must end up at its original setting.
    """
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS'
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    journal_ids = sdk_tasks.get_task_ids(foldered_name, 'journal')
    data_ids = sdk_tasks.get_task_ids(foldered_name, 'data')

    # snapshot of the pre-update config, restored below for the rollback
    old_config = sdk_marathon.get_config(foldered_name)
    marathon_config = sdk_marathon.get_config(foldered_name)
    log.info('marathon config: ')
    log.info(marathon_config)
    expiry_ms = int(marathon_config['env'][app_config_field])
    log.info('expiry ms: ' + str(expiry_ms))
    marathon_config['env'][app_config_field] = str(expiry_ms + 1)
    sdk_marathon.update_app(foldered_name, marathon_config, timeout=15 * 60)

    # Wait for journal nodes to be affected by the change
    sdk_tasks.check_tasks_updated(foldered_name, 'journal', journal_ids)
    # re-snapshot the journal ids so the rollback check below sees fresh ids
    journal_ids = sdk_tasks.get_task_ids(foldered_name, 'journal')

    log.info('old config: ')
    log.info(old_config)
    # Put the old config back (rollback)
    sdk_marathon.update_app(foldered_name, old_config)

    # Wait for the journal nodes to return to their old configuration
    sdk_tasks.check_tasks_updated(foldered_name, 'journal', journal_ids)
    config.check_healthy(service_name=foldered_name)

    # env value must be back at its original setting after the rollback
    marathon_config = sdk_marathon.get_config(foldered_name)
    assert int(marathon_config['env'][app_config_field]) == expiry_ms

    # Data tasks should not have been affected
    sdk_tasks.check_tasks_not_updated(foldered_name, 'data', data_ids)
def test_increase_decrease_world_nodes():
    """Scale world up by 2 then back down by 2, verifying existing tasks are untouched."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(service_name)

    hello_ids = sdk_tasks.get_task_ids(service_name, 'hello')
    world_ids = sdk_tasks.get_task_ids(service_name, 'world')
    log.info('world ids: ' + str(world_ids))

    # add 2 world nodes
    sdk_marathon.bump_task_count_config(service_name, 'WORLD_COUNT', 2)

    config.check_running(service_name)
    sdk_tasks.check_tasks_not_updated(service_name, 'world', world_ids)

    # check 2 world tasks added:
    assert len(sdk_tasks.get_task_ids(service_name, 'world')) == len(world_ids) + 2

    # subtract 2 world nodes
    sdk_marathon.bump_task_count_config(service_name, 'WORLD_COUNT', -2)

    config.check_running(service_name)
    # wait for the decommission plan for this subtraction to be complete
    sdk_plan.wait_for_completed_plan(service_name, 'decommission')

    # check that the total task count is back to original
    sdk_tasks.check_running(
        service_name,
        len(hello_ids) + len(world_ids),
        allow_more=False)

    # check that original tasks weren't affected/relaunched in the process
    sdk_tasks.check_tasks_not_updated(service_name, 'hello', hello_ids)
    sdk_tasks.check_tasks_not_updated(service_name, 'world', world_ids)

    # check that the world tasks are back to their prior state (same task ids)
    assert sdk_tasks.get_task_ids(service_name, 'world') == world_ids
def test_endpoints(): foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME) # check that we can reach the scheduler via admin router, and that returned endpoints are sanitized: core_site = etree.fromstring(sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'endpoints core-site.xml')) check_properties(core_site, { 'ha.zookeeper.parent-znode': '/{}/hadoop-ha'.format(sdk_utils.get_zk_path( foldered_name)) }) hdfs_site = etree.fromstring(sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'endpoints hdfs-site.xml')) expect = { 'dfs.namenode.shared.edits.dir': 'qjournal://{}/hdfs'.format(';'.join([ sdk_hosts.autoip_host( foldered_name, 'journal-{}-node'.format(i), 8485 ) for i in range(3)])), } for i in range(2): name_node = 'name-{}-node'.format(i) expect['dfs.namenode.rpc-address.hdfs.{}'.format(name_node)] = sdk_hosts.autoip_host( foldered_name, name_node, 9001) expect['dfs.namenode.http-address.hdfs.{}'.format(name_node)] = sdk_hosts.autoip_host( foldered_name, name_node, 9002) check_properties(hdfs_site, expect)
def test_modify_app_config():
    """This tests checks that the modification of the app config does not trigger a recovery."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    sdk_plan.wait_for_completed_recovery(foldered_name)
    # snapshot the recovery plan so we can prove it is unchanged afterwards
    old_recovery_plan = sdk_plan.get_plan(foldered_name, "recovery")

    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS'
    journal_ids = sdk_tasks.get_task_ids(foldered_name, 'journal')
    name_ids = sdk_tasks.get_task_ids(foldered_name, 'name')
    data_ids = sdk_tasks.get_task_ids(foldered_name, 'data')

    marathon_config = sdk_marathon.get_config(foldered_name)
    log.info('marathon config: ')
    log.info(marathon_config)
    # bump the expiry value by one to force a config change
    expiry_ms = int(marathon_config['env'][app_config_field])
    marathon_config['env'][app_config_field] = str(expiry_ms + 1)
    sdk_marathon.update_app(foldered_name, marathon_config, timeout=15 * 60)

    # All tasks should be updated because hdfs-site.xml has changed
    config.check_healthy(service_name=foldered_name)
    sdk_tasks.check_tasks_updated(foldered_name, 'journal', journal_ids)
    sdk_tasks.check_tasks_updated(foldered_name, 'name', name_ids)
    sdk_tasks.check_tasks_updated(foldered_name, 'data', data_ids)

    sdk_plan.wait_for_completed_recovery(foldered_name)
    # the config update must not have scheduled any recovery operations
    new_recovery_plan = sdk_plan.get_plan(foldered_name, "recovery")
    assert old_recovery_plan == new_recovery_plan
def test_bump_data_nodes():
    """Adding a data node deploys one extra task without restarting existing data tasks."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    initial_data_ids = sdk_tasks.get_task_ids(service_name, 'data')
    log.info('data ids: ' + str(initial_data_ids))

    sdk_marathon.bump_task_count_config(service_name, 'DATA_COUNT')

    config.check_healthy(service_name=service_name, count=config.DEFAULT_TASK_COUNT + 1)
    sdk_tasks.check_tasks_not_updated(service_name, 'data', initial_data_ids)
def test_pod_info():
    """'pod info world-1' returns exactly one task whose info and status agree."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    tasks = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service_name, 'pod info world-1', json=True)
    assert len(tasks) == 1

    task = tasks[0]
    assert len(task) == 2
    assert task['info']['name'] == 'world-1-server'
    # info and status must reference the same task id
    assert task['info']['taskId']['value'] == task['status']['taskId']['value']
    assert task['status']['state'] == 'TASK_RUNNING'
def test_config_cli():
    """Exercise the 'debug config' CLI subcommands."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    config_ids = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service_name, 'debug config list', json=True)
    # refrain from breaking this test if earlier tests did a config update
    assert len(config_ids) >= 1

    # noisy output
    assert sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service_name,
        'debug config show {}'.format(config_ids[0]), print_output=False)
    assert sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service_name, 'debug config target', json=True)
    assert sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service_name, 'debug config target_id', json=True)
def test_broker_invalid():
    """'broker get' with an out-of-range broker id is expected to fail."""
    try:
        sdk_cmd.svc_cli(
            config.PACKAGE_NAME, sdk_utils.get_foldered_name(config.SERVICE_NAME),
            'broker get {}'.format(config.DEFAULT_BROKER_COUNT + 1), json=True)
        assert False, "Should have failed"
    except AssertionError as arg:
        # re-raise our own assertion so a non-failing CLI call still fails the test
        raise arg
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit are
        # not swallowed; any other exception here is the expected CLI failure.
        pass  # expected to fail
def test_finish_task_restarts_on_config_update():
    """A completed 'finish' task should be relaunched after a config change."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(service_name)

    task_name = "world-0-finish"
    finished_task_id = get_completed_task_id(task_name)
    assert finished_task_id is not None
    log.info("%s ID: %s", task_name, finished_task_id)

    config.bump_world_cpus(service_name)

    # the finish task may complete again before we look, so don't require it
    # to still be incomplete
    sdk_tasks.check_task_relaunched(task_name, finished_task_id, ensure_new_task_not_completed=False)
    config.check_running(service_name)
def test_kill_data_node():
    """Killing the datanode process on data-0 recovers data tasks only."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    data_ids = sdk_tasks.get_task_ids(service_name, 'data-0')
    journal_ids = sdk_tasks.get_task_ids(service_name, 'journal')
    name_ids = sdk_tasks.get_task_ids(service_name, 'name')

    sdk_cmd.kill_task_with_pattern(
        'datanode', sdk_hosts.system_host(service_name, 'data-0-node'))
    config.expect_recovery(service_name=service_name)

    sdk_tasks.check_tasks_updated(service_name, 'data', data_ids)
    # journal and name nodes must be untouched by the data-node recovery
    sdk_tasks.check_tasks_not_updated(service_name, 'journal', journal_ids)
    sdk_tasks.check_tasks_not_updated(service_name, 'name', name_ids)
def test_finish_task_restarts_on_config_update():
    """A completed 'finish' task must be relaunched after a config change."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(foldered_name)

    task_name = 'world-0-finish'
    world_finish_id = sdk_tasks.get_completed_task_id(task_name)
    assert world_finish_id is not None
    log.info('world_finish_id: ' + str(world_finish_id))

    # Trigger a config update; the returned cpu value was previously bound to
    # an unused local ('updated_cpus'), now dropped.
    config.bump_world_cpus(foldered_name)

    sdk_tasks.check_task_relaunched(task_name, world_finish_id)
    config.check_running(foldered_name)
def test_pod_status_one():
    """'pod status --json hello-0' shows a single RUNNING server task."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    # /test/integration/hello-world => test.integration.hello-world
    task_id_prefix = sdk_utils.get_task_id_service_name(service_name)

    status = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service_name, 'pod status --json hello-0', json=True)
    assert status['name'] == 'hello-0'
    assert len(status['tasks']) == 1

    task = status['tasks'][0]
    assert len(task) == 3
    assert re.match(task_id_prefix + '__hello-0-server__[0-9a-f-]+', task['id'])
    assert task['name'] == 'hello-0-server'
    assert task['status'] == 'RUNNING'
def configure_package(configure_security):
    """Module fixture: (re)install hello-world with the finish_state yaml; uninstall afterwards."""
    # Resolve the name BEFORE the try block: if get_foldered_name raised inside
    # the try, the finally clause would hit a NameError on 'foldered_name'.
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
        sdk_install.install(
            config.PACKAGE_NAME,
            foldered_name,
            config.DEFAULT_TASK_COUNT,
            additional_options={"service": {"name": foldered_name, "yaml": "finish_state"}})

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
def test_finish_install_on_failure():
    """Installing the non-recoverable yaml must abort once task failures exceed the limit."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    sdk_install.uninstall(config.PACKAGE_NAME, service_name)

    with pytest.raises(sdk_plan.TaskFailuresExceededException):
        sdk_install.install(
            config.PACKAGE_NAME,
            service_name,
            1,
            additional_options={
                "service": {"name": service_name, "yaml": "non_recoverable_state"}
            },
        )

    sdk_install.uninstall(config.PACKAGE_NAME, service_name)
def test_state_refresh_disable_cache():
    '''Disables caching via a scheduler envvar, then re-enables it.

    With caching on, 'debug state refresh_cache' succeeds; with
    DISABLE_STATE_CACHE set it should fail with 409 Conflict. Task ids are
    checked throughout to prove the scheduler-only change never restarts
    service tasks.
    '''
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(foldered_name)
    task_ids = sdk_tasks.get_task_ids(foldered_name, '')

    # caching enabled by default:
    stdout = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'debug state refresh_cache')
    assert "Received cmd: refresh" in stdout

    # any non-empty value disables the cache
    marathon_config = sdk_marathon.get_config(foldered_name)
    marathon_config['env']['DISABLE_STATE_CACHE'] = 'any-text-here'
    sdk_marathon.update_app(foldered_name, marathon_config)

    # scheduler-only change: service tasks must not be relaunched
    sdk_tasks.check_tasks_not_updated(foldered_name, '', task_ids)
    config.check_running(foldered_name)

    # caching disabled, refresh_cache should fail with a 409 error (eventually, once scheduler is up):
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=120*1000,
        retry_on_result=lambda res: not res)
    def check_cache_refresh_fails_409conflict():
        output = sdk_cmd.svc_cli(
            config.PACKAGE_NAME,
            foldered_name,
            'debug state refresh_cache',
            return_stderr_in_stdout=True)
        return "failed: 409 Conflict" in output
    check_cache_refresh_fails_409conflict()

    # remove the envvar to re-enable caching
    marathon_config = sdk_marathon.get_config(foldered_name)
    del marathon_config['env']['DISABLE_STATE_CACHE']
    sdk_marathon.update_app(foldered_name, marathon_config)

    sdk_tasks.check_tasks_not_updated(foldered_name, '', task_ids)
    config.check_running(foldered_name)
    shakedown.deployment_wait()  # ensure marathon thinks the deployment is complete too

    # caching reenabled, refresh_cache should succeed (eventually, once scheduler is up):
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=120*1000,
        retry_on_result=lambda res: not res)
    def check_cache_refresh():
        return sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'debug state refresh_cache')
    stdout = check_cache_refresh()
    assert "Received cmd: refresh" in stdout
def test_bump_journal_cpus():
    """Bumping journal cpus restarts journal tasks serially without touching name nodes."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    journal_ids = sdk_tasks.get_task_ids(service_name, 'journal')
    name_ids = sdk_tasks.get_task_ids(service_name, 'name')
    log.info('journal ids: ' + str(journal_ids))

    sdk_marathon.bump_cpu_count_config(service_name, 'JOURNAL_CPUS')

    sdk_tasks.check_tasks_updated(service_name, 'journal', journal_ids)
    # A journal node update must not crash any name node. If a name node
    # crashed, the journal nodes were likely updated in parallel rather than
    # serially: the deploy plan is parallel for journal nodes while the update
    # plan is serial, so a crash suggests the deploy plan was mistakenly used.
    sdk_tasks.check_tasks_not_updated(service_name, 'name', name_ids)
    config.check_healthy(service_name=service_name)
def test_once_task_does_not_restart_on_config_update():
    """A completed 'once' task must NOT be relaunched by a config update."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(foldered_name)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    task_name = 'hello-0-once'
    hello_once_id = sdk_tasks.get_completed_task_id(task_name)
    assert hello_once_id is not None
    log.info('hello_once_id: ' + str(hello_once_id))

    # Trigger a config update; the returned cpu value was previously bound to
    # an unused local ('updated_cpus'), now dropped.
    config.bump_hello_cpus(foldered_name)

    sdk_tasks.check_task_not_relaunched(foldered_name, task_name, hello_once_id)
    config.check_running(foldered_name)
def test_kill_all_datanodes():
    """Restarting every data pod recovers data tasks and leaves journal/name tasks alone."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    journal_ids = sdk_tasks.get_task_ids(service_name, 'journal')
    name_ids = sdk_tasks.get_task_ids(service_name, 'name')
    data_ids = sdk_tasks.get_task_ids(service_name, 'data')

    for data_pod in config.get_pod_type_instances("data", service_name):
        sdk_cmd.svc_cli(
            config.PACKAGE_NAME, service_name, 'pod restart {}'.format(data_pod))
    config.expect_recovery(service_name=service_name)

    sdk_tasks.check_tasks_updated(service_name, 'data', data_ids)
    sdk_tasks.check_tasks_not_updated(service_name, 'journal', journal_ids)
    sdk_tasks.check_tasks_not_updated(service_name, 'name', name_ids)
def test_pod_status_all():
    """'pod status --json' lists every hello/world pod with RUNNING server tasks."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    # /test/integration/hello-world => test.integration.hello-world
    task_id_prefix = sdk_utils.get_task_id_service_name(service_name)

    status = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service_name, 'pod status --json', json=True)
    assert status['service'] == service_name

    for pod in status['pods']:
        assert re.match('(hello|world)', pod['name'])
        for instance in pod['instances']:
            assert re.match('(hello|world)-[0-9]+', instance['name'])
            for task in instance['tasks']:
                assert len(task) == 3
                assert re.match(
                    task_id_prefix + '__(hello|world)-[0-9]+-server__[0-9a-f-]+',
                    task['id'])
                assert re.match('(hello|world)-[0-9]+-server', task['name'])
                assert task['status'] == 'RUNNING'
def test_bump_hello_cpus():
    """Bumping hello cpus relaunches hello tasks with the new cpu allocation."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(service_name)
    hello_ids = sdk_tasks.get_task_ids(service_name, 'hello')
    log.info('hello ids: ' + str(hello_ids))

    updated_cpus = config.bump_hello_cpus(service_name)

    sdk_tasks.check_tasks_updated(service_name, 'hello', hello_ids)
    config.check_running(service_name)

    running_tasks = [
        task for task in shakedown.get_service_tasks(service_name)
        if task['name'].startswith('hello') and task['state'] == "TASK_RUNNING"]
    assert len(running_tasks) == config.hello_task_count(service_name)
    # every relaunched hello task should carry the bumped cpu value
    for task in running_tasks:
        assert config.close_enough(task['resources']['cpus'], updated_cpus)
def test_pod_list():
    """'pod list' returns all hello-# pods followed by all world-# pods, in order."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    pod_names = sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, 'pod list', json=True)
    assert len(pod_names) == config.configured_task_count(service_name)

    # expect: X instances of 'hello-#' followed by Y instances of 'world-#',
    # in alphanumerical order
    first_world_index = -1
    for index, entry in enumerate(pod_names):
        if first_world_index < 0 and entry.startswith('world-'):
            first_world_index = index
        if first_world_index == -1:
            assert entry == 'hello-{}'.format(index)
        else:
            assert entry == 'world-{}'.format(index - first_world_index)
def configure_package(configure_security):
    """Module fixture: upgrade-install hello-world with the CUSTOM_DECOMMISSION scenario."""
    # Resolve the name BEFORE the try block: if get_foldered_name raised inside
    # the try, the finally clause would hit a NameError on 'foldered_name'.
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    service_options = {"service": {"name": foldered_name, "scenario": "CUSTOM_DECOMMISSION"}}
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
        sdk_upgrade.test_upgrade(
            config.PACKAGE_NAME,
            foldered_name,
            config.DEFAULT_TASK_COUNT,
            from_options=service_options,
            to_options=service_options,
        )

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
def test_metrics():
    """The kafka-0-broker task must emit the expected broker metrics."""
    expected_metrics = [
        "kafka.network.RequestMetrics.ResponseQueueTimeMs.max",
        "kafka.socket-server-metrics.io-ratio",
        "kafka.controller.ControllerStats.LeaderElectionRateAndTimeMs.p95",
    ]

    sdk_metrics.wait_for_service_metrics(
        config.PACKAGE_NAME,
        sdk_utils.get_foldered_name(config.SERVICE_NAME),
        "kafka-0-broker",
        config.DEFAULT_KAFKA_TIMEOUT,
        # succeed once every expected metric has been observed
        lambda emitted_metrics: sdk_metrics.check_metrics_presence(
            emitted_metrics, expected_metrics)
    )
def test_permanent_and_transient_namenode_failures_1_0():
    """Replace name-1 and restart name-0 together; only the name tasks should change."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_healthy(service_name=service_name)

    name_0_ids = sdk_tasks.get_task_ids(service_name, 'name-0')
    name_1_ids = sdk_tasks.get_task_ids(service_name, 'name-1')
    journal_ids = sdk_tasks.get_task_ids(service_name, 'journal')
    data_ids = sdk_tasks.get_task_ids(service_name, 'data')

    # permanent failure (replace) on name-1, transient failure (restart) on name-0
    sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, 'pod replace name-1')
    sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, 'pod restart name-0')
    config.expect_recovery(service_name=service_name)

    sdk_tasks.check_tasks_updated(service_name, 'name-0', name_0_ids)
    sdk_tasks.check_tasks_updated(service_name, 'name-1', name_1_ids)
    sdk_tasks.check_tasks_not_updated(service_name, 'journal', journal_ids)
    sdk_tasks.check_tasks_not_updated(service_name, 'data', data_ids)
def configure_package(configure_security):
    """Module fixture: upgrade-install kafka with 0.5-cpu brokers; uninstall afterwards."""
    # Resolve the name BEFORE the try block: if get_foldered_name raised inside
    # the try, the finally clause would hit a NameError on 'foldered_name'.
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
        sdk_upgrade.test_upgrade(
            config.PACKAGE_NAME,
            foldered_name,
            config.DEFAULT_BROKER_COUNT,
            additional_options={"service": {"name": foldered_name}, "brokers": {"cpus": 0.5}})

        # wait for brokers to finish registering before starting tests
        test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
def test_indexing(default_populated_index):
    """The default index contains exactly the one seeded document."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    stats = config.get_elasticsearch_indices_stats(
        config.DEFAULT_INDEX_NAME, service_name=service_name)
    assert stats["_all"]["primaries"]["docs"]["count"] == 1

    document = config.get_document(
        config.DEFAULT_INDEX_NAME, config.DEFAULT_INDEX_TYPE, 1,
        service_name=service_name)
    assert document["_source"]["name"] == "Loren"
def test_coordinator_node_replace():
    """Replacing coordinator-0 should trigger and then complete a recovery plan."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, 'pod replace coordinator-0')
    sdk_plan.wait_for_in_progress_recovery(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
def get_foldered_service_name() -> str:
    """Return the foldered variant of this module's service name."""
    foldered = sdk_utils.get_foldered_name(SERVICE_NAME)
    return foldered
def pre_test_setup():
    """Before each test: wait for the expected task count and node registration."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    sdk_tasks.check_running(service_name, current_expected_task_count)
    config.wait_for_expected_nodes_to_exist(
        service_name=service_name, task_count=current_expected_task_count)
def test_unchanged_scheduler_restarts_without_restarting_tasks():
    """Killing the scheduler process must not relaunch any service tasks."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    task_ids_before = sdk_tasks.get_task_ids(service_name, '')

    scheduler_host = sdk_marathon.get_scheduler_host(service_name)
    shakedown.kill_process_on_host(scheduler_host, "elastic.scheduler.Main")

    sdk_tasks.check_tasks_not_updated(service_name, '', task_ids_before)
def test_topic_delete():
    """Delete the ephemeral test topic."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    test_utils.delete_topic(config.EPHEMERAL_TOPIC_NAME, service_name)
import pytest

import sdk_install
import sdk_utils

from tests import config

# NOTE(review): derived from PACKAGE_NAME here, while most suites derive it
# from config.SERVICE_NAME -- confirm this is intentional for this package.
FOLDERED_SERVICE_NAME = sdk_utils.get_foldered_name(config.PACKAGE_NAME)


@pytest.fixture(scope='module', autouse=True)
def configure_package(configure_security):
    """Module fixture: install the package under a foldered name; uninstall afterwards."""
    try:
        sdk_install.uninstall(FOLDERED_SERVICE_NAME, package_name=config.PACKAGE_NAME)

        # note: this package isn't released to universe, so there's nothing to test_upgrade() with
        sdk_install.install(
            config.PACKAGE_NAME,
            config.DEFAULT_TASK_COUNT,
            service_name=FOLDERED_SERVICE_NAME,
            additional_options={"service": {
                "name": FOLDERED_SERVICE_NAME
            }})

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(FOLDERED_SERVICE_NAME, package_name=config.PACKAGE_NAME)


# NOTE(review): these marks decorate a test function that continues beyond
# this excerpt.
@pytest.mark.sanity
@pytest.mark.smoke
import sdk_cmd as cmd
import sdk_hosts as hosts
import sdk_install as install
import sdk_jobs as jobs
import sdk_plan as plan
import sdk_utils as utils

# NOTE(review): FOLDERED_NODE_ADDRESS, PACKAGE_NAME, DEFAULT_TASK_COUNT and
# the get_*_job helpers appear to be defined earlier in the original file
# (not visible in this excerpt).

# Job definitions exercising write / verify / delete against the foldered node.
WRITE_DATA_JOB = get_write_data_job(node_address=FOLDERED_NODE_ADDRESS)
VERIFY_DATA_JOB = get_verify_data_job(node_address=FOLDERED_NODE_ADDRESS)
DELETE_DATA_JOB = get_delete_data_job(node_address=FOLDERED_NODE_ADDRESS)
VERIFY_DELETION_JOB = get_verify_deletion_job(
    node_address=FOLDERED_NODE_ADDRESS)
TEST_JOBS = [
    WRITE_DATA_JOB, VERIFY_DATA_JOB, DELETE_DATA_JOB, VERIFY_DELETION_JOB
]

FOLDERED_SERVICE_NAME = utils.get_foldered_name(PACKAGE_NAME)


def setup_module(module):
    """Fresh install under the foldered service name before any test runs."""
    install.uninstall(FOLDERED_SERVICE_NAME, package_name=PACKAGE_NAME)
    utils.gc_frameworks()

    # check_suppression=False due to https://jira.mesosphere.com/browse/CASSANDRA-568
    install.install(
        PACKAGE_NAME,
        DEFAULT_TASK_COUNT,
        service_name=FOLDERED_SERVICE_NAME,
        additional_options={"service": {
            "name": FOLDERED_SERVICE_NAME
        }},
        check_suppression=False)
def test_service_health():
    """The foldered service should report healthy via shakedown."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    assert shakedown.service_healthy(service_name)
def test_pod_replace():
    """Replace a broker pod and verify it comes back."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    test_utils.replace_broker_pod(service_name)
def test_help_cli():
    """The 'help' subcommand should succeed against the foldered service."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, 'help')
def test_kill_scheduler():
    """Kill the HDFS scheduler on a marathon host; the service should return to healthy."""
    marathon_host = shakedown.get_service_ips('marathon').pop()
    sdk_cmd.kill_task_with_pattern('hdfs.scheduler.Main', marathon_host)
    config.check_healthy(service_name=sdk_utils.get_foldered_name(config.SERVICE_NAME))
def test_mesos_v0_api():
    """Flip the scheduler to the Mesos V0 API and restore the prior setting."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    prior_api_version = sdk_marathon.get_mesos_api_version(service_name)
    # Compare string VALUES with '!=': the original used 'is not "V0"', which
    # tests object identity against a literal -- unreliable for strings and a
    # SyntaxWarning on modern CPython.
    if prior_api_version != "V0":
        sdk_marathon.set_mesos_api_version(service_name, "V0")
        sdk_marathon.set_mesos_api_version(service_name, prior_api_version)
def test_losing_and_regaining_index_health(default_populated_index):
    """Killing a data node turns the index yellow; health should return to green."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_elasticsearch_index_health(
        config.DEFAULT_INDEX_NAME, "green", service_name=service_name)

    shakedown.kill_process_on_host(
        sdk_hosts.system_host(service_name, "data-0-node"), "data__.*Elasticsearch")

    config.check_elasticsearch_index_health(
        config.DEFAULT_INDEX_NAME, "yellow", service_name=service_name)
    config.check_elasticsearch_index_health(
        config.DEFAULT_INDEX_NAME, "green", service_name=service_name)
def get_foldered_dns_name(service_name):
    """Return the DNS-safe service name; pre-1.10 clusters use the name unchanged."""
    if sdk_utils.dcos_version_less_than('1.10'):
        return service_name
    # folders appear as '/' in the name; DNS labels cannot contain them
    foldered = sdk_utils.get_foldered_name(service_name)
    return foldered.replace("/", "")
def test_install():
    """A fresh install should come up healthy."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_healthy(service_name=service_name)
import re

import retrying

import sdk_cmd
import sdk_install
import sdk_marathon
import sdk_metrics
import sdk_plan
import sdk_tasks
import sdk_upgrade
import sdk_utils
from tests import config

# NOTE(review): 'logging' and 'pytest' are used below without visible imports
# here -- they presumably appear above this excerpt; confirm in the full file.
log = logging.getLogger(__name__)

# Shared foldered service name for every test in this module.
foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)


@pytest.fixture(scope="module", autouse=True)
def configure_package(configure_security):
    # Module fixture: upgrade-install the service, yielding its install options.
    try:
        service_options = {"service": {"name": foldered_name}}
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
        sdk_upgrade.test_upgrade(
            config.PACKAGE_NAME,
            foldered_name,
            config.DEFAULT_TASK_COUNT,
            from_options=service_options,
        )

        yield {"package_name": config.PACKAGE_NAME, **service_options}
        # NOTE(review): the matching finally/uninstall clause appears to
        # continue beyond this excerpt.
def pre_test_setup():
    """Verify the service is healthy before each test."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_healthy(service_name=service_name)
def test_pods_restart():
    """Restart the broker pods and verify they come back."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    test_utils.restart_broker_pods(service_name)
def test_install():
    """A fresh install should reach a running state."""
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(service_name)