def test_modify_app_config():
    """Check that changing an app config value does not alter the recovery plan.

    The journal, name and data task groups are all expected to be updated by
    the change; the "recovery" plan fetched afterwards must equal the one
    fetched before.
    """
    env_key = "TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS"
    task_groups = ("journal", "name", "data")

    # Let any recovery finish, then snapshot the plan for the final comparison.
    sdk_plan.wait_for_completed_recovery(foldered_name)
    recovery_before = sdk_plan.get_plan(foldered_name, "recovery")

    # Remember the task IDs of each group as they are before the change.
    ids_before = [
        (group, sdk_tasks.get_task_ids(foldered_name, group)) for group in task_groups
    ]

    app = sdk_marathon.get_config(foldered_name)
    log.info("marathon config: ")
    log.info(app)

    # Increment the configured value by one and push the modified config.
    current_expiry = int(app["env"][env_key])
    app["env"][env_key] = str(current_expiry + 1)
    sdk_marathon.update_app(app, timeout=15 * 60)

    # Every task group should be updated because hdfs-site.xml has changed.
    config.check_healthy(service_name=foldered_name)
    for group, old_ids in ids_before:
        sdk_tasks.check_tasks_updated(foldered_name, group, old_ids)

    sdk_plan.wait_for_completed_recovery(foldered_name)
    recovery_after = sdk_plan.get_plan(foldered_name, "recovery")
    assert recovery_before == recovery_after
def setup_constraint_switch():
    """Install one hello pod pinned to an agent, then switch its placement to another agent.

    Returns:
        A tuple ``(some_agent, other_agent, hello_ids)``: the agent used for
        the initial placement, the agent named in the updated placement, and
        the 'hello' task IDs captured before the placement was changed.
    """
    placement_template = '[["hostname", "LIKE", "{}"]]'

    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = shakedown.get_private_agents()
    some_agent, other_agent = private_agents[0], private_agents[1]
    log.info('Agents: %s %s', some_agent, other_agent)
    assert some_agent != other_agent

    raw_options = {
        "service": {
            "yaml": "marathon_constraint"
        },
        "hello": {
            "count": 1,
            # Initially the pod is pinned to some_agent.
            "placement": placement_template.format(some_agent)
        },
        "world": {
            "count": 0
        }
    }
    options = _escape_placement_for_1_9(raw_options)

    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    # Re-point the placement constraint at other_agent.
    app = sdk_marathon.get_config(config.SERVICE_NAME)
    app['env']['HELLO_PLACEMENT'] = placement_template.format(other_agent)
    sdk_marathon.update_app(config.SERVICE_NAME, app)

    # Let the scheduler come back up and settle before handing control back.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return some_agent, other_agent, hello_ids
def test_custom_zookeeper():
    """Point the service at a custom ZooKeeper path and verify the brokers move to it."""
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)
    ids_before = sdk_tasks.get_task_ids(service, pod_prefix)

    # Create a topic while the default zk path is still in use.
    test_utils.create_topic(config.DEFAULT_TOPIC_NAME, service_name=service)

    app = sdk_marathon.get_config(service)
    # An empty/unset envvar means the default path is being used.
    assert app['env']['KAFKA_ZOOKEEPER_URI'] == ''

    # The custom path lives WITHIN the 'dcos-service-' path so that uninstall
    # cleans it up automatically.
    custom_zk_uri = 'master.mesos:2181/{}/CUSTOMPATH'.format(sdk_utils.get_zk_path(service))
    app['env']['KAFKA_ZOOKEEPER_URI'] = custom_zk_uri
    sdk_marathon.update_app(service, app)

    sdk_tasks.check_tasks_updated(service, pod_prefix, ids_before)
    sdk_plan.wait_for_completed_deployment(service)

    # Wait for the brokers to finish registering.
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=service)

    reported_zk = sdk_cmd.svc_cli(config.PACKAGE_NAME, service, 'endpoints zookeeper')
    assert reported_zk.rstrip('\n') == custom_zk_uri

    # The topic created against the default zk should no longer be listed.
    topics_now = sdk_cmd.svc_cli(config.PACKAGE_NAME, service, 'topic list', json=True)
    test_utils.assert_topic_lists_are_equal_without_automatic_topics([], topics_now)
def test_deploy():
    """Watch tasks fail to launch for a while, then supply the missing envvars and expect success."""
    observation_secs = 30

    # taskcfg.yml initially fails to deploy because several options are missing
    # in the default marathon.json.mustache. Observe the failing tasks for the
    # whole window before continuing.
    print('Checking that tasks are failing to launch for at least {}s'.format(observation_secs))

    # Brief blips of TASK_RUNNING are tolerated, but not more than 3 polls in a row.
    running_streak = 0

    def poll_task_states():
        nonlocal running_streak
        states = [task['state'] for task in shakedown.get_service_tasks(PACKAGE_NAME)]
        print('Task states: {}'.format(states))
        if 'TASK_RUNNING' not in states:
            running_streak = 0
        else:
            running_streak += 1
            assert running_streak <= 3
        # Always falsy, so the spin keeps polling until its timeout.
        return False

    try:
        spin.time_wait_noisy(lambda: poll_task_states(), timeout_seconds=observation_secs)
    except shakedown.TimeoutExpired:
        print('Timeout reached as expected')

    # Add the needed envvars in marathon and confirm that the deployment succeeds.
    app = marathon.get_config(PACKAGE_NAME)
    app_env = app['env']
    del app_env['SLEEP_DURATION']
    app_env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    app_env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    marathon.update_app(PACKAGE_NAME, app)

    check_running()
def test_modify_app_config_rollback():
    """Change an app config value, put the old config back, and check which tasks were updated.

    Journal tasks are expected to be updated after the change and again after
    the rollback; data tasks are expected not to be updated at all.
    """
    env_key = "TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS"

    journal_ids_initial = sdk_tasks.get_task_ids(foldered_name, "journal")
    data_ids_initial = sdk_tasks.get_task_ids(foldered_name, "data")

    # Two separate fetches: one is kept untouched for the rollback, the other is modified.
    pristine_app = sdk_marathon.get_config(foldered_name)
    modified_app = sdk_marathon.get_config(foldered_name)
    log.info("marathon config: ")
    log.info(modified_app)

    original_expiry = int(modified_app["env"][env_key])
    log.info("expiry ms: " + str(original_expiry))
    modified_app["env"][env_key] = str(original_expiry + 1)
    sdk_marathon.update_app(modified_app, timeout=15 * 60)

    # The journal nodes must pick up the change before we roll back.
    sdk_tasks.check_tasks_updated(foldered_name, "journal", journal_ids_initial)
    journal_ids_after_change = sdk_tasks.get_task_ids(foldered_name, "journal")

    log.info("old config: ")
    log.info(pristine_app)
    # Rollback: push the untouched config again.
    sdk_marathon.update_app(pristine_app)

    # The journal nodes must be updated once more to return to the old configuration.
    sdk_tasks.check_tasks_updated(foldered_name, "journal", journal_ids_after_change)
    config.check_healthy(service_name=foldered_name)

    restored_app = sdk_marathon.get_config(foldered_name)
    assert int(restored_app["env"][env_key]) == original_expiry

    # Data tasks should not have been affected.
    sdk_tasks.check_tasks_not_updated(foldered_name, "data", data_ids_initial)
def test_port_static_to_dynamic_port():
    """Set BROKER_PORT to '0' and verify that no broker reports port 9092 afterwards."""
    static_port = 9092
    service = config.SERVICE_NAME
    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)

    def port_of(endpoint):
        # The port is whatever follows the last ':' in the endpoint string.
        return int(endpoint.rsplit(':', 1)[-1])

    sdk_tasks.check_running(service, config.DEFAULT_BROKER_COUNT)
    ids_before = sdk_tasks.get_task_ids(service, pod_prefix)

    app = sdk_marathon.get_config(service)
    app['env']['BROKER_PORT'] = '0'
    sdk_marathon.update_app(service, app)

    sdk_tasks.check_tasks_updated(service, pod_prefix, ids_before)
    # All brokers must be running again before they are queried.
    sdk_tasks.check_running(service, config.DEFAULT_BROKER_COUNT)

    for broker_id in range(config.DEFAULT_BROKER_COUNT):
        broker_info = sdk_cmd.svc_cli(
            config.PACKAGE_NAME, service, 'broker get {}'.format(broker_id), json=True)
        assert broker_info['port'] != static_port

    endpoints = sdk_cmd.svc_cli(config.PACKAGE_NAME, service, 'endpoints broker', json=True)
    assert len(endpoints['address']) == config.DEFAULT_BROKER_COUNT
    assert len(endpoints['dns']) == config.DEFAULT_BROKER_COUNT

    for address in endpoints['address']:
        assert port_of(address) != static_port

    for dns_name in endpoints['dns']:
        assert port_of(dns_name) != static_port
def test_node_replace_replaces_node():
    """Replace the node-2 pod while a temporary placement constraint excludes its current host.

    The original PLACEMENT_CONSTRAINT value is restored in a ``finally`` block
    regardless of whether the replace succeeded.
    """
    service = config.SERVICE_NAME
    task_suffix = '-server'

    matching_tasks = [t for t in sdk_tasks.get_summary() if t.name == 'node-2-server']
    replace_task = matching_tasks[0]
    log.info('avoid host for task {}'.format(replace_task))

    # The pod name is the task name without its '-server' suffix.
    pod_name = replace_task.name[:-len(task_suffix)]

    # Constrain placement so the replacement does not land on the same host.
    app = sdk_marathon.get_config(service)
    previous_constraint = app['env']['PLACEMENT_CONSTRAINT']
    try:
        unlike_host = '[["hostname", "UNLIKE", "{}"]]'.format(replace_task.host)
        app['env']['PLACEMENT_CONSTRAINT'] = unlike_host
        sdk_marathon.update_app(service, app)
        sdk_plan.wait_for_completed_deployment(service)

        # Kick off the replace and wait for the recovery to run to completion.
        sdk_cmd.svc_cli(config.PACKAGE_NAME, service, 'pod replace {}'.format(pod_name))
        sdk_plan.wait_for_kicked_off_recovery(service)
        sdk_plan.wait_for_completed_recovery(service, timeout_seconds=RECOVERY_TIMEOUT_SECONDS)
    finally:
        # Put the prior placement setting back so later tests do not get stuck.
        app['env']['PLACEMENT_CONSTRAINT'] = previous_constraint
        sdk_marathon.update_app(service, app)
        sdk_plan.wait_for_completed_deployment(service)
def test_deploy():
    """Wait for hello-0-server to report a new TASK_FAILED, then fix the envvars and expect it to run."""
    max_wait_secs = 30

    # taskcfg.yml initially fails to deploy because several options are missing
    # in the default sdk_marathon.json.mustache. Confirm the task is failing first.
    task_name = 'hello-0-server'
    log.info('Checking that {} is failing to launch within {}s'.format(task_name, max_wait_secs))

    baseline = sdk_tasks.get_status_history(task_name)

    # Poll once a second until a TASK_FAILED shows up beyond the baseline history.
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=1000*max_wait_secs,
        retry_on_result=lambda res: not res)
    def saw_new_failure():
        current = sdk_tasks.get_status_history(task_name)
        assert len(current) >= len(baseline)
        fresh = current[len(baseline):]
        log.info('New {} statuses: {}'.format(task_name, ', '.join(fresh)))
        return 'TASK_FAILED' in fresh

    saw_new_failure()

    # Add the needed envvars in marathon and confirm that the deployment succeeds.
    app = sdk_marathon.get_config(config.SERVICE_NAME)
    app_env = app['env']
    del app_env['SLEEP_DURATION']
    app_env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    app_env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    sdk_marathon.update_app(config.SERVICE_NAME, app)

    config.check_running()
def test_no_change():
    """Re-submit an unmodified Marathon config and verify that nothing gets redeployed.

    The deploy plan must be identical before and after the update, the broker
    tasks must not be reported as updated within 60 seconds, and the plan with
    its first phase and steps must be COMPLETE.

    Raises:
        AssertionError: if the plan changed, if the broker tasks were updated,
            or if any plan/phase/step status is not COMPLETE.
    """
    pod_prefix = '{}-'.format(DEFAULT_POD_TYPE)
    plan_command = 'plan show {}'.format(DEFAULT_PLAN_NAME)

    broker_ids = tasks.get_task_ids(SERVICE_NAME, pod_prefix)
    plan1 = service_cli(plan_command)

    app_config = marathon.get_config(SERVICE_NAME)
    marathon.update_app(SERVICE_NAME, app_config)

    plan2 = service_cli(plan_command)
    assert plan1 == plan2

    try:
        tasks.check_tasks_updated(SERVICE_NAME, pod_prefix, broker_ids, timeout_seconds=60)
    except AssertionError:
        # Assertion failures raised by the check itself are real failures:
        # re-raise with the original traceback intact.
        raise
    except Exception:
        # Any other error from the check (e.g. it giving up after the timeout)
        # is the expected outcome: the tasks were not updated. Catching
        # Exception rather than using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        pass
    else:
        # The check returned normally, i.e. the tasks were restarted.
        raise AssertionError("Should not restart tasks now")

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    assert plan2['status'] == 'COMPLETE'
    first_phase = plan2['phases'][0]
    assert first_phase['status'] == 'COMPLETE'
    for step in range(DEFAULT_BROKER_COUNT):
        assert first_phase['steps'][step]['status'] == 'COMPLETE'
def setup_constraint_switch():
    """Install one hello pod pinned to an agent, then switch its placement to another agent.

    Returns:
        A tuple ``(some_agent, other_agent, hello_ids)``: the hostname used
        for the initial placement, the hostname named in the updated
        placement, and the "hello" task IDs captured before the placement
        was changed.
    """
    placement_template = '[["hostname", "LIKE", "{}"]]'

    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = sdk_agents.get_private_agents()
    some_agent = private_agents[0]["hostname"]
    other_agent = private_agents[1]["hostname"]
    log.info("Agents: %s %s", some_agent, other_agent)
    assert some_agent != other_agent

    raw_options = {
        "service": {"yaml": "marathon_constraint"},
        "hello": {
            "count": 1,
            # Initially the pod is pinned to some_agent.
            "placement": placement_template.format(some_agent),
        },
        "world": {"count": 0},
    }
    options = _escape_placement_for_1_9(raw_options)

    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello")

    # Re-point the placement constraint at other_agent.
    app = sdk_marathon.get_config(config.SERVICE_NAME)
    app["env"]["HELLO_PLACEMENT"] = placement_template.format(other_agent)
    sdk_marathon.update_app(app)

    # Let the scheduler come back up and settle before handing control back.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return some_agent, other_agent, hello_ids
def test_deploy():
    """Wait for hello-0-server to report a new TASK_FAILED, then fix the envvars and expect it to run."""
    max_wait_secs = 600

    sdk_plan.wait_for_kicked_off_deployment(config.SERVICE_NAME)

    # taskcfg.yml initially fails to deploy because several options are missing
    # in the default sdk_marathon.json.mustache. Confirm the task is failing first.
    task_name = 'hello-0-server'
    log.info('Checking that {} is failing to launch within {}s'.format(task_name, max_wait_secs))

    baseline = _get_state_history(task_name)

    # Poll once a second until a TASK_FAILED shows up beyond the baseline history.
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=1000 * max_wait_secs,
        retry_on_result=lambda res: not res)
    def saw_new_failure():
        current = _get_state_history(task_name)
        assert len(current) >= len(baseline)
        fresh = current[len(baseline):]
        log.info("Added {} state history: {}".format(task_name, ", ".join(fresh)))
        return "TASK_FAILED" in fresh

    saw_new_failure()

    # Add the needed envvars in marathon and confirm that the deployment succeeds.
    app = sdk_marathon.get_config(config.SERVICE_NAME)
    app_env = app["env"]
    del app_env["SLEEP_DURATION"]
    app_env["TASKCFG_ALL_OUTPUT_FILENAME"] = "output"
    app_env["TASKCFG_ALL_SLEEP_DURATION"] = "1000"
    sdk_marathon.update_app(app)

    config.check_running()
def test_modify_app_config_rollback():
    """Change an app config value, put the old config back, and check which tasks were updated.

    Journal tasks are expected to be updated after the change and again after
    the rollback; data tasks are expected not to be updated at all.
    """
    env_key = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS'
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    journal_ids_initial = sdk_tasks.get_task_ids(service, 'journal')
    data_ids_initial = sdk_tasks.get_task_ids(service, 'data')

    # Two separate fetches: one is kept untouched for the rollback, the other is modified.
    pristine_app = sdk_marathon.get_config(service)
    modified_app = sdk_marathon.get_config(service)
    log.info('marathon config: ')
    log.info(modified_app)

    original_expiry = int(modified_app['env'][env_key])
    log.info('expiry ms: ' + str(original_expiry))
    modified_app['env'][env_key] = str(original_expiry + 1)
    sdk_marathon.update_app(service, modified_app, timeout=15 * 60)

    # The journal nodes must pick up the change before we roll back.
    sdk_tasks.check_tasks_updated(service, 'journal', journal_ids_initial)
    journal_ids_after_change = sdk_tasks.get_task_ids(service, 'journal')

    log.info('old config: ')
    log.info(pristine_app)
    # Rollback: push the untouched config again.
    sdk_marathon.update_app(service, pristine_app)

    # The journal nodes must be updated once more to return to the old configuration.
    sdk_tasks.check_tasks_updated(service, 'journal', journal_ids_after_change)
    config.check_healthy(service_name=service)

    restored_app = sdk_marathon.get_config(service)
    assert int(restored_app['env'][env_key]) == original_expiry

    # Data tasks should not have been affected.
    sdk_tasks.check_tasks_not_updated(service, 'data', data_ids_initial)
def test_modify_app_config():
    """Check that changing an app config value does not alter the recovery plan.

    The journal, name and data task groups are all expected to be updated by
    the change; the "recovery" plan fetched afterwards must equal the one
    fetched before.
    """
    env_key = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS'
    task_groups = ('journal', 'name', 'data')
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    # Let any recovery finish, then snapshot the plan for the final comparison.
    sdk_plan.wait_for_completed_recovery(service)
    recovery_before = sdk_plan.get_plan(service, "recovery")

    # Remember the task IDs of each group as they are before the change.
    ids_before = [(group, sdk_tasks.get_task_ids(service, group)) for group in task_groups]

    app = sdk_marathon.get_config(service)
    log.info('marathon config: ')
    log.info(app)

    # Increment the configured value by one and push the modified config.
    current_expiry = int(app['env'][env_key])
    app['env'][env_key] = str(current_expiry + 1)
    sdk_marathon.update_app(service, app, timeout=15 * 60)

    # Every task group should be updated because hdfs-site.xml has changed.
    config.check_healthy(service_name=service)
    for group, old_ids in ids_before:
        sdk_tasks.check_tasks_updated(service, group, old_ids)

    sdk_plan.wait_for_completed_recovery(service)
    recovery_after = sdk_plan.get_plan(service, "recovery")
    assert recovery_before == recovery_after
def test_custom_zookeeper():
    """Point the service at a custom ZooKeeper path and verify the old topic is no longer listed."""
    service = FOLDERED_SERVICE_NAME
    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)
    ids_before = sdk_tasks.get_task_ids(service, pod_prefix)

    # Create a topic while the default zk path is still in use.
    create_command = 'topic create {}'.format(config.DEFAULT_TOPIC_NAME)
    sdk_cmd.svc_cli(config.PACKAGE_NAME, service, create_command, json=True)

    topics_with_default_zk = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service, 'topic list', json=True)
    assert topics_with_default_zk == [config.DEFAULT_TOPIC_NAME]

    app = sdk_marathon.get_config(service)
    # An empty/unset envvar means the default path is being used.
    assert app['env']['KAFKA_ZOOKEEPER_URI'] == ''

    # The custom path lives WITHIN the 'dcos-service-' path so that uninstall
    # cleans it up automatically.
    custom_zk_uri = 'master.mesos:2181/{}/CUSTOMPATH'.format(ZK_SERVICE_PATH)
    app['env']['KAFKA_ZOOKEEPER_URI'] = custom_zk_uri
    sdk_marathon.update_app(service, app)

    sdk_tasks.check_tasks_updated(service, pod_prefix, ids_before)
    sdk_plan.wait_for_completed_deployment(service)

    # Wait for the brokers to finish registering.
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=service)

    reported_zk = sdk_cmd.svc_cli(config.PACKAGE_NAME, service, 'endpoints zookeeper')
    assert reported_zk.rstrip('\n') == custom_zk_uri

    # The topic created against the default zk should no longer be listed.
    topics_with_custom_zk = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service, 'topic list', json=True)
    assert topics_with_custom_zk == []
def change_region_config(region_name):
    """Set or clear the SERVICE_REGION envvar in the service's Marathon config and push the update.

    Args:
        region_name: value to store under ``SERVICE_REGION``. ``None`` removes
            the variable instead; removing a variable that is already absent
            is a no-op.
    """
    service_config = sdk_marathon.get_config(config.SERVICE_NAME)
    env = service_config['env']

    if region_name is None:
        # pop() with a default so that clearing an already-unset region does
        # not raise KeyError before the update is even submitted.
        env.pop('SERVICE_REGION', None)
    else:
        env['SERVICE_REGION'] = region_name

    # The update is submitted with wait_for_completed_deployment=False; callers
    # that need the deployment to finish have to wait for it themselves.
    sdk_marathon.update_app(
        config.SERVICE_NAME, service_config, wait_for_completed_deployment=False)
def test_custom_seccomp_profile():
    """Switch the hello pod to the test_profile.json seccomp profile and wait on the Marathon deployment."""
    service = config.SERVICE_NAME
    sdk_plan.wait_for_completed_deployment(service)

    app = sdk_marathon.get_config(service)

    # With this profile uname is disallowed, so the service should crash-loop.
    app["env"]["HELLO_SECCOMP_PROFILE_NAME"] = "test_profile.json"
    sdk_marathon.update_app(app)

    sdk_marathon.wait_for_deployment(service, 60, None)
def test_uninstall():
    """Set SDK_UNINSTALL on the scheduler and verify the service ends up with exactly zero tasks."""
    service = config.SERVICE_NAME
    config.check_running()

    # Setting the envvar in marathon should make the uninstall "deployment" succeed.
    app = sdk_marathon.get_config(service)
    app["env"]["SDK_UNINSTALL"] = "w00t"
    sdk_marathon.update_app(app)

    sdk_plan.wait_for_completed_deployment(service)
    sdk_tasks.check_running(service, 0, allow_more=False)
def test_uninstall():
    """Set SDK_UNINSTALL on the scheduler and verify the running-task check for a count of zero."""
    service = config.SERVICE_NAME
    config.check_running()

    # Setting the envvar in marathon should make the uninstall "deployment" succeed.
    app = sdk_marathon.get_config(service)
    app['env']['SDK_UNINSTALL'] = 'w00t'
    sdk_marathon.update_app(service, app)

    sdk_plan.wait_for_completed_deployment(service)
    sdk_tasks.check_running(service, 0)
def test_plugin_install_and_uninstall(default_populated_index):
    """Install a plugin via ELASTICSEARCH_PLUGINS, then clear the setting and verify it is removed.

    Args:
        default_populated_index: not referenced in the body; presumably a
            pytest fixture requested for its side effects -- TODO confirm.
    """
    plugin_name = 'analysis-phonetic'

    # Install: name the plugin in the scheduler environment.
    install_app = marathon.get_config(PACKAGE_NAME)
    install_app['env']['ELASTICSEARCH_PLUGINS'] = plugin_name
    marathon.update_app(PACKAGE_NAME, install_app)
    check_plugin_installed(plugin_name)

    # Uninstall: fetch the config afresh and blank the setting out.
    uninstall_app = marathon.get_config(PACKAGE_NAME)
    uninstall_app['env']['ELASTICSEARCH_PLUGINS'] = ""
    marathon.update_app(PACKAGE_NAME, uninstall_app)
    check_plugin_uninstalled(plugin_name)
def test_bump_node_counts():
    """Add one data, one ingest and one coordinator node and expect three more running tasks."""
    # Run this test last, as it changes the task count.
    count_keys = ('DATA_NODE_COUNT', 'INGEST_NODE_COUNT', 'COORDINATOR_NODE_COUNT')

    app = marathon.get_config(PACKAGE_NAME)
    app_env = app['env']
    for key in count_keys:
        app_env[key] = str(int(app_env[key]) + 1)

    marathon.update_app(PACKAGE_NAME, app)
    tasks.check_running(PACKAGE_NAME, DEFAULT_TASK_COUNT + 3)
def test_changing_discovery_replaces_certificate_sans(hello_world_service):
    """
    Update service configuration to change discovery prefix of a task.
    Scheduler should update task and new SANs should be generated.

    The certificate read from the discovery task must contain a SAN built from
    the current prefix before the update, and one built from the new prefix
    (in a task with a different ID) after it.

    Args:
        hello_world_service: not referenced in the body; presumably a pytest
            fixture that provides the installed service -- TODO confirm.
    """
    new_prefix = DISCOVERY_TASK_PREFIX + '-new'

    def read_cert_sans(cert_task_id):
        # Load the PEM-encoded end-entity certificate from the task's
        # server.crt and return the values of its subject alternative names.
        cert = x509.load_pem_x509_certificate(
            task_exec(cert_task_id, 'cat server.crt').encode('ascii'),
            DEFAULT_BACKEND)
        san_extension = cert.extensions.get_extension_for_oid(
            ExtensionOID.SUBJECT_ALTERNATIVE_NAME)
        # NOTE(review): this reaches into underscore-prefixed attributes of
        # the extension value; kept as in the original.
        return [
            san.value
            for san in san_extension.value._general_names._general_names]

    def expected_san_for(prefix):
        return '{name}-0.{service_name}.autoip.dcos.thisdcos.directory'.format(
            name=prefix, service_name=config.SERVICE_NAME)

    # Look the tasks up by SERVICE_NAME, the same identifier that the update
    # check and the post-update lookup below use, so that the "before" and
    # "after" ID lists always refer to the same service.
    original_tasks = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'discovery')
    assert len(original_tasks) == 1, 'Expecting exactly one task ID'
    task_id = original_tasks[0]
    assert task_id

    assert expected_san_for(DISCOVERY_TASK_PREFIX) in read_cert_sans(task_id)

    # Run task update with new discovery prefix
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    marathon_config['env']['DISCOVERY_TASK_PREFIX'] = new_prefix
    sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'discovery', original_tasks)
    sdk_tasks.check_running(config.SERVICE_NAME, 4)

    new_task_id = sdk_tasks.get_task_ids(config.SERVICE_NAME, "discovery")[0]
    assert task_id != new_task_id

    assert expected_san_for(new_prefix) in read_cert_sans(new_task_id)
def test_port_dynamic_to_dynamic_port():
    """Raise BROKER_CPUS by 0.1 and verify that all brokers are updated and running again.

    The CPU value is parsed as a float: the value written back by this test is
    fractional (e.g. "1.1"), so parsing it with int() would raise ValueError
    on any fractional setting, including the one left behind by an earlier run.
    """
    pod_prefix = '{}-'.format(DEFAULT_POD_TYPE)

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
    broker_ids = tasks.get_task_ids(SERVICE_NAME, pod_prefix)

    config = marathon.get_config(SERVICE_NAME)
    broker_cpus = float(config['env']['BROKER_CPUS'])
    config['env']['BROKER_CPUS'] = str(broker_cpus + 0.1)
    marathon.update_app(SERVICE_NAME, config)

    tasks.check_tasks_updated(SERVICE_NAME, pod_prefix, broker_ids)
    # all tasks are running
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
def test_bump_node_counts():
    """Add one data, one ingest and one coordinator node and expect three more running tasks."""
    # Run this test last, as it changes the task count.
    count_keys = ('DATA_NODE_COUNT', 'INGEST_NODE_COUNT', 'COORDINATOR_NODE_COUNT')

    app = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    app_env = app['env']
    for key in count_keys:
        app_env[key] = str(int(app_env[key]) + 1)

    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, app)
    sdk_tasks.check_running(FOLDERED_SERVICE_NAME, config.DEFAULT_TASK_COUNT + 3)
def test_plugin_install_and_uninstall(default_populated_index):
    """Install a plugin via TASKCFG_ALL_ELASTICSEARCH_PLUGINS, then clear it and verify removal.

    Args:
        default_populated_index: not referenced in the body; presumably a
            pytest fixture requested for its side effects -- TODO confirm.
    """
    plugin_name = 'analysis-phonetic'
    plugins_key = 'TASKCFG_ALL_ELASTICSEARCH_PLUGINS'
    service = FOLDERED_SERVICE_NAME

    # Install: name the plugin in the scheduler environment.
    install_app = sdk_marathon.get_config(service)
    install_app['env'][plugins_key] = plugin_name
    sdk_marathon.update_app(service, install_app)
    config.check_plugin_installed(plugin_name, service_name=service)

    # Uninstall: fetch the config afresh and blank the setting out.
    uninstall_app = sdk_marathon.get_config(service)
    uninstall_app['env'][plugins_key] = ""
    sdk_marathon.update_app(service, uninstall_app)
    config.check_plugin_uninstalled(plugin_name, service_name=service)
def test_state_refresh_disable_cache():
    """Toggle the state cache off and on through a scheduler envvar and check `refresh_cache` each time."""
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    refresh_command = 'debug state refresh_cache'
    # Both polling helpers below retry once a second for up to two minutes
    # while their result is falsy.
    retry_settings = dict(
        wait_fixed=1000,
        stop_max_delay=120*1000,
        retry_on_result=lambda res: not res)

    config.check_running(service)
    ids_before = sdk_tasks.get_task_ids(service, '')

    # Caching is enabled by default, so a refresh is accepted.
    first_output = sdk_cmd.svc_cli(config.PACKAGE_NAME, service, refresh_command)
    assert "Received cmd: refresh" in first_output

    app = sdk_marathon.get_config(service)
    app['env']['DISABLE_STATE_CACHE'] = 'any-text-here'
    sdk_marathon.update_app(service, app)
    sdk_tasks.check_tasks_not_updated(service, '', ids_before)
    config.check_running(service)

    # With caching disabled, refresh_cache should fail with a 409 error
    # (eventually, once the scheduler is up).
    @retrying.retry(**retry_settings)
    def refresh_rejected_with_409():
        combined_output = sdk_cmd.svc_cli(
            config.PACKAGE_NAME, service, refresh_command, return_stderr_in_stdout=True)
        return "failed: 409 Conflict" in combined_output

    refresh_rejected_with_409()

    app = sdk_marathon.get_config(service)
    del app['env']['DISABLE_STATE_CACHE']
    sdk_marathon.update_app(service, app)
    sdk_tasks.check_tasks_not_updated(service, '', ids_before)
    config.check_running(service)
    # Make sure marathon considers the deployment complete as well.
    shakedown.deployment_wait()

    # With caching re-enabled, refresh_cache should succeed
    # (eventually, once the scheduler is up).
    @retrying.retry(**retry_settings)
    def refresh_output():
        return sdk_cmd.svc_cli(config.PACKAGE_NAME, service, refresh_command)

    final_output = refresh_output()
    assert "Received cmd: refresh" in final_output
def test_changing_discovery_replaces_certificate_sans():
    """
    Change the discovery prefix of a task through the service configuration.
    After the deployment the task's certificate must carry a SAN built from
    the new prefix.
    """
    service = config.SERVICE_NAME
    cert_task = "discovery-0-node"
    read_cert_cmd = "cat server.crt"
    san_template = "{name}-0.{service_name}.autoip.dcos.thisdcos.directory"
    new_prefix = DISCOVERY_TASK_PREFIX + "-new"

    # Read the PEM-encoded end-entity certificate from the task.
    _rc, first_pem, _err = sdk_cmd.service_task_exec(service, cert_task, read_cert_cmd)
    log.info("first server.crt: {}".format(first_pem))

    first_pem_bytes = first_pem.encode("ascii")
    log.info("first server.crt ascii encoded: {}".format(first_pem_bytes))

    first_cert = x509.load_pem_x509_certificate(first_pem_bytes, DEFAULT_BACKEND)
    first_san_ext = first_cert.extensions.get_extension_for_oid(
        ExtensionOID.SUBJECT_ALTERNATIVE_NAME
    )
    first_sans = [entry.value for entry in first_san_ext.value._general_names._general_names]

    san_before = san_template.format(name=DISCOVERY_TASK_PREFIX, service_name=service)
    assert san_before in first_sans

    # Push the new discovery prefix and wait for the deployment to complete.
    app = sdk_marathon.get_config(service)
    app["env"]["DISCOVERY_TASK_PREFIX"] = new_prefix
    sdk_marathon.update_app(app)
    sdk_plan.wait_for_completed_deployment(service)

    _rc, second_pem, _err = sdk_cmd.service_task_exec(service, cert_task, read_cert_cmd)
    log.info("second server.crt: {}".format(second_pem))

    second_pem_bytes = second_pem.encode("ascii")
    log.info("second server.crt ascii encoded: {}".format(second_pem_bytes))

    second_cert = x509.load_pem_x509_certificate(second_pem_bytes, DEFAULT_BACKEND)
    second_san_ext = second_cert.extensions.get_extension_for_oid(
        ExtensionOID.SUBJECT_ALTERNATIVE_NAME
    )
    second_sans = [entry.value for entry in second_san_ext.value._general_names._general_names]

    san_after = san_template.format(name=new_prefix, service_name=service)
    assert san_after in second_sans
def test_can_adjust_config_from_dynamic_to_static_port():
    """Set BROKER_PORT to '9092' and verify that every broker reports port 9092."""
    pod_prefix = '{}-'.format(DEFAULT_POD_TYPE)

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
    ids_before = tasks.get_task_ids(SERVICE_NAME, pod_prefix)

    app = marathon.get_config(SERVICE_NAME)
    app['env']['BROKER_PORT'] = '9092'
    marathon.update_app(SERVICE_NAME, app)

    tasks.check_tasks_updated(SERVICE_NAME, pod_prefix, ids_before)
    # All brokers must be running again before they are queried.
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    for broker_id in range(DEFAULT_BROKER_COUNT):
        broker_info = service_cli('broker get {}'.format(broker_id))
        assert broker_info['port'] == 9092
def test_updated_placement_constraints_not_applied_with_other_changes():
    """After a placement switch, raise HELLO_COUNT to 2 and check where each hello task runs.

    hello-0-server is expected on the originally pinned agent and
    hello-1-server on the agent named by the updated constraint.
    """
    first_agent, second_agent, _initial_ids = setup_constraint_switch()
    service = config.SERVICE_NAME

    # On top of the placement change, ask for a second hello pod.
    app = sdk_marathon.get_config(service)
    app["env"]["HELLO_COUNT"] = "2"
    sdk_marathon.update_app(app)

    # The additional hello-server task should launch where the _new_
    # constraint tells it to be.
    sdk_tasks.check_running(service, 2)
    sdk_plan.wait_for_completed_deployment(service)

    assert get_task_host("hello-0-server") == first_agent
    assert get_task_host("hello-1-server") == second_agent
def test_bump_node_counts():
    """Add one ingest and one coordinator node and expect two more running tasks."""
    global current_expected_task_count

    # Only ingest and coordinator are bumped here; data is bumped in the
    # following test. Adding two data nodes is avoided because the cluster
    # sometimes won't have enough room for them.
    count_keys = ('INGEST_NODE_COUNT', 'COORDINATOR_NODE_COUNT')

    app = sdk_marathon.get_config(foldered_name)
    app_env = app['env']
    for key in count_keys:
        app_env[key] = str(int(app_env[key]) + 1)

    sdk_marathon.update_app(foldered_name, app)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    current_expected_task_count += 2
    sdk_tasks.check_running(foldered_name, current_expected_task_count)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_pod_replace_then_immediate_config_update():
    """Issue `pod replace data-0` and a config update back to back, then verify the plugin is installed."""
    plugin_name = 'analysis-phonetic'

    # Prepare the updated config up front so it can be submitted right after the replace.
    updated_app = sdk_marathon.get_config(foldered_name)
    updated_env = updated_app['env']
    updated_env['TASKCFG_ALL_ELASTICSEARCH_PLUGINS'] = plugin_name
    updated_env['UPDATE_STRATEGY'] = 'parallel'

    sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'pod replace data-0')

    # Submit the config update immediately after the replace command.
    sdk_marathon.update_app(foldered_name, updated_app)

    # All nodes, data-0 in particular, should come up with the updated config.
    config.check_elasticsearch_plugin_installed(plugin_name, service_name=foldered_name)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_node_replace_replaces_node():
    """Replace the node-2 pod after adding a placement constraint that excludes its current host."""
    service = config.PACKAGE_NAME
    pod_to_replace = 'node-2'

    current_host = get_pod_host(pod_to_replace)
    log.info('avoid host for pod {}: {}'.format(pod_to_replace, current_host))

    # Constrain placement so the replacement does not land on the same host.
    app = sdk_marathon.get_config(service)
    unlike_host = 'hostname:UNLIKE:{}'.format(current_host)
    app['env']['PLACEMENT_CONSTRAINT'] = unlike_host
    sdk_marathon.update_app(service, app)

    sdk_plan.wait_for_completed_deployment(service)

    # Kick off the replace and wait for the recovery to run to completion.
    cmd.run_cli('cassandra pod replace {}'.format(pod_to_replace))

    sdk_plan.wait_for_kicked_off_recovery(service)
    sdk_plan.wait_for_completed_recovery(service, timeout_seconds=RECOVERY_TIMEOUT_SECONDS)
def test_modify_app_config():
    """Bump one app config value and verify that journal, name and data tasks are all updated.

    The task IDs of every group are captured before the change, so each
    update check compares against the IDs of its own group.
    """
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'

    journal_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'journal')
    name_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'name')
    # Captured here so the 'data' check below has its own baseline; checking
    # data tasks against the journal IDs would compare unrelated ID sets.
    data_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'data')

    config = marathon.get_config(FOLDERED_SERVICE_NAME)
    utils.out('marathon config: ')
    utils.out(config)

    # Increment the configured value by one and push the modified config.
    expiry_ms = int(config['env'][app_config_field])
    config['env'][app_config_field] = str(expiry_ms + 1)
    marathon.update_app(FOLDERED_SERVICE_NAME, config)

    # All tasks should be updated because hdfs-site.xml has changed
    check_healthy()
    tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'journal', journal_ids)
    tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'name', name_ids)
    tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'data', data_ids)
def test_adding_data_nodes_only_restarts_masters():
    """Add one data node and check which pod types get relaunched.

    After DATA_NODE_COUNT is incremented, the master tasks must have new ids
    while the pre-existing data and coordinator tasks keep theirs. The
    module-level expected task count is incremented to match.
    """
    global current_expected_task_count

    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    # Task ids before the scale-up, keyed by the order they were captured.
    masters_before = sdk_tasks.get_task_ids(service, "master")
    data_before = sdk_tasks.get_task_ids(service, "data")
    coordinators_before = sdk_tasks.get_task_ids(service, "coordinator")

    app_definition = sdk_marathon.get_config(service)
    env = app_definition['env']
    env['DATA_NODE_COUNT'] = str(int(env['DATA_NODE_COUNT']) + 1)
    sdk_marathon.update_app(service, app_definition)

    current_expected_task_count += 1
    sdk_tasks.check_running(service, current_expected_task_count)

    sdk_tasks.check_tasks_updated(service, "master", masters_before)
    sdk_tasks.check_tasks_not_updated(service, "data", data_before)
    sdk_tasks.check_tasks_not_updated(
        service, "coordinator", coordinators_before)
def test_custom_zookeeper():
    """Point the brokers at a custom ZK path and check the topic list is empty.

    A topic is created while KAFKA_ZOOKEEPER_URI is empty. After switching to
    a custom path the brokers must be relaunched, the zookeeper endpoint must
    report the new path, and the topic list must be empty.
    """
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    broker_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)

    def kafka_cli(command, **kwargs):
        # Thin wrapper so each call site only spells out the sub-command.
        return sdk_cmd.svc_cli(
            config.PACKAGE_NAME, foldered_name, command, **kwargs)

    brokers_before = sdk_tasks.get_task_ids(foldered_name, broker_prefix)

    # create a topic against the default zk:
    kafka_cli('topic create {}'.format(config.DEFAULT_TOPIC_NAME), json=True)
    topics = kafka_cli('topic list', json=True)
    assert topics == [config.DEFAULT_TOPIC_NAME]

    app_definition = sdk_marathon.get_config(foldered_name)
    env = app_definition['env']
    # should be using default path when this envvar is empty/unset:
    assert env['KAFKA_ZOOKEEPER_URI'] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that
    # it's automatically cleaned up in uninstall:
    zk_path = 'master.mesos:2181/{}/CUSTOMPATH'.format(
        sdk_utils.get_zk_path(foldered_name))
    env['KAFKA_ZOOKEEPER_URI'] = zk_path
    sdk_marathon.update_app(foldered_name, app_definition)

    sdk_tasks.check_tasks_updated(foldered_name, broker_prefix, brokers_before)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # wait for brokers to finish registering
    test_utils.broker_count_check(
        config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    reported_zk = kafka_cli('endpoints zookeeper')
    assert reported_zk.rstrip('\n') == zk_path

    # topic created earlier against default zk should no longer be present:
    assert kafka_cli('topic list', json=True) == []
def test_state_refresh_disable_cache():
    """Toggle DISABLE_STATE_CACHE on the scheduler and probe refresh_cache.

    With the envvar set, 'state refresh_cache' must eventually fail with a
    409 Conflict. After removing it the command must succeed again. Service
    tasks must keep their ids across both scheduler updates.
    """
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    def refresh_cache():
        return sdk_cmd.svc_cli(
            config.PACKAGE_NAME, foldered_name, 'state refresh_cache')

    def push_scheduler_config(app_definition):
        # Apply the change; the service's own tasks must be left untouched.
        sdk_marathon.update_app(foldered_name, app_definition)
        sdk_tasks.check_tasks_not_updated(foldered_name, '', task_ids)
        config.check_running(foldered_name)

    config.check_running(foldered_name)
    task_ids = sdk_tasks.get_task_ids(foldered_name, '')

    # caching enabled by default:
    output = refresh_cache()
    assert "Received cmd: refresh" in output

    app_definition = sdk_marathon.get_config(foldered_name)
    app_definition['env']['DISABLE_STATE_CACHE'] = 'any-text-here'
    push_scheduler_config(app_definition)

    # caching disabled, refresh_cache should fail with a 409 error
    # (eventually, once scheduler is up):
    def check_cache_refresh_fails_409conflict():
        try:
            refresh_cache()
        except Exception as err:
            return "failed: 409 Conflict" in err.args[0]
        return False

    shakedown.wait_for(
        lambda: check_cache_refresh_fails_409conflict(), timeout_seconds=120.)

    app_definition = sdk_marathon.get_config(foldered_name)
    del app_definition['env']['DISABLE_STATE_CACHE']
    push_scheduler_config(app_definition)
    # ensure marathon thinks the deployment is complete too
    shakedown.deployment_wait()

    # caching reenabled, refresh_cache should succeed
    # (eventually, once scheduler is up):
    def check_cache_refresh():
        return refresh_cache()

    output = shakedown.wait_for(
        lambda: check_cache_refresh(), timeout_seconds=120.)
    assert "Received cmd: refresh" in output
def test_state_refresh_disable_cache():
    """Toggle DISABLE_STATE_CACHE on the scheduler and probe refresh_cache.

    With the envvar set, the refresh_cache CLI call must eventually fail with
    a 409 Conflict. After removing it the call must succeed again. Service
    tasks must keep their ids across both scheduler updates.
    """
    refresh_command = 'hello-world --name={} state refresh_cache'.format(
        FOLDERED_SERVICE_NAME)

    check_running(FOLDERED_SERVICE_NAME)
    ids_before = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, '')

    # caching enabled by default:
    output = sdk_cmd.run_cli(refresh_command)
    assert "Received cmd: refresh" in output

    marathon_app = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    marathon_app['env']['DISABLE_STATE_CACHE'] = 'any-text-here'
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, marathon_app)
    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, '', ids_before)
    check_running(FOLDERED_SERVICE_NAME)

    # caching disabled, refresh_cache should fail with a 409 error
    # (eventually, once scheduler is up):
    def check_cache_refresh_fails_409conflict():
        try:
            sdk_cmd.run_cli(refresh_command)
        except Exception as err:
            return "failed: 409 Conflict" in err.args[0]
        return False

    shakedown.wait_for(
        lambda: check_cache_refresh_fails_409conflict(), timeout_seconds=120.)

    marathon_app = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    del marathon_app['env']['DISABLE_STATE_CACHE']
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, marathon_app)
    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, '', ids_before)
    check_running(FOLDERED_SERVICE_NAME)
    # ensure marathon thinks the deployment is complete too
    shakedown.deployment_wait()

    # caching reenabled, refresh_cache should succeed
    # (eventually, once scheduler is up):
    def check_cache_refresh():
        return sdk_cmd.run_cli(refresh_command)

    output = shakedown.wait_for(
        lambda: check_cache_refresh(), timeout_seconds=120.)
    assert "Received cmd: refresh" in output
def test_custom_zookeeper():
    """Point the brokers at a custom ZK path and check the topic list is empty.

    A topic is created while KAFKA_ZOOKEEPER_URI is empty. After switching to
    a custom path the brokers must be relaunched, the zookeeper endpoint must
    report the new path, and the topic list must be empty.
    """
    broker_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)

    def kafka_cli(command, **kwargs):
        # All CLI calls in this test target the foldered service.
        return test_utils.service_cli(
            command, service_name=FOLDERED_SERVICE_NAME, **kwargs)

    brokers_before = sdk_tasks.get_task_ids(
        FOLDERED_SERVICE_NAME, broker_prefix)

    # sanity check: brokers should be reinitialized:
    listed_brokers = kafka_cli('broker list')
    expected_brokers = {str(i) for i in range(config.DEFAULT_BROKER_COUNT)}
    assert set(listed_brokers) == expected_brokers

    # create a topic against the default zk:
    kafka_cli('topic create {}'.format(DEFAULT_TOPIC_NAME))
    assert kafka_cli('topic list') == [DEFAULT_TOPIC_NAME]

    app_definition = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    env = app_definition['env']
    # should be using default path when this envvar is empty/unset:
    assert env['KAFKA_ZOOKEEPER_URI'] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that
    # it's automatically cleaned up in uninstall:
    zk_path = 'master.mesos:2181/dcos-service-{}/CUSTOMPATH'.format(
        ZK_SERVICE_PATH)
    env['KAFKA_ZOOKEEPER_URI'] = zk_path
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, app_definition)

    sdk_tasks.check_tasks_updated(
        FOLDERED_SERVICE_NAME, broker_prefix, brokers_before)
    sdk_plan.wait_for_completed_deployment(FOLDERED_SERVICE_NAME)

    reported_zk = kafka_cli('endpoints zookeeper', get_json=False)
    assert reported_zk.rstrip('\n') == zk_path

    # topic created earlier against default zk should no longer be present:
    assert kafka_cli('topic list') == []
def test_modify_app_config_rollback():
    """Start a config update, then roll it back after the journal nodes move.

    The expiry setting is incremented; once the journal tasks have been
    relaunched the previous Marathon config is re-applied. Afterwards the
    setting must hold its old value, and the zkfc and data tasks must still
    have their original ids.
    """
    check_healthy()
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'

    # Capture and print the starting task ids for each pod type, in order.
    pod_types = ('journal', 'name', 'zkfc', 'data')
    ids_before = {
        pod: tasks.get_task_ids(PACKAGE_NAME, pod) for pod in pod_types
    }
    for pod in pod_types:
        print(pod + ' ids: ' + str(ids_before[pod]))

    # Two separate fetches: one is kept pristine for the rollback, the other
    # is mutated and pushed.
    pristine_config = marathon.get_config(PACKAGE_NAME)
    changed_config = marathon.get_config(PACKAGE_NAME)
    print('marathon config: ')
    print(changed_config)
    expiry_ms = int(changed_config['env'][app_config_field])
    print('expiry ms: ' + str(expiry_ms))
    changed_config['env'][app_config_field] = str(expiry_ms + 1)
    marathon.update_app(PACKAGE_NAME, changed_config)

    # Wait for journal nodes to be affected by the change
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', ids_before['journal'])
    journal_ids_mid_update = tasks.get_task_ids(PACKAGE_NAME, 'journal')

    print('old config: ')
    print(pristine_config)
    # Put the old config back (rollback)
    marathon.update_app(PACKAGE_NAME, pristine_config)

    # Wait for the journal nodes to return to their old configuration
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids_mid_update)
    check_healthy()

    current_config = marathon.get_config(PACKAGE_NAME)
    assert int(current_config['env'][app_config_field]) == expiry_ms

    # ZKFC and Data tasks should not have been affected
    for untouched_pod in ('zkfc', 'data'):
        tasks.check_tasks_not_updated(
            PACKAGE_NAME, untouched_pod, ids_before[untouched_pod])
def test_adding_data_node_only_restarts_masters():
    """Add one data node and check which pod types get relaunched.

    After DATA_NODE_COUNT is incremented and the deployment completes, the
    master tasks must have new ids while the pre-existing data and
    coordinator tasks keep theirs. The module-level expected task count is
    incremented to match.
    """
    global current_expected_task_count

    masters_before = sdk_tasks.get_task_ids(foldered_name, "master")
    data_before = sdk_tasks.get_task_ids(foldered_name, "data")
    coordinators_before = sdk_tasks.get_task_ids(foldered_name, "coordinator")

    app_definition = sdk_marathon.get_config(foldered_name)
    env = app_definition["env"]
    env["DATA_NODE_COUNT"] = str(int(env["DATA_NODE_COUNT"]) + 1)
    sdk_marathon.update_app(app_definition)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    current_expected_task_count += 1
    sdk_tasks.check_running(foldered_name, current_expected_task_count)

    sdk_tasks.check_tasks_updated(foldered_name, "master", masters_before)
    sdk_tasks.check_tasks_not_updated(foldered_name, "data", data_before)
    sdk_tasks.check_tasks_not_updated(
        foldered_name, "coordinator", coordinators_before)

    # Leave the service with both plans complete.
    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_custom_decommission():
    """Scale the world pod 1 -> 2 -> 1 and check the decommission plan's step.

    After each scale-down to one, the first step of the first phase of the
    decommission plan must carry the expected custom step name.
    """
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    def set_world_count(count):
        # Re-read the app each time so the update starts from the live config.
        app_definition = sdk_marathon.get_config(foldered_name)
        app_definition['env']['WORLD_COUNT'] = count
        sdk_marathon.update_app(foldered_name, app_definition)

    def assert_custom_step_ran():
        sdk_plan.wait_for_completed_plan(foldered_name, 'decommission')
        plan = sdk_plan.get_decommission_plan(foldered_name)
        log.info("decommission plan: {}".format(plan))
        first_step = plan['phases'][0]['steps'][0]
        assert "custom_decomission_step" == first_step['name']

    set_world_count('1')
    assert_custom_step_ran()

    # scale back up
    set_world_count('2')
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Let's decommission again!
    set_world_count('1')
    sdk_plan.wait_for_completed_deployment(foldered_name)
    assert_custom_step_ran()
def test_deploy():
    """Watch tasks fail to launch, then fix the env and expect a clean deploy.

    For `wait_time` seconds the task states are polled once per second; more
    than three consecutive polls that include TASK_RUNNING fail the test.
    Afterwards the missing envvars are supplied through Marathon and the
    service is expected to reach the running state.
    """
    wait_time = 30
    # taskcfg.yml will initially fail to deploy because several options are
    # missing in the default sdk_marathon.json.mustache. verify that tasks are
    # failing for 30s before continuing.
    log.info(
        'Checking that tasks are failing to launch for at least {}s'.format(
            wait_time))

    # we can get brief blips of TASK_RUNNING but they shouldnt last more than
    # 2-3s:
    running_streak = 0

    @retrying.retry(wait_fixed=1000,
                    stop_max_delay=1000 * wait_time,
                    retry_on_result=lambda res: not res)
    def poll_task_states():
        nonlocal running_streak
        states = [
            task['state']
            for task in shakedown.get_service_tasks(config.SERVICE_NAME)
        ]
        log.info('Task states: {}'.format(states))
        if 'TASK_RUNNING' not in states:
            running_streak = 0
        else:
            running_streak += 1
            assert running_streak <= 3
        # Always falsy, so polling continues until the retry deadline.
        return False

    try:
        poll_task_states()
    except retrying.RetryError:
        log.info('Timeout reached as expected')

    # add the needed envvars in marathon and confirm that the deployment
    # succeeds:
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    env = app_definition['env']
    del env['SLEEP_DURATION']
    env.update({
        'TASKCFG_ALL_OUTPUT_FILENAME': 'output',
        'TASKCFG_ALL_SLEEP_DURATION': '1000',
    })
    sdk_marathon.update_app(config.SERVICE_NAME, app_definition)
    config.check_running()
def test_custom_decommission():
    """Scale the world pod 1 -> 2 -> 1 and check the decommission plan's step.

    After each scale-down to one, the first step of the first phase of the
    decommission plan must be named "custom_decommission_step".
    """
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    def set_world_count(count):
        # Re-read the app each time so the update starts from the live config.
        app_definition = sdk_marathon.get_config(foldered_name)
        app_definition["env"]["WORLD_COUNT"] = count
        sdk_marathon.update_app(app_definition)

    def assert_custom_step_ran():
        sdk_plan.wait_for_completed_plan(foldered_name, "decommission")
        plan = sdk_plan.get_decommission_plan(foldered_name)
        log.info(sdk_plan.plan_string("decommission", plan))
        first_step = plan["phases"][0]["steps"][0]
        assert "custom_decommission_step" == first_step["name"]

    set_world_count("1")
    assert_custom_step_ran()

    # scale back up
    set_world_count("2")
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Let's decommission again!
    set_world_count("1")
    sdk_plan.wait_for_completed_deployment(foldered_name)
    assert_custom_step_ran()
def test_port_static_to_static_port():
    """Move the brokers from static port 9092 to static port 9095.

    Before the change every broker and every advertised endpoint must use
    9092. After BROKER_PORT is set to 9095 the broker tasks must be
    relaunched and every advertised endpoint must use 9095.
    """
    broker_prefix = '{}-'.format(DEFAULT_POD_TYPE)

    def assert_endpoints_use_port(expected_port):
        endpoints = service_cli('endpoints broker')
        kinds = ('address', 'dns')
        for kind in kinds:
            assert len(endpoints[kind]) == DEFAULT_BROKER_COUNT
        for kind in kinds:
            for endpoint in endpoints[kind]:
                # The port is whatever follows the last ':' in the entry.
                assert int(endpoint.split(':')[-1]) == expected_port

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
    brokers_before = tasks.get_task_ids(SERVICE_NAME, broker_prefix)

    app_definition = marathon.get_config(SERVICE_NAME)
    utils.out('Old Config :{}'.format(app_definition))

    for broker_id in range(DEFAULT_BROKER_COUNT):
        broker = service_cli('broker get {}'.format(broker_id))
        assert broker['port'] == 9092
    assert_endpoints_use_port(9092)

    app_definition['env']['BROKER_PORT'] = '9095'
    marathon.update_app(SERVICE_NAME, app_definition)
    utils.out('New Config :{}'.format(app_definition))

    tasks.check_tasks_updated(SERVICE_NAME, broker_prefix, brokers_before)
    # all tasks are running
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    assert_endpoints_use_port(9095)
def setup_constraint_switch():
    """Reinstall the service pinned to one agent, then re-pin it to another.

    Returns:
        A tuple ``(some_agent, other_agent, hello_ids)``: the agent the hello
        pod was first placed on, the agent named in the updated placement
        rule, and the hello task ids captured before the placement change.
    """
    service = config.SERVICE_NAME
    sdk_install.uninstall(config.PACKAGE_NAME, service)

    private_agents = shakedown.get_private_agents()
    some_agent, other_agent = private_agents[0], private_agents[1]
    log.info('Agents: %s %s', some_agent, other_agent)
    assert some_agent != other_agent

    placement_template = '[["hostname", "LIKE", "{}"]]'
    install_options = _escape_placement_for_1_9({
        "service": {"yaml": "marathon_constraint"},
        "hello": {
            "count": 1,
            # First, we stick the pod to some_agent
            "placement": placement_template.format(some_agent),
        },
        "world": {"count": 0},
    })

    sdk_install.install(
        config.PACKAGE_NAME, service, 1, additional_options=install_options)
    sdk_tasks.check_running(service, 1)
    hello_ids = sdk_tasks.get_task_ids(service, 'hello')

    # Now, stick it to other_agent
    app_definition = sdk_marathon.get_config(service)
    app_definition['env']['HELLO_PLACEMENT'] = placement_template.format(
        other_agent)
    sdk_marathon.update_app(service, app_definition)

    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(service)

    return some_agent, other_agent, hello_ids
def test_custom_zookeeper():
    """Point the brokers at a custom ZK path and compare the topic list.

    A topic is created while KAFKA_ZOOKEEPER_URI is empty. After switching to
    a custom path the brokers must be relaunched, the zookeeper endpoint must
    equal the new path, and the topic list (ignoring automatic topics) must
    be empty.
    """
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    broker_prefix = "{}-".format(config.DEFAULT_POD_TYPE)
    brokers_before = sdk_tasks.get_task_ids(foldered_name, broker_prefix)

    # create a topic against the default zk:
    test_utils.create_topic(
        config.DEFAULT_TOPIC_NAME, service_name=foldered_name)

    app_definition = sdk_marathon.get_config(foldered_name)
    env = app_definition["env"]
    # should be using default path when this envvar is empty/unset:
    assert env["KAFKA_ZOOKEEPER_URI"] == ""

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that
    # it's automatically cleaned up in uninstall:
    zk_path = "master.mesos:2181/{}/CUSTOMPATH".format(
        sdk_utils.get_zk_path(foldered_name))
    env["KAFKA_ZOOKEEPER_URI"] = zk_path
    sdk_marathon.update_app(app_definition)

    sdk_tasks.check_tasks_updated(foldered_name, broker_prefix, brokers_before)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # wait for brokers to finish registering
    test_utils.broker_count_check(
        config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    reported_zk = sdk_networks.get_endpoint_string(
        config.PACKAGE_NAME, foldered_name, "zookeeper")
    assert reported_zk == zk_path

    # topic created earlier against default zk should no longer be present:
    exit_code, listing, _ = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, foldered_name, "topic list")
    assert exit_code == 0, "Topic list command failed"

    test_utils.assert_topic_lists_are_equal_without_automatic_topics(
        [], json.loads(listing))
def test_port_static_to_static_port():
    """Move the brokers from static port 9092 to static port 9095.

    Before the change every broker and every advertised endpoint must use
    9092. After BROKER_PORT is set to 9095 the broker tasks must be
    relaunched and every advertised endpoint must use 9095.
    """
    service = config.SERVICE_NAME
    broker_count = config.DEFAULT_BROKER_COUNT
    broker_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)

    def assert_endpoints_use_port(expected_port):
        endpoints = test_utils.service_cli('endpoints broker')
        kinds = ('address', 'dns')
        for kind in kinds:
            assert len(endpoints[kind]) == broker_count
        for kind in kinds:
            for endpoint in endpoints[kind]:
                # The port is whatever follows the last ':' in the entry.
                assert int(endpoint.split(':')[-1]) == expected_port

    sdk_tasks.check_running(service, broker_count)
    brokers_before = sdk_tasks.get_task_ids(service, broker_prefix)

    app_definition = sdk_marathon.get_config(service)

    for broker_id in range(broker_count):
        broker = test_utils.service_cli('broker get {}'.format(broker_id))
        assert broker['port'] == 9092
    assert_endpoints_use_port(9092)

    app_definition['env']['BROKER_PORT'] = '9095'
    sdk_marathon.update_app(service, app_definition)

    sdk_tasks.check_tasks_updated(service, broker_prefix, brokers_before)
    # all tasks are running
    sdk_tasks.check_running(service, broker_count)

    assert_endpoints_use_port(9095)
def test_config_update_while_partitioned():
    """Raise WORLD_CPUS while world-0's agent is partitioned, then reconnect.

    After the agent is reconnected the world tasks must have new ids, the
    number of running world tasks must match the expected count, and each of
    them must report the updated cpu value.
    """
    service = config.SERVICE_NAME
    world_ids_before = sdk_tasks.get_task_ids(service, 'world')

    partitioned_host = sdk_hosts.system_host(service, "world-0-server")
    shakedown.partition_agent(partitioned_host)

    app_definition = sdk_marathon.get_config(service)
    env = app_definition['env']
    updated_cpus = float(env['WORLD_CPUS']) + 0.1
    env['WORLD_CPUS'] = str(updated_cpus)
    # Do not block on the deployment: it is started while the agent is away.
    sdk_marathon.update_app(
        service, app_definition, wait_for_completed_deployment=False)

    shakedown.reconnect_agent(partitioned_host)
    sdk_tasks.check_tasks_updated(service, 'world', world_ids_before)
    config.check_running()

    running_world_tasks = []
    for task in shakedown.get_service_tasks(service):
        if not task['name'].startswith('world'):
            continue
        if task['state'] == "TASK_RUNNING":
            running_world_tasks.append(task)

    assert len(running_world_tasks) == config.world_task_count(service)
    for task in running_world_tasks:
        assert config.close_enough(task['resources']['cpus'], updated_cpus)
def setup_constraint_switch():
    """Reinstall the service pinned to one agent, then re-pin it to another.

    Returns:
        A tuple ``(some_agent, other_agent, hello_ids)``: the hostname the
        hello pod was first placed on, the hostname named in the updated
        placement rule, and the hello task ids captured before the change.
    """
    service = config.SERVICE_NAME
    sdk_install.uninstall(config.PACKAGE_NAME, service)

    private_agents = sdk_agents.get_private_agents()
    some_agent = private_agents[0]["hostname"]
    other_agent = private_agents[1]["hostname"]
    log.info("Agents: %s %s", some_agent, other_agent)
    assert some_agent != other_agent

    placement_template = '[["hostname", "LIKE", "{}"]]'
    install_options = _escape_placement_for_1_9({
        "service": {"yaml": "marathon_constraint"},
        "hello": {
            "count": 1,
            # First, we stick the pod to some_agent
            "placement": placement_template.format(some_agent),
        },
        "world": {"count": 0},
    })

    sdk_install.install(
        config.PACKAGE_NAME, service, 1, additional_options=install_options)
    sdk_tasks.check_running(service, 1)
    hello_ids = sdk_tasks.get_task_ids(service, "hello")

    # Now, stick it to other_agent
    app_definition = sdk_marathon.get_config(service)
    app_definition["env"]["HELLO_PLACEMENT"] = placement_template.format(
        other_agent)
    sdk_marathon.update_app(app_definition)

    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(service)

    return some_agent, other_agent, hello_ids
def test_increase_count():
    """Add one broker and verify it only starts after the plan is continued.

    After BROKER_COUNT is incremented the extra broker must not be running
    within 60 seconds and the plan must be WAITING on the new step. Once
    'plan continue' is issued, the extra broker must come up and the whole
    plan must be COMPLETE.
    """
    config = marathon.get_config(SERVICE_NAME)
    config['env']['BROKER_COUNT'] = str(int(config['env']['BROKER_COUNT']) + 1)
    marathon.update_app(SERVICE_NAME, config)

    # The extra broker must NOT come up on its own. The check targets
    # SERVICE_NAME, the same service that was just updated and that every
    # other call in this test inspects.
    try:
        tasks.check_running(
            SERVICE_NAME, DEFAULT_BROKER_COUNT + 1, timeout_seconds=60)
    except AssertionError:
        # An assertion raised by the check itself is a genuine failure.
        raise
    except Exception:
        # Expected: the check gives up because the new task never starts.
        # Exception (not a bare except) so Ctrl-C / SystemExit still abort.
        pass
    else:
        raise AssertionError("Should not start task now")

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    plan = service_cli('plan show {}'.format(DEFAULT_PLAN_NAME))
    phase = plan['phases'][0]
    assert plan['status'] == 'WAITING'
    assert phase['status'] == 'WAITING'
    # Existing brokers are done; only the newly added step is held back.
    for step in range(DEFAULT_BROKER_COUNT):
        assert phase['steps'][step]['status'] == 'COMPLETE'
    assert phase['steps'][DEFAULT_BROKER_COUNT]['status'] == 'WAITING'

    service_cli(
        'plan continue {} {}'.format(DEFAULT_PLAN_NAME, DEFAULT_PHASE_NAME))

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT + 1)
    broker_count_check(DEFAULT_BROKER_COUNT + 1)

    plan = service_cli('plan show {}'.format(DEFAULT_PLAN_NAME))
    phase = plan['phases'][0]
    assert plan['status'] == 'COMPLETE'
    assert phase['status'] == 'COMPLETE'
    for step in range(DEFAULT_BROKER_COUNT + 1):
        assert phase['steps'][step]['status'] == 'COMPLETE'
def test_deploy():
    """Watch tasks fail to launch, then fix the env and expect a clean deploy.

    Task states are polled through shakedown.wait_for with a `wait_time`
    second timeout; more than three consecutive polls that include
    TASK_RUNNING trip an assertion. Afterwards the missing envvars are
    supplied through Marathon and the service is expected to be running.
    """
    wait_time = 30
    # taskcfg.yml will initially fail to deploy because several options are
    # missing in the default marathon.json.mustache. verify that tasks are
    # failing for 30s before continuing.
    sdk_utils.out(
        'Checking that tasks are failing to launch for at least {}s'.format(
            wait_time))

    # we can get brief blips of TASK_RUNNING but they shouldnt last more than
    # 2-3s:
    running_streak = 0

    def fn():
        nonlocal running_streak
        states = [
            task['state'] for task in shakedown.get_service_tasks(PACKAGE_NAME)
        ]
        sdk_utils.out('Task states: {}'.format(states))
        if 'TASK_RUNNING' not in states:
            running_streak = 0
        else:
            running_streak += 1
            assert running_streak <= 3
        # Always falsy, so the wait only ends by timing out.
        return False

    try:
        shakedown.wait_for(lambda: fn(), timeout_seconds=wait_time)
    except shakedown.TimeoutExpired:
        sdk_utils.out('Timeout reached as expected')

    # add the needed envvars in marathon and confirm that the deployment
    # succeeds:
    app_definition = marathon.get_config(PACKAGE_NAME)
    env = app_definition['env']
    del env['SLEEP_DURATION']
    env.update({
        'TASKCFG_ALL_OUTPUT_FILENAME': 'output',
        'TASKCFG_ALL_SLEEP_DURATION': '1000',
    })
    marathon.update_app(PACKAGE_NAME, app_definition)
    check_running()
def test_node_replace_replaces_node():
    """Replace the pod behind task node-2-server, excluding its current host.

    Steps: find the task in the task summary, set PLACEMENT_CONSTRAINT to an
    UNLIKE rule for its host, wait for the deployment, issue the pod replace
    and wait for the recovery plan to start and then complete.
    """
    service = config.SERVICE_NAME

    matching_tasks = [
        task for task in sdk_tasks.get_summary()
        if task.name == 'node-2-server'
    ]
    replace_task = matching_tasks[0]
    log.info('avoid host for task {}'.format(replace_task))

    # The pod name is the task name without its '-server' suffix.
    suffix_length = len('-server')
    replace_pod_name = replace_task.name[:-suffix_length]

    # Update the placement constraints so the new node doesn't end up on the
    # same host
    app_definition = sdk_marathon.get_config(service)
    unlike_constraint = '[["hostname", "UNLIKE", "{}"]]'.format(
        replace_task.host)
    app_definition['env']['PLACEMENT_CONSTRAINT'] = unlike_constraint
    sdk_marathon.update_app(service, app_definition)
    sdk_plan.wait_for_completed_deployment(service)

    # start replace and wait for it to finish
    sdk_cmd.svc_cli(
        config.PACKAGE_NAME, service,
        'pod replace {}'.format(replace_pod_name))
    sdk_plan.wait_for_kicked_off_recovery(service)
    sdk_plan.wait_for_completed_recovery(
        service, timeout_seconds=RECOVERY_TIMEOUT_SECONDS)
def test_node_replace_replaces_node():
    """Replace node-2 on a different host and run the verification job.

    The second whitespace-separated field of the last 'node-2-server' row in
    the 'task' CLI output is used as the node's address, both for the UNLIKE
    placement constraint and as NODE_IP for the verify-replace job.
    """
    task_listing = cmd.run_cli('task')
    node_rows = [
        row for row in task_listing.split('\n')
        if row.startswith('node-2-server')
    ]
    node_ip = node_rows[-1].split()[1]

    # Update the placement constraints so the new node doesn't end up on the
    # same host
    app_definition = marathon.get_config(PACKAGE_NAME)
    unlike_constraint = 'hostname:UNLIKE:{}'.format(node_ip)
    app_definition['env']['PLACEMENT_CONSTRAINT'] = unlike_constraint
    marathon.update_app(PACKAGE_NAME, app_definition)

    plan.wait_for_completed_deployment(PACKAGE_NAME)

    # start replace and wait for it to finish
    cmd.run_cli('cassandra pods replace node-2')
    plan.wait_for_completed_recovery(PACKAGE_NAME)

    # Install replace verification job with correct node IP templated
    # (the job checks for that IP's absence in the peers list and also verifies
    # that the expected number of peers is present, meaning that the node was
    # replaced from Cassandra's perspective)
    verification_jobs = [VERIFY_REPLACE_JOB]
    with JobContext(verification_jobs, NODE_IP=node_ip):
        spin.time_wait_noisy(lambda: try_job(VERIFY_REPLACE_JOB))
def test_port_static_to_dynamic_port():
    """Switch the brokers from the static port to a dynamic one.

    BROKER_PORT is set to '0'. The broker tasks must then be relaunched and
    no advertised address or DNS endpoint may still use port 9092.
    """
    broker_prefix = '{}-'.format(DEFAULT_POD_TYPE)

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
    brokers_before = tasks.get_task_ids(SERVICE_NAME, broker_prefix)

    app_definition = marathon.get_config(SERVICE_NAME)
    app_definition['env']['BROKER_PORT'] = '0'
    marathon.update_app(SERVICE_NAME, app_definition)

    tasks.check_tasks_updated(SERVICE_NAME, broker_prefix, brokers_before)
    # all tasks are running
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    endpoints = service_cli('endpoints broker')
    kinds = ('address', 'dns')
    for kind in kinds:
        assert len(endpoints[kind]) == DEFAULT_BROKER_COUNT
    for kind in kinds:
        for endpoint in endpoints[kind]:
            # The port is whatever follows the last ':' in the entry.
            assert int(endpoint.split(':')[-1]) != 9092
def test_lock():
    """Verify that a second scheduler cannot start next to a running one.

    Without locking, the second scheduler would only fail at registration,
    after it had already written its config to ZK. To show that it fails
    immediately instead, the ZK ConfigTarget node is read before and after
    the attempted scale-up and must be unchanged.
    """
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    def get_zk_node_data(node_name):
        explorer_url = (
            "/exhibitor/exhibitor/v1/explorer/node-data?key={}".format(
                node_name))
        return sdk_cmd.cluster_request("GET", explorer_url).json()

    def last_failure_timestamp(app_definition):
        # None when the app has no recorded task failure.
        failure = app_definition.get("lastTaskFailure", {})
        return failure.get("timestamp", None)

    # Get ZK state from running framework
    zk_path = "{}/ConfigTarget".format(sdk_utils.get_zk_path(foldered_name))
    zk_config_old = get_zk_node_data(zk_path)

    # Get marathon app
    marathon_config = sdk_marathon.get_config(foldered_name)
    old_timestamp = last_failure_timestamp(marathon_config)

    # Scale to 2 instances: first drop the single-instance label, then raise
    # the instance count in a second update.
    labels = marathon_config["labels"]
    original_labels = labels.copy()
    del labels["MARATHON_SINGLE_INSTANCE_APP"]
    sdk_marathon.update_app(marathon_config)
    marathon_config["instances"] = 2
    sdk_marathon.update_app(
        marathon_config, wait_for_completed_deployment=False)

    @retrying.retry(wait_fixed=1000,
                    stop_max_delay=120 * 1000,
                    retry_on_result=lambda res: not res)
    def wait_for_second_scheduler_to_fail():
        # A changed failure timestamp is taken as the second instance failing.
        current = last_failure_timestamp(
            sdk_marathon.get_config(foldered_name))
        return current != old_timestamp

    wait_for_second_scheduler_to_fail()

    # Verify ZK is unchanged
    zk_config_new = get_zk_node_data(zk_path)
    assert zk_config_old == zk_config_new

    # In order to prevent the second scheduler instance from obtaining a lock,
    # we undo the "scale-up" operation
    marathon_config["instances"] = 1
    marathon_config["labels"] = original_labels
    sdk_marathon.update_app(marathon_config, force=True)
def test_secrets_config_update():
    """Swap the service's secrets through a config update and verify them.

    1) install examples/secrets.yml
    2) create new Secrets, delete old Secrets
    3) update configuration with new Secrets
    4) verify secret content (using new Secrets after config update)
    """
    expected_task_count = NUM_HELLO + NUM_WORLD
    default_secret_prefix = "{}/".format(PACKAGE_NAME)

    def assert_secret_content(expected, world_secret3_path):
        # Only the first pod of each type is inspected.
        hello_tasks = tasks.get_task_ids(PACKAGE_NAME, "hello-0")
        world_tasks = tasks.get_task_ids(PACKAGE_NAME, "world-0")

        world_task = world_tasks[0]
        assert expected == task_exec(
            world_task, "bash -c 'echo $WORLD_SECRET1_ENV'")
        assert expected == task_exec(world_task, "cat WORLD_SECRET2_FILE")
        assert expected == task_exec(
            world_task, "cat {}".format(world_secret3_path))

        # hello tasks has container image
        hello_task = hello_tasks[0]
        assert expected == task_exec(
            hello_task, "bash -c 'echo $HELLO_SECRET1_ENV'")
        assert expected == task_exec(hello_task, "cat HELLO_SECRET1_FILE")
        assert expected == task_exec(hello_task, "cat HELLO_SECRET2_FILE")

    install.uninstall(PACKAGE_NAME)

    create_secrets(default_secret_prefix)

    install.install(
        PACKAGE_NAME, expected_task_count, additional_options=secret_options)

    # launch will fail if secrets are not available or not accessible
    plan.wait_for_completed_deployment(PACKAGE_NAME)

    # tasks will fail if secret file is not created
    tasks.check_running(PACKAGE_NAME, expected_task_count)

    # Verify secret content, one from each pod type;
    # make sure it has the default value
    assert_secret_content(
        secret_content_default, "{}/secret3".format(PACKAGE_NAME))

    # clean up and delete secrets (defaults)
    delete_secrets(default_secret_prefix)

    # create new secrets with new content -- New Value
    create_secrets(secret_content_arg=secret_content_alternative)

    app_definition = marathon.get_config(PACKAGE_NAME)
    app_definition['env'].update({
        'HELLO_SECRET1': 'secret1',
        'HELLO_SECRET2': 'secret2',
        'WORLD_SECRET1': 'secret1',
        'WORLD_SECRET2': 'secret2',
        'WORLD_SECRET3': 'secret3',
    })

    # config update
    marathon.update_app(PACKAGE_NAME, app_definition)

    # wait till plan is complete - pods are supposed to restart
    plan.wait_for_completed_deployment(PACKAGE_NAME)

    # all tasks are running
    tasks.check_running(PACKAGE_NAME, expected_task_count)

    # Verify secret content is changed
    assert_secret_content(secret_content_alternative, "secret3")

    # clean up and delete secrets
    delete_secrets()
def update_app(service_name, options, expected_task_count):
    """Merge `options` into the service's Marathon env and wait for rollout.

    Args:
        service_name: Name of the service whose Marathon app is updated.
        options: Mapping of env entries merged into the app's 'env'.
        expected_task_count: Number of tasks passed to the running check
            after the deployment plan completes.
    """
    marathon_app = sdk_marathon.get_config(service_name)
    env = marathon_app['env']
    env.update(options)

    sdk_marathon.update_app(service_name, marathon_app)
    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_tasks.check_running(service_name, expected_task_count)
def test_increase_cpu():
    """Raise BROKER_CPUS and step through the resulting paused plan.

    After the Marathon update the plan is expected to report WAITING. The
    test then issues ``plan continue`` twice (for pod 0 and pod 1), checks
    that the corresponding tasks were replaced, and finally waits for the
    whole plan to report COMPLETE.
    """
    show_plan_cmd = 'plan show {}'.format(DEFAULT_PLAN_NAME)
    continue_plan_cmd = 'plan continue {} {}'.format(DEFAULT_PLAN_NAME, DEFAULT_PHASE_NAME)
    total_steps = DEFAULT_BROKER_COUNT + 1

    def plan_has_status(expected):
        # Polling predicate: a failed CLI call or malformed response counts
        # as "not yet". Only Exception is caught so that KeyboardInterrupt
        # and SystemExit can still abort the wait loop.
        try:
            return service_cli(show_plan_cmd)['status'] == expected
        except Exception:
            return False

    def plan_waiting():
        return plan_has_status('WAITING')

    def plan_complete():
        return plan_has_status('COMPLETE')

    def broker_task_name(pod_index):
        return '{}-{}-{}'.format(DEFAULT_POD_TYPE, pod_index, DEFAULT_TASK_NAME)

    def assert_plan_paused(first_step_status):
        # Plan and phase are WAITING, step 1 is WAITING, and every later
        # step is still PENDING; step 0 is checked against the given status.
        pl = service_cli(show_plan_cmd)
        assert pl['status'] == 'WAITING'
        assert pl['phases'][0]['status'] == 'WAITING'
        assert pl['phases'][0]['steps'][0]['status'] == first_step_status
        assert pl['phases'][0]['steps'][1]['status'] == 'WAITING'
        for step in range(2, total_steps):
            assert pl['phases'][0]['steps'][step]['status'] == 'PENDING'

    config = marathon.get_config(SERVICE_NAME)
    config['env']['BROKER_CPUS'] = str(0.1 + float(config['env']['BROKER_CPUS']))
    marathon.update_app(SERVICE_NAME, config)

    spin.time_wait_return(plan_waiting)
    assert_plan_paused('WAITING')

    # All tasks are still running.
    tasks.check_running(SERVICE_NAME, total_steps)

    # Continue the plan and expect the pod-0 task to be replaced.
    broker_ids = tasks.get_task_ids(SERVICE_NAME, broker_task_name(0))
    service_cli(continue_plan_cmd)
    tasks.check_tasks_updated(SERVICE_NAME, broker_task_name(0), broker_ids)
    tasks.check_running(SERVICE_NAME, total_steps)

    assert_plan_paused('COMPLETE')

    # Continue again and expect the pod-1 task to be replaced.
    broker_ids = tasks.get_task_ids(SERVICE_NAME, broker_task_name(1))
    service_cli(continue_plan_cmd)
    tasks.check_tasks_updated(SERVICE_NAME, broker_task_name(1), broker_ids)

    spin.time_wait_return(plan_complete)

    pl = service_cli(show_plan_cmd)
    assert pl['status'] == 'COMPLETE'
    assert pl['phases'][0]['status'] == 'COMPLETE'
    for step in range(total_steps):
        assert pl['phases'][0]['steps'][step]['status'] == 'COMPLETE'

    broker_count_check(total_steps)
def test_secrets_config_update():
    """Check that tasks pick up new secret content after a config update.

    Steps:
      1) install examples/secrets.yml
      2) create new Secrets, delete old Secrets
      3) update configuration with new Secrets
      4) verify secret content (using new Secrets after config update)
    """
    expected_task_count = NUM_HELLO + NUM_WORLD
    default_secret_path = "{}/".format(config.SERVICE_NAME)
    world_task = "world-0-server"
    hello_task = "hello-0-server"

    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    create_secrets(default_secret_path)

    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        expected_task_count,
        additional_options=secret_options,
    )

    # The tasks fail when the secret file was not created.
    sdk_tasks.check_running(config.SERVICE_NAME, expected_task_count)

    # Check one task of each pod type; all must carry the default content.
    for command in (
        "bash -c 'echo $WORLD_SECRET1_ENV'",
        "cat WORLD_SECRET2_FILE",
        "cat {}/secret3".format(config.SERVICE_NAME),
    ):
        assert secret_content_default == read_secret(world_task, command)

    # The hello tasks run with a container image.
    for command in (
        "bash -c 'echo $HELLO_SECRET1_ENV'",
        "cat HELLO_SECRET1_FILE",
        "cat HELLO_SECRET2_FILE",
    ):
        assert secret_content_default == read_secret(hello_task, command)

    # Drop the default secrets ...
    delete_secrets(default_secret_path)

    # ... and create replacements holding the alternative content.
    create_secrets(secret_content=secret_content_alternative)

    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition["env"].update({
        "HELLO_SECRET1": "secret1",
        "HELLO_SECRET2": "secret2",
        "WORLD_SECRET1": "secret1",
        "WORLD_SECRET2": "secret2",
        "WORLD_SECRET3": "secret3",
    })

    # Push the configuration update.
    sdk_marathon.update_app(app_definition)

    # Wait until the plan is complete - the pods are supposed to restart.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    # All tasks must be running again.
    sdk_tasks.check_running(config.SERVICE_NAME, expected_task_count)

    # The secret content must now be the alternative value.
    for command in (
        "bash -c 'echo $WORLD_SECRET1_ENV'",
        "cat WORLD_SECRET2_FILE",
        "cat secret3",
    ):
        assert secret_content_alternative == read_secret(world_task, command)

    for command in (
        "bash -c 'echo $HELLO_SECRET1_ENV'",
        "cat HELLO_SECRET1_FILE",
        "cat HELLO_SECRET2_FILE",
    ):
        assert secret_content_alternative == read_secret(hello_task, command)

    # Remove the replacement secrets.
    delete_secrets()
def _set_xpack(service_name, is_enabled):
    """Set TASKCFG_ALL_XPACK_ENABLED for a service and wait for the rollout.

    Args:
        service_name: Name passed to the Marathon, plan and task helpers.
        is_enabled: Value stored as-is under ``TASKCFG_ALL_XPACK_ENABLED``
            in the app's ``env`` section.
    """
    app_config = marathon.get_config(service_name)
    env = app_config['env']
    env['TASKCFG_ALL_XPACK_ENABLED'] = is_enabled

    marathon.update_app(service_name, app_config)

    # Block until the deployment plan finishes, then confirm the task count.
    sdk_plan.wait_for_completed_deployment(service_name)
    tasks.check_running(service_name, DEFAULT_TASK_COUNT)
def xpack(is_enabled):
    """Set TASKCFG_ALL_XPACK_ENABLED on PACKAGE_NAME and wait for the rollout.

    Args:
        is_enabled: Value stored as-is under ``TASKCFG_ALL_XPACK_ENABLED``
            in the app's ``env`` section.
    """
    app_config = marathon.get_config(PACKAGE_NAME)
    env = app_config['env']
    env['TASKCFG_ALL_XPACK_ENABLED'] = is_enabled

    marathon.update_app(PACKAGE_NAME, app_config)

    # Block until the deployment plan finishes, then confirm the task count.
    sdk_plan.wait_for_completed_deployment(PACKAGE_NAME)
    tasks.check_running(PACKAGE_NAME, DEFAULT_TASK_COUNT)