def test_modify_app_config():
    check_healthy()
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    name_ids = tasks.get_task_ids(PACKAGE_NAME, 'name')
    zkfc_ids = tasks.get_task_ids(PACKAGE_NAME, 'zkfc')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')
    print('journal ids: ' + str(journal_ids))
    print('name ids: ' + str(name_ids))
    print('zkfc ids: ' + str(zkfc_ids))
    print('data ids: ' + str(data_ids))

    config = marathon.get_config(PACKAGE_NAME)
    print('marathon config: ')
    print(config)
    expiry_ms = int(config['env'][app_config_field])
    config['env'][app_config_field] = str(expiry_ms + 1)
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=config)

    # All tasks should be updated because hdfs-site.xml has changed
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    tasks.check_tasks_updated(PACKAGE_NAME, 'name', name_ids)
    tasks.check_tasks_updated(PACKAGE_NAME, 'zkfc', zkfc_ids)
    tasks.check_tasks_updated(PACKAGE_NAME, 'data', data_ids)

    check_healthy()
def test_deploy():
    wait_time = 30
    # taskcfg.yml will initially fail to deploy because several options are missing in the default
    # marathon.json.mustache. Verify that tasks are failing for 30s before continuing.
    print('Checking that tasks are failing to launch for at least {}s'.format(wait_time))

    # We can get brief blips of TASK_RUNNING, but they shouldn't last more than 2-3s:
    consecutive_task_running = 0

    def fn():
        nonlocal consecutive_task_running
        svc_tasks = shakedown.get_service_tasks(PACKAGE_NAME)
        states = [t['state'] for t in svc_tasks]
        print('Task states: {}'.format(states))
        if 'TASK_RUNNING' in states:
            consecutive_task_running += 1
            assert consecutive_task_running <= 3
        else:
            consecutive_task_running = 0
        return False

    try:
        spin.time_wait_noisy(lambda: fn(), timeout_seconds=wait_time)
    except shakedown.TimeoutExpired:
        print('Timeout reached as expected')

    # Add the needed envvars in marathon and confirm that the deployment succeeds:
    config = marathon.get_config(PACKAGE_NAME)
    env = config['env']
    del env['SLEEP_DURATION']
    env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=config)

    check_running()
def test_modify_app_config_rollback():
    check_healthy()
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    name_ids = tasks.get_task_ids(PACKAGE_NAME, 'name')
    zkfc_ids = tasks.get_task_ids(PACKAGE_NAME, 'zkfc')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')
    print('journal ids: ' + str(journal_ids))
    print('name ids: ' + str(name_ids))
    print('zkfc ids: ' + str(zkfc_ids))
    print('data ids: ' + str(data_ids))

    old_config = marathon.get_config(PACKAGE_NAME)
    config = marathon.get_config(PACKAGE_NAME)
    print('marathon config: ')
    print(config)
    expiry_ms = int(config['env'][app_config_field])
    print('expiry ms: ' + str(expiry_ms))
    config['env'][app_config_field] = str(expiry_ms + 1)
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=config)

    # Wait for journal nodes to be affected by the change
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')

    print('old config: ')
    print(old_config)
    # Put the old config back (rollback)
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=old_config)

    # Wait for the journal nodes to return to their old configuration
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    check_healthy()

    config = marathon.get_config(PACKAGE_NAME)
    assert int(config['env'][app_config_field]) == expiry_ms

    # ZKFC and Data tasks should not have been affected
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'zkfc', zkfc_ids)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'data', data_ids)
def test_bump_data_nodes():
    check_healthy()
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')
    print('data ids: ' + str(data_ids))

    config = marathon.get_config(PACKAGE_NAME)
    node_count = int(config['env']['DATA_COUNT']) + 1
    config['env']['DATA_COUNT'] = str(node_count)
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=config)

    check_healthy(DEFAULT_TASK_COUNT + 1)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'data', data_ids)
def test_bump_journal_cpus():
    check_healthy()
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    print('journal ids: ' + str(journal_ids))

    config = marathon.get_config(PACKAGE_NAME)
    print('marathon config: ')
    print(config)
    cpus = float(config['env']['JOURNAL_CPUS'])
    config['env']['JOURNAL_CPUS'] = str(cpus + 0.1)
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=config)

    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    check_healthy()
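# The tests above all repeat the same Marathon update pattern: fetch the app
# config, tweak one or more env vars, and PUT the config back. A minimal sketch
# of a shared helper for that pattern follows; the name `update_marathon_env` is
# hypothetical and not part of this suite, but it uses only the `marathon` and
# `cmd` modules already imported here.
def update_marathon_env(package_name, updates, removals=()):
    config = marathon.get_config(package_name)
    env = config['env']
    for key in removals:
        env.pop(key, None)  # drop env vars that must be absent
    env.update(updates)     # apply or override the requested values
    return cmd.request('put', marathon.api_url('apps/' + package_name), json=config)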