def test_all_partition():
    """Partition every agent returned for the service, reconnect them, then run the running check."""
    agent_hosts = shakedown.get_service_ips(config.SERVICE_NAME)

    # All agents are cut off first; only afterwards are they brought back.
    for agent in agent_hosts:
        shakedown.partition_agent(agent)
    for agent in agent_hosts:
        shakedown.reconnect_agent(agent)

    config.check_running()
Ejemplo n.º 2
0
def test_config_update_then_scheduler_died():
    """Bump the world CPUs, kill the scheduler process, then run the task-updated and running checks."""
    service = config.SERVICE_NAME
    original_world_ids = sdk_tasks.get_task_ids(service, "world")
    # The scheduler host is looked up before the config change is issued.
    scheduler_host = sdk_marathon.get_scheduler_host(service)

    config.bump_world_cpus()
    sdk_cmd.kill_task_with_pattern("helloworld.scheduler.Main", scheduler_host)

    sdk_tasks.check_tasks_updated(service, "world", original_world_ids)
    config.check_running()
Ejemplo n.º 3
0
def test_config_update_then_scheduler_died():
    """Kill the scheduler right after a world CPU bump and check the world tasks against their old ids."""
    ids_before_update = sdk_tasks.get_task_ids(config.SERVICE_NAME, "world")
    host_of_scheduler = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)

    # Trigger the config update, then take the scheduler down while it is in flight.
    config.bump_world_cpus()
    sdk_cmd.kill_task_with_pattern(
        "helloworld.scheduler.Main",
        host_of_scheduler)

    sdk_tasks.check_tasks_updated(
        config.SERVICE_NAME, "world", ids_before_update)
    config.check_running()
Ejemplo n.º 4
0
def test_pods_restart_graceful_shutdown():
    """Reinstall with a world kill grace period, restart world-0, and look for the clean-shutdown log line."""
    grace_options = {"world": {"kill_grace_period": 30}}

    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        config.DEFAULT_TASK_COUNT,
        additional_options=grace_options)

    old_world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "world-0")

    restart_reply = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, config.SERVICE_NAME, "pod restart world-0", json=True)
    assert len(restart_reply) == 2
    assert restart_reply["pod"] == "world-0"
    assert len(restart_reply["tasks"]) == 1
    assert restart_reply["tasks"][0] == "world-0-server"

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "world-0", old_world_ids)
    config.check_running()

    # The SIGTERM is detected through the "all clean" message written by the
    # world service's signal trap/handler. Lines containing "echo" are
    # skipped because they are the shell command itself, not its output.
    task_log = sdk_cmd.run_cli(
        "task log --completed --lines=1000 {}".format(old_world_ids[0]))
    clean_msg = None
    for line in task_log.split("\n"):
        if "all clean" in line and "echo" not in line:
            clean_msg = line

    assert clean_msg is not None
Ejemplo n.º 5
0
def test_pod_restart():
    """Restart pod hello-0 through the service CLI and assert it reports the same agent afterwards."""
    package = config.PACKAGE_NAME
    service = config.SERVICE_NAME
    ids_before = sdk_tasks.get_task_ids(service, "hello-0")

    # Record the agent the pod is on before the restart.
    info_before = sdk_cmd.svc_cli(package, service, "pod info hello-0", json=True)
    agent_before = info_before[0]["info"]["slaveId"]["value"]

    restart_reply = sdk_cmd.svc_cli(package, service, "pod restart hello-0", json=True)
    assert len(restart_reply) == 2
    assert restart_reply["pod"] == "hello-0"
    assert len(restart_reply["tasks"]) == 1
    assert restart_reply["tasks"][0] == "hello-0-server"

    sdk_tasks.check_tasks_updated(service, "hello-0", ids_before)
    config.check_running()

    # The pod must still be on the agent it started on.
    info_after = sdk_cmd.svc_cli(package, service, "pod info hello-0", json=True)
    agent_after = info_after[0]["info"]["slaveId"]["value"]
    assert agent_before == agent_after
Ejemplo n.º 6
0
def test_deploy():
    """Wait for hello-0-server to log a new TASK_FAILED, then fix the Marathon env and run the running check."""
    wait_time = 30
    task_name = "hello-0-server"
    # taskcfg.yml will initially fail to deploy because several options are missing in the
    # default sdk_marathon.json.mustache, so the task must be seen failing before continuing.
    log.info('Checking that {} is failing to launch within {}s'.format(task_name, wait_time))

    baseline_statuses = sdk_tasks.get_status_history(task_name)

    # Poll once per second, for up to wait_time seconds, until a new TASK_FAILED shows up.
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=1000 * wait_time,
        retry_on_result=lambda found: not found)
    def saw_new_task_failed():
        current_statuses = sdk_tasks.get_status_history(task_name)
        assert len(current_statuses) >= len(baseline_statuses)

        # Only statuses appended after the baseline snapshot are considered.
        fresh_statuses = current_statuses[len(baseline_statuses):]
        log.info('New {} statuses: {}'.format(task_name, ', '.join(fresh_statuses)))
        return "TASK_FAILED" in fresh_statuses

    saw_new_task_failed()

    # Add the needed envvars in marathon and confirm that the deployment succeeds.
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_env = app_definition["env"]
    del app_env["SLEEP_DURATION"]
    app_env["TASKCFG_ALL_OUTPUT_FILENAME"] = "output"
    app_env["TASKCFG_ALL_SLEEP_DURATION"] = "1000"
    sdk_marathon.update_app(config.SERVICE_NAME, app_definition)

    config.check_running()
Ejemplo n.º 7
0
def test_pods_restart_graceful_shutdown():
    """Reinstall the service with a 30s world kill grace period and check world-0 logs a clean shutdown on restart."""
    package = config.PACKAGE_NAME
    service = config.SERVICE_NAME
    install_options = {
        "world": {
            "kill_grace_period": 30
        }
    }

    sdk_install.uninstall(package, service)
    sdk_install.install(
        package, service, config.DEFAULT_TASK_COUNT, additional_options=install_options)

    ids_before_restart = sdk_tasks.get_task_ids(service, "world-0")

    reply = sdk_cmd.svc_cli(package, service, "pod restart world-0", json=True)
    assert len(reply) == 2
    assert reply["pod"] == "world-0"
    assert len(reply["tasks"]) == 1
    assert reply["tasks"][0] == "world-0-server"

    sdk_tasks.check_tasks_updated(service, "world-0", ids_before_restart)
    config.check_running()

    # Look for the "all clean" message emitted by the world service's signal
    # trap/handler. Lines that also contain "echo" are the shell command
    # text rather than its output, so they do not count.
    completed_log = sdk_cmd.run_cli(
        "task log --completed --lines=1000 {}".format(ids_before_restart[0]))
    clean_msg = None
    for log_line in completed_log.split("\n"):
        if "echo" in log_line:
            continue
        if "all clean" in log_line:
            clean_msg = log_line

    assert clean_msg is not None
Ejemplo n.º 8
0
def test_kill_hello_node():
    """Kill the 'hello' process on hello-0-server and check the hello-0 tasks against their old ids."""
    config.check_running()

    service = config.SERVICE_NAME
    ids_before_kill = sdk_tasks.get_task_ids(service, "hello-0")
    sdk_cmd.kill_task_with_pattern("hello", "hello-0-server.hello-world.mesos")
    sdk_tasks.check_tasks_updated(service, "hello-0", ids_before_kill)

    config.check_running()
Ejemplo n.º 9
0
def test_deploy():
    """Watch the tasks fail to launch for wait_time seconds, then fix the Marathon env and run the running check."""
    wait_time = 30
    # taskcfg.yml will initially fail to deploy because several options are missing in the default
    # marathon.json.mustache, so tasks are expected to keep failing for the whole wait window.
    print('Checking that tasks are failing to launch for at least {}s'.format(wait_time))

    # Brief blips of TASK_RUNNING are tolerated, but only up to 3 consecutive polls.
    running_streak = 0

    def poll_task_states():
        nonlocal running_streak
        task_states = [task['state'] for task in shakedown.get_service_tasks(PACKAGE_NAME)]
        print('Task states: {}'.format(task_states))
        if 'TASK_RUNNING' not in task_states:
            running_streak = 0
        else:
            running_streak += 1
            assert running_streak <= 3
        # Always report "not done" so the wait runs until its timeout.
        return False

    try:
        spin.time_wait_noisy(lambda: poll_task_states(), timeout_seconds=wait_time)
    except shakedown.TimeoutExpired:
        print('Timeout reached as expected')

    # Add the needed envvars in marathon and confirm that the deployment succeeds.
    app_config = marathon.get_config(PACKAGE_NAME)
    app_env = app_config['env']
    del app_env['SLEEP_DURATION']
    app_env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    app_env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    marathon.update_app(PACKAGE_NAME, app_config)

    check_running()
Ejemplo n.º 10
0
def test_deploy():
    """Wait for a new TASK_FAILED in hello-0-server's state history, then repair the Marathon env."""
    wait_time = 30
    task_name = "hello-0-server"
    # taskcfg.yml will initially fail to deploy because several options are missing in the
    # default sdk_marathon.json.mustache; confirm the task is failing before moving on.
    log.info("Checking that {} is failing to launch within {}s".format(task_name, wait_time))

    history_at_start = _get_state_history(task_name)

    # Retried every second, for at most wait_time seconds, while the result is falsy.
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=1000 * wait_time,
        retry_on_result=lambda found: not found)
    def new_failure_seen():
        history_now = _get_state_history(task_name)
        assert len(history_now) >= len(history_at_start)

        # Look only at the entries appended since the initial snapshot.
        appended = history_now[len(history_at_start):]
        log.info("Added {} state history: {}".format(task_name, ", ".join(appended)))
        return "TASK_FAILED" in appended

    new_failure_seen()

    # Add the needed envvars in marathon and confirm that the deployment succeeds.
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_env = app_definition["env"]
    del app_env["SLEEP_DURATION"]
    app_env["TASKCFG_ALL_OUTPUT_FILENAME"] = "output"
    app_env["TASKCFG_ALL_SLEEP_DURATION"] = "1000"
    sdk_marathon.update_app(app_definition)

    config.check_running()
Ejemplo n.º 11
0
def test_deploy():
    """Confirm hello-0-server reports a fresh TASK_FAILED, then supply the missing envvars via Marathon."""
    wait_time = 30
    task_name = "hello-0-server"
    # taskcfg.yml will initially fail to deploy because several options are missing in the
    # default sdk_marathon.json.mustache; the failure is verified before the fix is applied.
    log.info('Checking that {} is failing to launch within {}s'.format(task_name, wait_time))

    statuses_at_start = sdk_tasks.get_status_history(task_name)
    retry_policy = retrying.retry(
        wait_fixed=1000,
        stop_max_delay=1000 * wait_time,
        retry_on_result=lambda outcome: not outcome)

    # Re-run under the retry policy until a TASK_FAILED appears among the new statuses.
    @retry_policy
    def task_failed_again():
        statuses_now = sdk_tasks.get_status_history(task_name)
        assert len(statuses_now) >= len(statuses_at_start)

        newly_added = statuses_now[len(statuses_at_start):]
        log.info('New {} statuses: {}'.format(task_name, ', '.join(newly_added)))
        return "TASK_FAILED" in newly_added

    task_failed_again()

    # Add the needed envvars in marathon and confirm that the deployment succeeds.
    service = config.SERVICE_NAME
    app_definition = sdk_marathon.get_config(service)
    app_env = app_definition["env"]
    del app_env["SLEEP_DURATION"]
    app_env["TASKCFG_ALL_OUTPUT_FILENAME"] = "output"
    app_env["TASKCFG_ALL_SLEEP_DURATION"] = "1000"
    sdk_marathon.update_app(service, app_definition)

    config.check_running()
Ejemplo n.º 12
0
def test_config_updates_then_all_executors_killed():
    """Bump the world CPUs, kill the executor process on every service host, then run the checks.

    The task ids and host list are captured before the config change so that
    the later check compares against the pre-update tasks.
    """
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    # A plain loop is used because the calls are made only for their side effect.
    for host in hosts:
        sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
Ejemplo n.º 13
0
def test_deploy():
    """After deployment kicks off, wait for a new TASK_FAILED on hello-0-server, then repair the Marathon env."""
    wait_time_in_seconds = 600
    sdk_plan.wait_for_kicked_off_deployment(config.SERVICE_NAME)
    task_name = "hello-0-server"
    # taskcfg.yml will initially fail to deploy because several options are missing in the
    # default sdk_marathon.json.mustache; confirm the task is failing before moving on.
    log.info('Checking that {} is failing to launch within {}s'.format(task_name, wait_time_in_seconds))

    history_at_start = _get_state_history(task_name)

    # Retried every second, up to wait_time_in_seconds, while the result is falsy.
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=1000 * wait_time_in_seconds,
        retry_on_result=lambda found: not found)
    def new_failure_seen():
        history_now = _get_state_history(task_name)
        assert len(history_now) >= len(history_at_start)

        # Only the entries appended since the initial snapshot matter.
        appended = history_now[len(history_at_start):]
        log.info("Added {} state history: {}".format(task_name, ", ".join(appended)))
        return "TASK_FAILED" in appended

    new_failure_seen()

    # Add the needed envvars in marathon and confirm that the deployment succeeds.
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_env = app_definition["env"]
    del app_env["SLEEP_DURATION"]
    app_env["TASKCFG_ALL_OUTPUT_FILENAME"] = "output"
    app_env["TASKCFG_ALL_SLEEP_DURATION"] = "1000"
    sdk_marathon.update_app(app_definition)

    config.check_running()
Ejemplo n.º 14
0
def test_config_update_then_scheduler_died():
    """Bump the world CPUs, kill the scheduler process on its host, then run the checks."""
    ids_before_update = tasks.get_task_ids(PACKAGE_NAME, "world")
    # The scheduler host is resolved before the update is issued.
    scheduler_host = marathon.get_scheduler_host(PACKAGE_NAME)

    bump_world_cpus()
    tasks.kill_task_with_pattern("helloworld.scheduler.Main", scheduler_host)

    tasks.check_tasks_updated(PACKAGE_NAME, "world", ids_before_update)
    check_running()
Ejemplo n.º 15
0
def test_mesos_v1_api():
    """Install the service with mesos_api_version V1, check it, then reinstall without that option."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    package = config.PACKAGE_NAME
    task_count = config.DEFAULT_TASK_COUNT

    v1_options = {"service": {"name": foldered_name, "mesos_api_version": "V1"}}
    default_options = {"service": {"name": foldered_name}}

    # Install Hello World using the v1 api, then clean up afterwards.
    sdk_install.uninstall(package, foldered_name)
    sdk_install.install(package, foldered_name, task_count, additional_options=v1_options)
    config.check_running(foldered_name)
    sdk_install.uninstall(package, foldered_name)

    # Reinstall the v0 version for the following tests.
    sdk_install.install(package, foldered_name, task_count, additional_options=default_options)
Ejemplo n.º 16
0
def test_modify_app_config():
    """Change an app-level config value in Marathon and check that every task group is updated.

    The task ids of the journal, name, zkfc and data groups are captured
    first, the expiry setting is incremented by one and PUT back to Marathon,
    and each group is then checked against its own pre-update ids.
    """
    check_running()
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'

    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    name_ids = tasks.get_task_ids(PACKAGE_NAME, 'name')
    zkfc_ids = tasks.get_task_ids(PACKAGE_NAME, 'zkfc')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')
    print('journal ids: ' + str(journal_ids))
    print('name ids: ' + str(name_ids))
    print('zkfc ids: ' + str(zkfc_ids))
    print('data ids: ' + str(data_ids))

    config = marathon.get_config(PACKAGE_NAME)
    print('marathon config: ')
    print(config)
    expiry_ms = int(config['env'][app_config_field])
    config['env'][app_config_field] = str(expiry_ms + 1)
    # The response was never inspected, so it is no longer bound to a name.
    cmd.request('put',
                marathon.api_url('apps/' + PACKAGE_NAME),
                json=config)

    # All tasks should be updated because hdfs-site.xml has changed
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    tasks.check_tasks_updated(PACKAGE_NAME, 'name', name_ids)
    tasks.check_tasks_updated(PACKAGE_NAME, 'zkfc', zkfc_ids)
    # Compare the data tasks with their own old ids (previously journal_ids was passed).
    tasks.check_tasks_updated(PACKAGE_NAME, 'data', data_ids)

    check_running()
Ejemplo n.º 17
0
def test_config_updates_then_all_executors_killed():
    """Bump the world CPUs, kill the executor process on every service host, then run the checks.

    The task ids and host list are captured before the config change so that
    the later check compares against the pre-update tasks.
    """
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    # A plain loop is used because the calls are made only for their side effect.
    for host in hosts:
        sdk_tasks.kill_task_with_pattern('helloworld.executor.Main', host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
Ejemplo n.º 18
0
def test_config_update_then_kill_task_in_node():
    """Bump the world CPUs, kill the 'world' process on world-0-server only, then run the checks."""
    service = config.SERVICE_NAME
    ids_before_update = sdk_tasks.get_task_ids(service, "world")

    config.bump_world_cpus()
    # Kill 1 of 2 world tasks.
    world_0_host = "world-0-server.{}.mesos".format(service)
    sdk_tasks.kill_task_with_pattern("world", world_0_host)

    sdk_tasks.check_tasks_updated(service, "world", ids_before_update)
    config.check_running()
Ejemplo n.º 19
0
def test_config_update_then_kill_task_in_node():
    """After a world CPU bump, kill the 'world' process on the world-0-server host and run the checks."""
    original_world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "world")
    config.bump_world_cpus()

    # Kill 1 of 2 world tasks.
    target_host = "world-0-server.{}.mesos".format(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern("world", target_host)

    sdk_tasks.check_tasks_updated(
        config.SERVICE_NAME, "world", original_world_ids)
    config.check_running()
Ejemplo n.º 20
0
def test_kill_hello_node():
    """Kill the 'hello' process on the hello-0-server host and check the hello-0 task ids afterwards."""
    config.check_running()

    old_hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello-0")
    sdk_cmd.kill_task_with_pattern(
        "hello",
        "hello-0-server.hello-world.mesos")
    sdk_tasks.check_tasks_updated(
        config.SERVICE_NAME, "hello-0", old_hello_ids)

    config.check_running()
Ejemplo n.º 21
0
def test_config_update_then_executor_killed():
    """Bump the world CPUs, kill the executor process on the world-0-server host, then run the checks."""
    ids_before_update = tasks.get_task_ids(PACKAGE_NAME, "world")
    bump_world_cpus()

    executor_host = "world-0-server.{}.mesos".format(PACKAGE_NAME)
    tasks.kill_task_with_pattern("helloworld.executor.Main", executor_host)

    tasks.check_tasks_updated(PACKAGE_NAME, "world", ids_before_update)
    check_running()
Ejemplo n.º 22
0
def test_config_update_then_kill_all_task_in_node():
    """Bump the world CPUs, kill the 'world' process on every service host, then run the checks."""
    #  kill both world tasks
    world_ids = tasks.get_task_ids(PACKAGE_NAME, 'world')
    bump_world_cpus()
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    # A plain loop is used because the calls are made only for their side effect.
    for host in hosts:
        tasks.kill_task_with_pattern('world', host)
    tasks.check_tasks_updated(PACKAGE_NAME, 'world', world_ids)
    check_running()
Ejemplo n.º 23
0
def test_config_update_then_kill_task_in_node():
    """Bump the world CPUs, kill the 'world' process on world-0-server only, then run the checks."""
    ids_before_update = tasks.get_task_ids(PACKAGE_NAME, "world")
    bump_world_cpus()

    # Kill 1 of 2 world tasks.
    world_0_host = "world-0-server.{}.mesos".format(PACKAGE_NAME)
    tasks.kill_task_with_pattern("world", world_0_host)

    tasks.check_tasks_updated(PACKAGE_NAME, "world", ids_before_update)
    check_running()
Ejemplo n.º 24
0
def test_kill_hello_node():
    """Kill the 'hello' process on the hello-0-server host and check the hello-0 task ids afterwards.

    The ids are captured with the 'hello-0' prefix, so the update check uses
    the same prefix; otherwise the old hello-0 ids would be compared against
    the tasks of every pod whose name starts with 'hello'.
    """
    check_running()
    hello_ids = sdk_tasks.get_task_ids(PACKAGE_NAME, 'hello-0')
    sdk_tasks.kill_task_with_pattern('hello',
                                     'hello-0-server.hello-world.mesos')
    sdk_tasks.check_tasks_updated(PACKAGE_NAME, 'hello-0', hello_ids)

    check_running()
Ejemplo n.º 25
0
def test_config_update_then_kill_all_task_in_node():
    """Bump the world CPUs, kill the 'world' process on every service host, then run the checks.

    The task ids and host list are captured before the config change is issued.
    """
    #  kill both world tasks
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    # A plain loop is used because the calls are made only for their side effect.
    for host in hosts:
        sdk_cmd.kill_task_with_pattern('world', host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
Ejemplo n.º 26
0
def test_config_update_then_executor_killed():
    """After a world CPU bump, kill the executor process on the world-0-server host and run the checks."""
    service = config.SERVICE_NAME
    original_world_ids = sdk_tasks.get_task_ids(service, "world")
    config.bump_world_cpus()

    executor_host = "world-0-server.{}.mesos".format(service)
    sdk_cmd.kill_task_with_pattern("helloworld.executor.Main", executor_host)

    sdk_tasks.check_tasks_updated(service, "world", original_world_ids)
    config.check_running()
Ejemplo n.º 27
0
def test_config_update_then_kill_all_task_in_node():
    """Kill the 'world' process on every service host right after a world CPU bump, then run the checks.

    Task ids and hosts are read before the config change is issued.
    """
    #  kill both world tasks
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    # Iterate explicitly: the kill calls are wanted for their effect, not their results.
    for host in hosts:
        sdk_cmd.kill_task_with_pattern('world', host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
Ejemplo n.º 28
0
def test_uninstall():
    """Set SDK_UNINSTALL in the Marathon env, wait for the deployment, and check for 0 running tasks."""
    config.check_running()

    package = config.PACKAGE_NAME
    # Add the needed envvar in marathon and confirm that the uninstall "deployment" succeeds.
    app_definition = sdk_marathon.get_config(package)
    app_definition["env"]["SDK_UNINSTALL"] = "w00t"
    sdk_marathon.update_app(package, app_definition)

    sdk_plan.wait_for_completed_deployment(package)
    sdk_tasks.check_running(package, 0)
Ejemplo n.º 29
0
def test_uninstall():
    """Set SDK_UNINSTALL in the Marathon env, wait for the deployment, and check for exactly 0 running tasks."""
    config.check_running()

    service = config.SERVICE_NAME
    # Add the needed envvar in marathon and confirm that the uninstall "deployment" succeeds.
    app_definition = sdk_marathon.get_config(service)
    app_definition["env"]["SDK_UNINSTALL"] = "w00t"
    sdk_marathon.update_app(app_definition)

    sdk_plan.wait_for_completed_deployment(service)
    sdk_tasks.check_running(service, 0, allow_more=False)
Ejemplo n.º 30
0
def test_uninstall():
    """Trigger an uninstall through the SDK_UNINSTALL envvar and check that no tasks remain running."""
    config.check_running()

    # Add the needed envvar in marathon and confirm that the uninstall "deployment" succeeds.
    uninstall_config = sdk_marathon.get_config(config.SERVICE_NAME)
    uninstall_env = uninstall_config["env"]
    uninstall_env["SDK_UNINSTALL"] = "w00t"
    sdk_marathon.update_app(uninstall_config)

    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    sdk_tasks.check_running(
        config.SERVICE_NAME,
        0,
        allow_more=False)
Ejemplo n.º 31
0
def test_uninstall():
    """Set SDK_UNINSTALL in the Marathon env, wait for the deployment, and check for 0 running tasks."""
    config.check_running()

    service = config.SERVICE_NAME
    # Add the needed envvar in marathon and confirm that the uninstall "deployment" succeeds.
    app_definition = sdk_marathon.get_config(service)
    app_definition["env"]["SDK_UNINSTALL"] = "w00t"
    sdk_marathon.update_app(service, app_definition)

    sdk_plan.wait_for_completed_deployment(service)
    sdk_tasks.check_running(service, 0)
Ejemplo n.º 32
0
def test_bump_hello_nodes():
    """Bump HELLO_COUNT and check that the previously captured hello tasks were not updated."""
    check_running()

    ids_before_bump = tasks.get_task_ids(PACKAGE_NAME, "hello")
    sdk_utils.out('hello ids: ' + str(ids_before_bump))

    marathon.bump_task_count_config(PACKAGE_NAME, "HELLO_COUNT")

    check_running()
    # The tasks captured before the bump are checked with the not-updated helper.
    tasks.check_tasks_not_updated(PACKAGE_NAME, "hello", ids_before_bump)
Ejemplo n.º 33
0
def test_bump_hello_nodes():
    """Bump HELLO_COUNT for the foldered service and check the existing hello tasks were not updated."""
    service = FOLDERED_SERVICE_NAME
    config.check_running(service)

    ids_before_bump = sdk_tasks.get_task_ids(service, "hello")
    log.info('hello ids: ' + str(ids_before_bump))

    sdk_marathon.bump_task_count_config(service, "HELLO_COUNT")

    config.check_running(service)
    sdk_tasks.check_tasks_not_updated(service, "hello", ids_before_bump)
Ejemplo n.º 34
0
def test_bump_hello_nodes():
    """Raise the hello task count and check that the hello tasks captured beforehand were not updated."""
    config.check_running(FOLDERED_SERVICE_NAME)

    original_hello_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, "hello")
    log.info('hello ids: ' + str(original_hello_ids))

    sdk_marathon.bump_task_count_config(
        FOLDERED_SERVICE_NAME,
        "HELLO_COUNT")

    config.check_running(FOLDERED_SERVICE_NAME)
    sdk_tasks.check_tasks_not_updated(
        FOLDERED_SERVICE_NAME,
        "hello",
        original_hello_ids)
Ejemplo n.º 35
0
def test_bump_hello_nodes():
    """Bump HELLO_COUNT for the foldered service and check the existing hello tasks were not updated."""
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(service)

    ids_before_bump = sdk_tasks.get_task_ids(service, "hello")
    log.info('hello ids: ' + str(ids_before_bump))

    sdk_marathon.bump_task_count_config(service, "HELLO_COUNT")

    config.check_running(service)
    sdk_tasks.check_tasks_not_updated(service, "hello", ids_before_bump)
Ejemplo n.º 36
0
def test_pod_replace():
    """Replace pod world-0 through the service CLI, validate the reply, then run the checks."""
    service = config.SERVICE_NAME
    ids_before_replace = sdk_tasks.get_task_ids(service, "world-0")

    reply = sdk_cmd.svc_cli(config.PACKAGE_NAME, service, "pod replace world-0", json=True)
    # The reply carries two entries: the pod name and the list of its tasks.
    assert len(reply) == 2
    assert reply["pod"] == "world-0"
    assert len(reply["tasks"]) == 1
    assert reply["tasks"][0] == "world-0-server"

    sdk_tasks.check_tasks_updated(service, "world-0", ids_before_replace)
    config.check_running()
Ejemplo n.º 37
0
def test_finish_task_restarts_on_config_update():
    """Bump the world CPUs and check that the completed world-0-finish task is relaunched."""
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(service)

    task_name = "world-0-finish"
    completed_id = get_completed_task_id(task_name)
    assert completed_id is not None
    log.info("%s ID: %s", task_name, completed_id)

    config.bump_world_cpus(service)

    sdk_tasks.check_task_relaunched(
        task_name,
        completed_id,
        ensure_new_task_not_completed=False)
    config.check_running(service)
Ejemplo n.º 38
0
def test_pod_replace():
    """Issue 'pod replace world-0', assert on the CLI reply, and check the world-0 tasks afterwards."""
    old_world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "world-0")

    replace_reply = sdk_cmd.svc_cli(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        "pod replace world-0",
        json=True)
    assert len(replace_reply) == 2
    assert replace_reply["pod"] == "world-0"
    replaced_tasks = replace_reply["tasks"]
    assert len(replaced_tasks) == 1
    assert replaced_tasks[0] == "world-0-server"

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "world-0", old_world_ids)
    config.check_running()
Ejemplo n.º 39
0
def test_bump_hello_cpus():
    """Raise HELLO_CPUS by 0.1 through a Marathon PUT and check the hello tasks against their old ids."""
    check_running()
    ids_before_bump = tasks.get_task_ids(PACKAGE_NAME, "hello")
    print('hello ids: ' + str(ids_before_bump))

    app_config = marathon.get_config(PACKAGE_NAME)
    app_env = app_config["env"]
    # The env value is a string, so it is parsed, incremented and written back as a string.
    app_env["HELLO_CPUS"] = str(float(app_env["HELLO_CPUS"]) + 0.1)
    app_url = marathon.api_url("apps/" + PACKAGE_NAME)
    cmd.request("put", app_url, json=app_config)

    tasks.check_tasks_updated(PACKAGE_NAME, "hello", ids_before_bump)
    check_running()
Ejemplo n.º 40
0
def test_finish_task_restarts_on_config_update():
    """Bump the world CPUs and check that the completed world-0-finish task is relaunched."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(foldered_name)
    task_name = 'world-0-finish'
    world_finish_id = sdk_tasks.get_completed_task_id(task_name)
    assert world_finish_id is not None
    log.info('world_finish_id: ' + str(world_finish_id))

    # The return value is not needed here, so it is no longer bound to a name.
    config.bump_world_cpus(foldered_name)

    sdk_tasks.check_task_relaunched(task_name, world_finish_id)
    config.check_running(foldered_name)
Ejemplo n.º 41
0
def test_bump_hello_cpus():
    """Increase HELLO_CPUS by 0.1 in the Marathon app config and check that the hello tasks are updated."""
    check_running()
    original_hello_ids = tasks.get_task_ids(PACKAGE_NAME, "hello")
    print('hello ids: ' + str(original_hello_ids))

    marathon_app = marathon.get_config(PACKAGE_NAME)
    current_cpus = float(marathon_app["env"]["HELLO_CPUS"])
    bumped_cpus = current_cpus + 0.1
    marathon_app["env"]["HELLO_CPUS"] = str(bumped_cpus)
    # Push the modified app definition back to Marathon.
    cmd.request(
        "put",
        marathon.api_url("apps/" + PACKAGE_NAME),
        json=marathon_app)

    tasks.check_tasks_updated(PACKAGE_NAME, "hello", original_hello_ids)
    check_running()
Ejemplo n.º 42
0
def test_bump_world_cpus():
    """Raise WORLD_CPUS by 0.1 through a Marathon PUT and check the world tasks against their old ids."""
    check_running()
    ids_before_bump = tasks.get_task_ids(PACKAGE_NAME, "world")
    print('world ids: ' + str(ids_before_bump))

    app_config = marathon.get_config(PACKAGE_NAME)
    app_env = app_config["env"]
    # The env value is a string, so it is parsed, incremented and written back as a string.
    app_env["WORLD_CPUS"] = str(float(app_env["WORLD_CPUS"]) + 0.1)
    app_url = marathon.api_url("apps/" + PACKAGE_NAME)
    cmd.request("put", app_url, json=app_config)

    tasks.check_tasks_updated(PACKAGE_NAME, "world", ids_before_bump)
    check_running()
Ejemplo n.º 43
0
def test_finish_task_restarts_on_config_update():
    """Check that the completed world-0-finish task is relaunched after the world CPUs are bumped."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(foldered_name)
    task_name = 'world-0-finish'
    world_finish_id = sdk_tasks.get_completed_task_id(task_name)
    assert world_finish_id is not None
    log.info('world_finish_id: ' + str(world_finish_id))

    # Only the side effect of the bump matters; its result was never used.
    config.bump_world_cpus(foldered_name)

    sdk_tasks.check_task_relaunched(task_name, world_finish_id)
    config.check_running(foldered_name)
Ejemplo n.º 44
0
def test_bump_hello_nodes():
    """Increment HELLO_COUNT through a Marathon PUT and check the existing hello tasks were not updated."""
    check_running()

    ids_before_bump = tasks.get_task_ids(PACKAGE_NAME, "hello")
    print('hello ids: ' + str(ids_before_bump))

    app_config = marathon.get_config(PACKAGE_NAME)
    app_env = app_config["env"]
    # The count is stored as a string in the env, so convert on the way in and out.
    app_env["HELLO_COUNT"] = str(int(app_env["HELLO_COUNT"]) + 1)
    app_url = marathon.api_url("apps/" + PACKAGE_NAME)
    cmd.request("put", app_url, json=app_config)

    check_running()
    tasks.check_tasks_not_updated(PACKAGE_NAME, "hello", ids_before_bump)
Ejemplo n.º 45
0
def test_bump_hello_nodes():
    """Increment HELLO_COUNT via marathon.update_app and check the existing hello tasks were not updated."""
    check_running()

    ids_before_bump = tasks.get_task_ids(PACKAGE_NAME, "hello")
    print('hello ids: ' + str(ids_before_bump))

    app_config = marathon.get_config(PACKAGE_NAME)
    app_env = app_config["env"]
    # The count is stored as a string in the env, so convert on the way in and out.
    app_env["HELLO_COUNT"] = str(int(app_env["HELLO_COUNT"]) + 1)
    marathon.update_app(PACKAGE_NAME, app_config)

    check_running()
    tasks.check_tasks_not_updated(PACKAGE_NAME, "hello", ids_before_bump)
Ejemplo n.º 46
0
def test_integrity_on_data_node_failure():
    """Write a file via data-0, kill the DataNode process on data-0 and data-1, then read the file via data-2."""

    def write_via_data_0():
        return write_data_to_hdfs("data-0-node.hdfs.mesos", TEST_FILE_1_NAME)

    def read_via_data_2():
        return read_data_from_hdfs("data-2-node.hdfs.mesos", TEST_FILE_1_NAME)

    shakedown.wait_for(write_via_data_0, HDFS_CMD_TIMEOUT_SEC)

    # Gives a chance for the write to succeed and replication to occur.
    time.sleep(5)

    for data_host in ("data-0-node.hdfs.mesos", "data-1-node.hdfs.mesos"):
        tasks.kill_task_with_pattern("DataNode", data_host)
    time.sleep(1)  # give DataNode a chance to die

    shakedown.wait_for(read_via_data_2, HDFS_CMD_TIMEOUT_SEC)

    check_running()
Ejemplo n.º 47
0
def test_bump_hello_nodes():
    """Add one to HELLO_COUNT with a Marathon PUT and check that the earlier hello tasks were not updated."""
    check_running()

    original_hello_ids = tasks.get_task_ids(PACKAGE_NAME, "hello")
    print('hello ids: ' + str(original_hello_ids))

    marathon_app = marathon.get_config(PACKAGE_NAME)
    bumped_count = int(marathon_app["env"]["HELLO_COUNT"]) + 1
    marathon_app["env"]["HELLO_COUNT"] = str(bumped_count)
    # Push the modified app definition back to Marathon.
    cmd.request(
        "put",
        marathon.api_url("apps/" + PACKAGE_NAME),
        json=marathon_app)

    check_running()
    tasks.check_tasks_not_updated(PACKAGE_NAME, "hello", original_hello_ids)
def test_finish_task_restarts_on_config_update():
    """Check that the completed world-0-finish task is relaunched after the world CPUs are bumped."""
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(service)

    task_name = "world-0-finish"
    finished_task_id = get_completed_task_id(task_name)
    assert finished_task_id is not None
    log.info("%s ID: %s", task_name, finished_task_id)

    config.bump_world_cpus(service)

    sdk_tasks.check_task_relaunched(
        task_name, finished_task_id, ensure_new_task_not_completed=False)
    config.check_running(service)
Ejemplo n.º 49
0
def test_integrity_on_name_node_failure():
    """
    Check that data can still be written and read after the first name node fails.

    The first name node (name-0-node) is the active name node by default when HDFS gets installed,
    so its NameNode process is the one killed here.
    """

    def write_via_data_0():
        return write_data_to_hdfs("data-0-node.hdfs.mesos", TEST_FILE_2_NAME)

    def read_via_data_2():
        return read_data_from_hdfs("data-2-node.hdfs.mesos", TEST_FILE_2_NAME)

    tasks.kill_task_with_pattern("NameNode", "name-0-node.hdfs.mesos")
    time.sleep(1)  # give NameNode a chance to die

    shakedown.wait_for(write_via_data_0, HDFS_CMD_TIMEOUT_SEC)
    shakedown.wait_for(read_via_data_2, HDFS_CMD_TIMEOUT_SEC)

    check_running()
Ejemplo n.º 50
0
def test_once_task_does_not_restart_on_config_update():
    """Bump the hello CPUs and check that the completed hello-0-once task is not relaunched."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(foldered_name)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    task_name = 'hello-0-once'
    hello_once_id = sdk_tasks.get_completed_task_id(task_name)
    assert hello_once_id is not None
    log.info('hello_once_id: ' + str(hello_once_id))

    # The return value is not needed here, so it is no longer bound to a name.
    config.bump_hello_cpus(foldered_name)

    sdk_tasks.check_task_not_relaunched(foldered_name, task_name, hello_once_id)
    config.check_running(foldered_name)
Ejemplo n.º 51
0
def test_pod_replace():
    """Replace pod world-0 through the service CLI, validate the reply, then run the checks."""
    package = config.PACKAGE_NAME
    service = config.SERVICE_NAME
    ids_before_replace = sdk_tasks.get_task_ids(service, "world-0")

    # TODO: also compare the agent id before and after (via 'pod info world-0',
    # reading [0]['info']['slaveId']['value']) if/when the agent is guaranteed
    # to change in a replace operation.

    reply = sdk_cmd.svc_cli(package, service, "pod replace world-0", json=True)
    assert len(reply) == 2
    assert reply["pod"] == "world-0"
    assert len(reply["tasks"]) == 1
    assert reply["tasks"][0] == "world-0-server"

    sdk_tasks.check_tasks_updated(service, "world-0", ids_before_replace)
    config.check_running()
Ejemplo n.º 52
0
def test_bump_world_cpus():
    """Bump the world CPUs and assert every running world task reports the updated CPU value."""
    service = FOLDERED_SERVICE_NAME
    config.check_running(service)
    ids_before_bump = sdk_tasks.get_task_ids(service, "world")
    log.info('world ids: ' + str(ids_before_bump))

    updated_cpus = config.bump_world_cpus(service)

    sdk_tasks.check_tasks_updated(service, "world", ids_before_bump)
    config.check_running(service)

    # Keep only the world tasks that are currently in TASK_RUNNING.
    running_world_tasks = [
        task
        for task in shakedown.get_service_tasks(service)
        if task["name"].startswith("world") and task["state"] == "TASK_RUNNING"
    ]
    assert len(running_world_tasks) == config.world_task_count(service)
    for task in running_world_tasks:
        assert close_enough(task["resources"]["cpus"], updated_cpus)
Ejemplo n.º 53
0
def test_kill_scheduler():
    """Kill the scheduler process and check how the task ids are affected.

    Exactly one Marathon task with the scheduler's task prefix must exist up
    front. After ``sdk_cmd.kill_task_with_pattern`` is called against the
    scheduler's host, the scheduler task ids are passed to
    ``sdk_tasks.check_tasks_updated`` (app "marathon"), the framework is
    awaited with ``sdk_tasks.wait_for_active_framework``, and the service's
    own task ids are passed to ``sdk_tasks.check_tasks_not_updated``.
    """
    service = config.SERVICE_NAME

    service_task_ids = sdk_tasks.get_task_ids(service, "")
    prefix = sdk_marathon.get_scheduler_task_prefix(service)
    marathon_scheduler_ids = sdk_tasks.get_task_ids("marathon", prefix)
    assert len(marathon_scheduler_ids) == 1, (
        "Expected to find ONLY one scheduler task but found {}".format(marathon_scheduler_ids)
    )

    # "nobody" is presumably the user that owns the scheduler process --
    # confirm against sdk_cmd.kill_task_with_pattern.
    scheduler_host = sdk_marathon.get_scheduler_host(service)
    sdk_cmd.kill_task_with_pattern(
        "./hello-world-scheduler/bin/helloworld",
        "nobody",
        agent_host=scheduler_host,
    )

    sdk_tasks.check_tasks_updated("marathon", prefix, marathon_scheduler_ids)
    sdk_tasks.wait_for_active_framework(service)
    config.check_running()
    sdk_tasks.check_tasks_not_updated(service, "", service_task_ids)
Ejemplo n.º 54
0
def test_bump_hello_cpus():
    """Bump the 'hello' CPU setting of the foldered service and verify it.

    The 'hello' task ids captured before ``config.bump_hello_cpus`` are passed
    to ``sdk_tasks.check_tasks_updated``. Afterwards the number of tasks whose
    name starts with 'hello' and whose state is TASK_RUNNING must equal
    ``config.hello_task_count``, and each must report a ``cpus`` resource that
    ``config.close_enough`` accepts against the bumped value.
    """
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    config.check_running(service)
    previous_ids = sdk_tasks.get_task_ids(service, 'hello')
    log.info('hello ids: ' + str(previous_ids))

    bumped_cpus = config.bump_hello_cpus(service)

    sdk_tasks.check_tasks_updated(service, 'hello', previous_ids)
    config.check_running(service)

    def is_running_hello(task):
        # The state is consulted only when the name already matched.
        return task['name'].startswith('hello') and task['state'] == "TASK_RUNNING"

    hello_tasks = list(filter(is_running_hello, shakedown.get_service_tasks(service)))
    assert len(hello_tasks) == config.hello_task_count(service)
    for task in hello_tasks:
        assert config.close_enough(task['resources']['cpus'], bumped_cpus)
Ejemplo n.º 55
0
def test_state_refresh_disable_cache():
    """Disables caching via a scheduler envvar.

    Steps:
      1. With the current configuration, ``debug state refresh_cache`` must
         exit 0 and print "Received cmd: refresh".
      2. ``DISABLE_STATE_CACHE`` is added to the Marathon app's env; once the
         deployment completes, the same command must (eventually) fail with
         "failed: 409 Conflict" on stderr and nothing on stdout.
      3. The variable is removed again; once the deployment completes, the
         command must (eventually) succeed and print "Received cmd: refresh".

    After each of the two config changes the task ids captured at the start
    are passed to ``sdk_tasks.check_tasks_not_updated``.
    """
    # Bound locally so the test does not rely on a module-level
    # ``foldered_name`` being defined elsewhere in the file.
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    config.check_running(foldered_name)
    task_ids = sdk_tasks.get_task_ids(foldered_name, "")

    # caching enabled by default:
    rc, stdout, _ = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, "debug state refresh_cache")
    assert rc == 0, "Refresh cache failed"
    assert "Received cmd: refresh" in stdout

    marathon_config = sdk_marathon.get_config(foldered_name)
    marathon_config["env"]["DISABLE_STATE_CACHE"] = "any-text-here"
    sdk_marathon.update_app(marathon_config)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_tasks.check_tasks_not_updated(foldered_name, "", task_ids)

    # caching disabled, refresh_cache should fail with a 409 error (eventually, once scheduler is up):
    # Retried with a fixed 1000 ms wait for up to 120 * 1000 ms while the
    # function returns a falsy value.
    @retrying.retry(wait_fixed=1000, stop_max_delay=120 * 1000, retry_on_result=lambda res: not res)
    def check_cache_refresh_fails_409conflict():
        rc, stdout, stderr = sdk_cmd.svc_cli(
            config.PACKAGE_NAME, foldered_name, "debug state refresh_cache"
        )
        return rc != 0 and stdout == "" and "failed: 409 Conflict" in stderr

    check_cache_refresh_fails_409conflict()

    marathon_config = sdk_marathon.get_config(foldered_name)
    del marathon_config["env"]["DISABLE_STATE_CACHE"]
    sdk_marathon.update_app(marathon_config)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_tasks.check_tasks_not_updated(foldered_name, "", task_ids)

    # caching reenabled, refresh_cache should succeed (eventually, once scheduler is up):
    # Same retry policy as above; here a falsy (empty) stdout triggers the retry.
    @retrying.retry(wait_fixed=1000, stop_max_delay=120 * 1000, retry_on_result=lambda res: not res)
    def check_cache_refresh():
        rc, stdout, _ = sdk_cmd.svc_cli(
            config.PACKAGE_NAME, foldered_name, "debug state refresh_cache"
        )
        assert rc == 0, "Refresh cache failed"
        return stdout

    stdout = check_cache_refresh()
    assert "Received cmd: refresh" in stdout
Ejemplo n.º 56
0
def test_config_update_while_partitioned():
    """Update WORLD_CPUS while the world-0-server host is partitioned.

    The host returned by ``sdk_hosts.system_host`` for "world-0-server" is
    partitioned, ``WORLD_CPUS`` in the Marathon app env is raised by 0.1
    without waiting for the deployment, and the host is reconnected. The
    'world' task ids captured up front are then passed to
    ``sdk_tasks.check_tasks_updated``. Finally the number of tasks whose name
    starts with 'world' and whose state is TASK_RUNNING must equal
    ``config.world_task_count``, and each must report a ``cpus`` resource that
    ``config.close_enough`` accepts against the new value.
    """
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    host = sdk_hosts.system_host(config.SERVICE_NAME, "world-0-server")
    shakedown.partition_agent(host)
    try:
        service_config = sdk_marathon.get_config(config.SERVICE_NAME)
        updated_cpus = float(service_config['env']['WORLD_CPUS']) + 0.1
        service_config['env']['WORLD_CPUS'] = str(updated_cpus)
        sdk_marathon.update_app(config.SERVICE_NAME, service_config, wait_for_completed_deployment=False)
    finally:
        # Reconnect even when the config update raises, so a failure here
        # does not leave the agent partitioned for whatever runs next.
        shakedown.reconnect_agent(host)

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
    all_tasks = shakedown.get_service_tasks(config.SERVICE_NAME)
    running_tasks = [t for t in all_tasks if t['name'].startswith('world') and t['state'] == "TASK_RUNNING"]
    assert len(running_tasks) == config.world_task_count(config.SERVICE_NAME)
    for t in running_tasks:
        assert config.close_enough(t['resources']['cpus'], updated_cpus)
def test_bump_hello_cpus():
    """Bump the 'hello' CPU setting and compare running tasks against it.

    The 'hello' task ids captured before ``config.bump_hello_cpus`` are passed
    to ``sdk_tasks.check_tasks_updated``. Every task whose name starts with
    'hello' and whose state is TASK_RUNNING must then report a ``cpus``
    resource within 0.00001 of the bumped value.
    """
    tolerance = 0.00001
    service = config.SERVICE_NAME

    config.check_running(service)
    ids_before = sdk_tasks.get_task_ids(service, 'hello')
    log.info('hello ids: ' + str(ids_before))

    target_cpus = config.bump_hello_cpus(service)

    sdk_tasks.check_tasks_updated(service, 'hello', ids_before)
    config.check_running(service)

    # Collect the running 'hello' tasks first, then check each one.
    running_hello = []
    for task in shakedown.get_service_tasks(service):
        if not task['name'].startswith('hello'):
            continue
        if task['state'] == "TASK_RUNNING":
            running_hello.append(task)

    for task in running_hello:
        assert abs(task['resources']['cpus'] - target_cpus) < tolerance
Ejemplo n.º 58
0
def test_mesos_v1_api():
    """Install Hello World with ``mesos_api_version`` "V1", then restore v0.

    The existing service is uninstalled, reinstalled with the V1 option and
    checked with ``config.check_running``. It is then uninstalled again and
    reinstalled without the option for the tests that follow.
    """
    package = config.PACKAGE_NAME
    service = FOLDERED_SERVICE_NAME

    # Swap the current install for one that uses the v1 api.
    sdk_install.uninstall(package, service)
    v1_options = {"service": {"name": service, "mesos_api_version": "V1"}}
    sdk_install.install(
        package,
        service,
        config.DEFAULT_TASK_COUNT,
        additional_options=v1_options,
    )
    config.check_running(service)

    # Clean up, then reinstall the v0 version for the following tests.
    sdk_install.uninstall(package, service)
    v0_options = {"service": {"name": service}}
    sdk_install.install(
        package,
        service,
        config.DEFAULT_TASK_COUNT,
        additional_options=v0_options,
    )
Ejemplo n.º 59
0
def test_bump_hello_cpus():
    """Bump the 'hello' CPU setting and compare running tasks against it.

    The "hello" task ids captured before ``config.bump_hello_cpus`` are passed
    to ``sdk_tasks.check_tasks_updated``. Every task returned by
    ``sdk_tasks.get_service_tasks`` for prefix "hello" whose ``state`` is
    TASK_RUNNING must then report a ``cpus`` resource within 0.00001 of the
    bumped value.
    """
    service = config.SERVICE_NAME

    def within_tolerance(left, right):
        # Strict comparison: a difference of exactly 0.00001 is rejected.
        return abs(left - right) < 0.00001

    config.check_running(service)
    ids_before = sdk_tasks.get_task_ids(service, "hello")
    log.info("hello ids: " + str(ids_before))

    target_cpus = config.bump_hello_cpus(service)

    sdk_tasks.check_tasks_updated(service, "hello", ids_before)
    config.check_running(service)

    # Select the running tasks first, then check each one.
    hello_tasks = sdk_tasks.get_service_tasks(service, task_prefix="hello")
    running = list(filter(lambda task: task.state == "TASK_RUNNING", hello_tasks))
    for task in running:
        assert within_tolerance(task.resources["cpus"], target_cpus)