예제 #1
0
def test_modify_app_config():
    """Check that modifying the app config does not trigger a recovery.

    One env var is incremented by one, the journal/name/data tasks are all
    expected to be replaced, and the recovery plan fetched afterwards must
    equal the one fetched before the update.
    """
    sdk_plan.wait_for_completed_recovery(foldered_name)
    recovery_plan_before = sdk_plan.get_plan(foldered_name, "recovery")

    env_key = "TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS"
    # Snapshot the current task ids per pod type (journal, then name, then data).
    ids_before = [
        (pod_type, sdk_tasks.get_task_ids(foldered_name, pod_type))
        for pod_type in ("journal", "name", "data")
    ]

    app_definition = sdk_marathon.get_config(foldered_name)
    log.info("marathon config: ")
    log.info(app_definition)
    current_expiry = int(app_definition["env"][env_key])
    app_definition["env"][env_key] = str(current_expiry + 1)
    sdk_marathon.update_app(app_definition, timeout=15 * 60)

    # All tasks should be updated because hdfs-site.xml has changed
    config.check_healthy(service_name=foldered_name)
    for pod_type, previous_ids in ids_before:
        sdk_tasks.check_tasks_updated(foldered_name, pod_type, previous_ids)

    sdk_plan.wait_for_completed_recovery(foldered_name)
    recovery_plan_after = sdk_plan.get_plan(foldered_name, "recovery")
    assert recovery_plan_before == recovery_plan_after
예제 #2
0
def setup_constraint_switch():
    """Reinstall the service pinned to one agent, then re-pin it to another.

    Returns:
        A tuple ``(some_agent, other_agent, hello_ids)``: the agent used in the
        initial placement, the agent named by the updated placement, and the
        hello task ids captured before the placement was changed.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = shakedown.get_private_agents()
    first_agent = private_agents[0]
    second_agent = private_agents[1]
    log.info('Agents: %s %s', first_agent, second_agent)
    assert first_agent != second_agent

    placement_template = '[["hostname", "LIKE", "{}"]]'
    hello_options = {
        "count": 1,
        # First, we stick the pod to the first agent
        "placement": placement_template.format(first_agent),
    }
    install_options = _escape_placement_for_1_9({
        "service": {"yaml": "marathon_constraint"},
        "hello": hello_options,
        "world": {"count": 0},
    })
    sdk_install.install(
        config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=install_options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    initial_hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    # Now, stick it to the second agent
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition['env']['HELLO_PLACEMENT'] = placement_template.format(second_agent)
    sdk_marathon.update_app(config.SERVICE_NAME, app_definition)
    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return first_agent, second_agent, initial_hello_ids
예제 #3
0
def test_custom_zookeeper():
    """Point the service at a custom ZooKeeper path and verify the switch.

    A topic is created against the default path first; after the brokers are
    redeployed against the custom path, the zookeeper endpoint must report
    that path and the topic list must contain no user-created topics.
    """
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)
    zk_env_key = 'KAFKA_ZOOKEEPER_URI'
    ids_before = sdk_tasks.get_task_ids(service_name, pod_prefix)

    # create a topic against the default zk:
    test_utils.create_topic(config.DEFAULT_TOPIC_NAME, service_name=service_name)

    app_definition = sdk_marathon.get_config(service_name)
    # should be using default path when this envvar is empty/unset:
    assert app_definition['env'][zk_env_key] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that it's automatically cleaned up in uninstall:
    custom_zk_uri = 'master.mesos:2181/{}/CUSTOMPATH'.format(sdk_utils.get_zk_path(service_name))
    app_definition['env'][zk_env_key] = custom_zk_uri
    sdk_marathon.update_app(service_name, app_definition)

    sdk_tasks.check_tasks_updated(service_name, pod_prefix, ids_before)
    sdk_plan.wait_for_completed_deployment(service_name)

    # wait for brokers to finish registering
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=service_name)

    reported_zk = sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, 'endpoints zookeeper')
    assert reported_zk.rstrip('\n') == custom_zk_uri

    # topic created earlier against default zk should no longer be present:
    topics_after = sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, 'topic list', json=True)
    test_utils.assert_topic_lists_are_equal_without_automatic_topics([], topics_after)
예제 #4
0
def test_deploy():
    """Confirm tasks fail to launch at first, then fix the env and deploy.

    For ``failure_window_seconds`` the task states are polled; more than three
    consecutive polls that see TASK_RUNNING fail the test. Afterwards the
    missing env vars are added in marathon and the service must come up.
    """
    failure_window_seconds = 30
    # taskcfg.yml will initially fail to deploy because several options are missing in the default
    # marathon.json.mustache. verify that tasks are failing for 30s before continuing.
    print('Checking that tasks are failing to launch for at least {}s'.format(failure_window_seconds))

    # we can get brief blips of TASK_RUNNING but they shouldnt last more than 2-3s:
    running_streak = 0

    def poll_task_states():
        nonlocal running_streak
        states = [task['state'] for task in shakedown.get_service_tasks(PACKAGE_NAME)]
        print('Task states: {}'.format(states))
        if 'TASK_RUNNING' not in states:
            running_streak = 0
            return False
        running_streak += 1
        assert running_streak <= 3
        # Always report "not done" so the poller keeps going until the timeout.
        return False

    try:
        spin.time_wait_noisy(lambda: poll_task_states(), timeout_seconds=failure_window_seconds)
    except shakedown.TimeoutExpired:
        print('Timeout reached as expected')

    # add the needed envvars in marathon and confirm that the deployment succeeds:
    app_definition = marathon.get_config(PACKAGE_NAME)
    app_env = app_definition['env']
    del app_env['SLEEP_DURATION']
    app_env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    app_env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    marathon.update_app(PACKAGE_NAME, app_definition)

    check_running()
예제 #5
0
def test_modify_app_config_rollback():
    """Change an env var, then roll back to the previous config mid-update.

    The journal tasks must be replaced once for the change and once more for
    the rollback, the env var must end at its original value, and the data
    tasks must keep their original ids throughout.
    """
    env_key = "TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS"

    journal_ids_initial = sdk_tasks.get_task_ids(foldered_name, "journal")
    data_ids_initial = sdk_tasks.get_task_ids(foldered_name, "data")

    # Two independent fetches: one kept pristine for the rollback, one to edit.
    pristine_config = sdk_marathon.get_config(foldered_name)
    edited_config = sdk_marathon.get_config(foldered_name)
    log.info("marathon config: ")
    log.info(edited_config)
    original_expiry = int(edited_config["env"][env_key])
    log.info("expiry ms: " + str(original_expiry))
    edited_config["env"][env_key] = str(original_expiry + 1)
    sdk_marathon.update_app(edited_config, timeout=15 * 60)

    # Wait for journal nodes to be affected by the change
    sdk_tasks.check_tasks_updated(foldered_name, "journal", journal_ids_initial)
    journal_ids_after_change = sdk_tasks.get_task_ids(foldered_name, "journal")

    log.info("old config: ")
    log.info(pristine_config)
    # Put the old config back (rollback)
    sdk_marathon.update_app(pristine_config)

    # Wait for the journal nodes to return to their old configuration
    sdk_tasks.check_tasks_updated(foldered_name, "journal", journal_ids_after_change)
    config.check_healthy(service_name=foldered_name)

    final_config = sdk_marathon.get_config(foldered_name)
    assert int(final_config["env"][env_key]) == original_expiry

    # Data tasks should not have been affected
    sdk_tasks.check_tasks_not_updated(foldered_name, "data", data_ids_initial)
예제 #6
0
def test_port_static_to_dynamic_port():
    """Set BROKER_PORT to '0' and verify that no broker ends up on port 9092.

    Checks both the per-broker ``broker get`` output and every entry of the
    ``endpoints broker`` address and dns lists.
    """
    static_port = 9092
    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)

    sdk_tasks.check_running(config.SERVICE_NAME, config.DEFAULT_BROKER_COUNT)
    ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, pod_prefix)

    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition['env']['BROKER_PORT'] = '0'
    sdk_marathon.update_app(config.SERVICE_NAME, app_definition)

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, pod_prefix, ids_before)
    # all tasks are running
    sdk_tasks.check_running(config.SERVICE_NAME, config.DEFAULT_BROKER_COUNT)

    for broker_index in range(config.DEFAULT_BROKER_COUNT):
        broker_info = sdk_cmd.svc_cli(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            'broker get {}'.format(broker_index),
            json=True)
        assert broker_info['port'] != static_port

    endpoints = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, config.SERVICE_NAME, 'endpoints broker', json=True)
    assert len(endpoints['address']) == config.DEFAULT_BROKER_COUNT
    assert len(endpoints['dns']) == config.DEFAULT_BROKER_COUNT

    # The port is whatever follows the last ':' of each endpoint entry.
    for endpoint_kind in ('address', 'dns'):
        for endpoint in endpoints[endpoint_kind]:
            assert int(endpoint.split(':')[-1]) != static_port
def test_node_replace_replaces_node():
    """Replace pod node-2 while a placement constraint keeps it off its old host.

    The original PLACEMENT_CONSTRAINT value is restored in a ``finally`` block
    whether or not the replace succeeds.
    """
    target_task_name = 'node-2-server'
    matching_tasks = [
        task for task in sdk_tasks.get_summary() if task.name == target_task_name]
    task_to_replace = matching_tasks[0]
    log.info('avoid host for task {}'.format(task_to_replace))

    # Strip the trailing '-server' to get the pod name.
    pod_name = task_to_replace.name[:-len('-server')]

    # Update the placement constraints so the new node doesn't end up on the same host
    constraint_key = 'PLACEMENT_CONSTRAINT'
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    previous_constraint = app_definition['env'][constraint_key]
    try:
        app_definition['env'][constraint_key] = \
            '[["hostname", "UNLIKE", "{}"]]'.format(task_to_replace.host)
        sdk_marathon.update_app(config.SERVICE_NAME, app_definition)

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        # start replace and wait for it to finish
        sdk_cmd.svc_cli(
            config.PACKAGE_NAME, config.SERVICE_NAME, 'pod replace {}'.format(pod_name))
        sdk_plan.wait_for_kicked_off_recovery(config.SERVICE_NAME)
        sdk_plan.wait_for_completed_recovery(
            config.SERVICE_NAME, timeout_seconds=RECOVERY_TIMEOUT_SECONDS)

    finally:
        # revert to prior placement setting before proceeding with tests: avoid getting stuck.
        app_definition['env'][constraint_key] = previous_constraint
        sdk_marathon.update_app(config.SERVICE_NAME, app_definition)

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
예제 #8
0
def test_deploy():
    """Wait for hello-0-server to report a new TASK_FAILED, then fix the env.

    The status history is polled once per second for up to
    ``deadline_seconds``; once a new TASK_FAILED shows up, the missing env
    vars are added in marathon and the service must reach a running state.
    """
    deadline_seconds = 30
    # taskcfg.yml will initially fail to deploy because several options are missing in the default
    # sdk_marathon.json.mustache. verify that the tasks are failing before continuing.
    task_name = 'hello-0-server'
    log.info('Checking that {} is failing to launch within {}s'.format(task_name, deadline_seconds))

    baseline_statuses = sdk_tasks.get_status_history(task_name)

    # wait for new TASK_FAILEDs to appear:
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=1000*deadline_seconds,
        retry_on_result=lambda res: not res)
    def saw_new_failure():
        current_statuses = sdk_tasks.get_status_history(task_name)
        assert len(current_statuses) >= len(baseline_statuses)

        # Only the entries appended since the baseline snapshot are of interest.
        fresh_statuses = current_statuses[len(baseline_statuses):]
        log.info('New {} statuses: {}'.format(task_name, ', '.join(fresh_statuses)))
        return 'TASK_FAILED' in fresh_statuses

    saw_new_failure()

    # add the needed envvars in marathon and confirm that the deployment succeeds:
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_env = app_definition['env']
    del app_env['SLEEP_DURATION']
    app_env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    app_env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    sdk_marathon.update_app(config.SERVICE_NAME, app_definition)

    config.check_running()
def test_no_change():
    """Verify that re-submitting an unchanged marathon config is a no-op.

    The plan output must be identical before and after the update, the broker
    tasks must NOT be replaced within 60 seconds, and the plan, its first
    phase and that phase's first ``DEFAULT_BROKER_COUNT`` steps must all be
    COMPLETE.
    """
    pod_prefix = '{}-'.format(DEFAULT_POD_TYPE)
    plan_command = 'plan show {}'.format(DEFAULT_PLAN_NAME)

    broker_ids = tasks.get_task_ids(SERVICE_NAME, pod_prefix)
    plan1 = service_cli(plan_command)

    marathon_config = marathon.get_config(SERVICE_NAME)
    marathon.update_app(SERVICE_NAME, marathon_config)

    plan2 = service_cli(plan_command)

    assert plan1 == plan2
    try:
        tasks.check_tasks_updated(SERVICE_NAME, pod_prefix, broker_ids, timeout_seconds=60)
    except AssertionError:
        # An assertion raised by the check itself is a real failure: propagate it.
        raise
    except Exception:
        # Expected path: the check gave up because no task was replaced.
        # Deliberately not a bare ``except`` so KeyboardInterrupt/SystemExit
        # still abort the test run.
        pass
    else:
        # The check returned normally, i.e. the tasks were restarted.
        raise AssertionError("Should not restart tasks now")

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    assert plan2['status'] == 'COMPLETE'
    first_phase = plan2['phases'][0]
    assert first_phase['status'] == 'COMPLETE'

    for step in range(DEFAULT_BROKER_COUNT):
        assert first_phase['steps'][step]['status'] == 'COMPLETE'
예제 #10
0
def setup_constraint_switch():
    """Reinstall the service pinned to one agent, then re-pin it to another.

    Returns:
        A tuple ``(some_agent, other_agent, hello_ids)``: the hostname used in
        the initial placement, the hostname named by the updated placement,
        and the hello task ids captured before the placement was changed.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = sdk_agents.get_private_agents()
    first_host = private_agents[0]["hostname"]
    second_host = private_agents[1]["hostname"]
    log.info("Agents: %s %s", first_host, second_host)
    assert first_host != second_host

    placement_template = '[["hostname", "LIKE", "{}"]]'
    hello_options = {
        "count": 1,
        # First, we stick the pod to the first host
        "placement": placement_template.format(first_host),
    }
    install_options = _escape_placement_for_1_9(
        {
            "service": {"yaml": "marathon_constraint"},
            "hello": hello_options,
            "world": {"count": 0},
        }
    )
    sdk_install.install(
        config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=install_options
    )
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    initial_hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello")

    # Now, stick it to the second host
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition["env"]["HELLO_PLACEMENT"] = placement_template.format(second_host)
    sdk_marathon.update_app(app_definition)
    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return first_host, second_host, initial_hello_ids
예제 #11
0
def test_deploy():
    """Wait for hello-0-server to report a new TASK_FAILED, then fix the env.

    After the deployment has kicked off, the state history is polled once per
    second for up to ``deadline_seconds``; once a new TASK_FAILED shows up,
    the missing env vars are added in marathon and the service must reach a
    running state.
    """
    deadline_seconds = 600
    sdk_plan.wait_for_kicked_off_deployment(config.SERVICE_NAME)
    # taskcfg.yml will initially fail to deploy because several options are missing in the default
    # sdk_marathon.json.mustache. verify that the tasks are failing before continuing.
    task_name = 'hello-0-server'
    log.info('Checking that {} is failing to launch within {}s'.format(task_name, deadline_seconds))

    baseline_history = _get_state_history(task_name)

    # wait for new TASK_FAILEDs to appear:
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=1000 * deadline_seconds,
        retry_on_result=lambda res: not res)
    def saw_new_failure():
        current_history = _get_state_history(task_name)
        assert len(current_history) >= len(baseline_history)

        # Only the entries appended since the baseline snapshot are of interest.
        fresh_history = current_history[len(baseline_history):]
        log.info("Added {} state history: {}".format(task_name, ", ".join(fresh_history)))
        return "TASK_FAILED" in fresh_history

    saw_new_failure()

    # add the needed envvars in marathon and confirm that the deployment succeeds:
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_env = app_definition["env"]
    del app_env["SLEEP_DURATION"]
    app_env["TASKCFG_ALL_OUTPUT_FILENAME"] = "output"
    app_env["TASKCFG_ALL_SLEEP_DURATION"] = "1000"
    sdk_marathon.update_app(app_definition)

    config.check_running()
예제 #12
0
def test_modify_app_config_rollback():
    """Change an env var, then roll back to the previous config mid-update.

    The journal tasks must be replaced once for the change and once more for
    the rollback, the env var must end at its original value, and the data
    tasks must keep their original ids throughout.
    """
    env_key = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS'
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    journal_ids_initial = sdk_tasks.get_task_ids(service_name, 'journal')
    data_ids_initial = sdk_tasks.get_task_ids(service_name, 'data')

    # Two independent fetches: one kept pristine for the rollback, one to edit.
    pristine_config = sdk_marathon.get_config(service_name)
    edited_config = sdk_marathon.get_config(service_name)
    log.info('marathon config: ')
    log.info(edited_config)
    original_expiry = int(edited_config['env'][env_key])
    log.info('expiry ms: ' + str(original_expiry))
    edited_config['env'][env_key] = str(original_expiry + 1)
    sdk_marathon.update_app(service_name, edited_config, timeout=15 * 60)

    # Wait for journal nodes to be affected by the change
    sdk_tasks.check_tasks_updated(service_name, 'journal', journal_ids_initial)
    journal_ids_after_change = sdk_tasks.get_task_ids(service_name, 'journal')

    log.info('old config: ')
    log.info(pristine_config)
    # Put the old config back (rollback)
    sdk_marathon.update_app(service_name, pristine_config)

    # Wait for the journal nodes to return to their old configuration
    sdk_tasks.check_tasks_updated(service_name, 'journal', journal_ids_after_change)
    config.check_healthy(service_name=service_name)

    final_config = sdk_marathon.get_config(service_name)
    assert int(final_config['env'][env_key]) == original_expiry

    # Data tasks should not have been affected
    sdk_tasks.check_tasks_not_updated(service_name, 'data', data_ids_initial)
예제 #13
0
def test_modify_app_config():
    """Check that modifying the app config does not trigger a recovery.

    One env var is incremented by one, the journal/name/data tasks are all
    expected to be replaced, and the recovery plan fetched afterwards must
    equal the one fetched before the update.
    """
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    sdk_plan.wait_for_completed_recovery(service_name)
    recovery_plan_before = sdk_plan.get_plan(service_name, "recovery")

    env_key = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS'
    # Snapshot the current task ids per pod type (journal, then name, then data).
    ids_before = [
        (pod_type, sdk_tasks.get_task_ids(service_name, pod_type))
        for pod_type in ('journal', 'name', 'data')
    ]

    app_definition = sdk_marathon.get_config(service_name)
    log.info('marathon config: ')
    log.info(app_definition)
    current_expiry = int(app_definition['env'][env_key])
    app_definition['env'][env_key] = str(current_expiry + 1)
    sdk_marathon.update_app(service_name, app_definition, timeout=15 * 60)

    # All tasks should be updated because hdfs-site.xml has changed
    config.check_healthy(service_name=service_name)
    for pod_type, previous_ids in ids_before:
        sdk_tasks.check_tasks_updated(service_name, pod_type, previous_ids)

    sdk_plan.wait_for_completed_recovery(service_name)
    recovery_plan_after = sdk_plan.get_plan(service_name, "recovery")
    assert recovery_plan_before == recovery_plan_after
예제 #14
0
def test_custom_zookeeper():
    """Point the service at a custom ZooKeeper path and verify the switch.

    A topic is created against the default path first; after the brokers are
    redeployed against the custom path, the zookeeper endpoint must report
    that path and the topic list must be empty.
    """
    def kafka_cli(command, **kwargs):
        # Every CLI call in this test targets the same package and service.
        return sdk_cmd.svc_cli(config.PACKAGE_NAME, FOLDERED_SERVICE_NAME, command, **kwargs)

    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)
    zk_env_key = 'KAFKA_ZOOKEEPER_URI'
    ids_before = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, pod_prefix)

    # create a topic against the default zk:
    kafka_cli('topic create {}'.format(config.DEFAULT_TOPIC_NAME), json=True)
    assert kafka_cli('topic list', json=True) == [config.DEFAULT_TOPIC_NAME]

    app_definition = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    # should be using default path when this envvar is empty/unset:
    assert app_definition['env'][zk_env_key] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that it's automatically cleaned up in uninstall:
    custom_zk_uri = 'master.mesos:2181/{}/CUSTOMPATH'.format(ZK_SERVICE_PATH)
    app_definition['env'][zk_env_key] = custom_zk_uri
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, app_definition)

    sdk_tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, pod_prefix, ids_before)
    sdk_plan.wait_for_completed_deployment(FOLDERED_SERVICE_NAME)

    # wait for brokers to finish registering
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=FOLDERED_SERVICE_NAME)

    reported_zk = kafka_cli('endpoints zookeeper')
    assert reported_zk.rstrip('\n') == custom_zk_uri

    # topic created earlier against default zk should no longer be present:
    assert kafka_cli('topic list', json=True) == []
def change_region_config(region_name):
    """Set or clear ``SERVICE_REGION`` in the service's marathon config and submit it.

    Args:
        region_name: Value to store in the ``SERVICE_REGION`` env var, or
            ``None`` to remove the variable. Clearing a region that is not
            currently set is a no-op rather than an error.

    The update is submitted with ``wait_for_completed_deployment=False``.
    """
    service_config = sdk_marathon.get_config(config.SERVICE_NAME)
    env = service_config['env']
    if region_name is None:
        # pop() with a default keeps the reset idempotent: no KeyError when
        # the variable is already absent.
        env.pop('SERVICE_REGION', None)
    else:
        env['SERVICE_REGION'] = region_name

    sdk_marathon.update_app(config.SERVICE_NAME, service_config, wait_for_completed_deployment=False)
예제 #16
0
def test_custom_seccomp_profile():
    """Switch the hello pod to the test seccomp profile and wait on marathon."""
    service_name = config.SERVICE_NAME
    sdk_plan.wait_for_completed_deployment(service_name)
    app_definition = sdk_marathon.get_config(service_name)

    # uname will now be disallowed and svc should crashloop
    app_env = app_definition["env"]
    app_env["HELLO_SECCOMP_PROFILE_NAME"] = "test_profile.json"
    sdk_marathon.update_app(app_definition)
    sdk_marathon.wait_for_deployment(service_name, 60, None)
예제 #17
0
def test_uninstall():
    """Set SDK_UNINSTALL on the scheduler and expect zero tasks afterwards."""
    config.check_running()

    # add the needed envvar in marathon and confirm that the uninstall "deployment" succeeds:
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition["env"]["SDK_UNINSTALL"] = "w00t"
    sdk_marathon.update_app(app_definition)

    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    sdk_tasks.check_running(config.SERVICE_NAME, 0, allow_more=False)
예제 #18
0
def test_uninstall():
    """Set SDK_UNINSTALL on the scheduler and expect the task count check for 0 to pass."""
    config.check_running()

    # add the needed envvar in marathon and confirm that the uninstall "deployment" succeeds:
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition['env']['SDK_UNINSTALL'] = 'w00t'
    sdk_marathon.update_app(config.SERVICE_NAME, app_definition)

    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    sdk_tasks.check_running(config.SERVICE_NAME, 0)
예제 #19
0
def test_plugin_install_and_uninstall(default_populated_index):
    """Install a plugin through ELASTICSEARCH_PLUGINS, then remove it again."""
    plugin_name = 'analysis-phonetic'

    def submit_plugins(value):
        # Fetch a fresh config for every change before editing and submitting it.
        app_definition = marathon.get_config(PACKAGE_NAME)
        app_definition['env']['ELASTICSEARCH_PLUGINS'] = value
        marathon.update_app(PACKAGE_NAME, app_definition)

    submit_plugins(plugin_name)
    check_plugin_installed(plugin_name)

    submit_plugins("")
    check_plugin_uninstalled(plugin_name)
예제 #20
0
def test_bump_node_counts():
    """Add one data, one ingest and one coordinator node in a single update."""
    # Run this test last, as it changes the task count
    count_keys = ('DATA_NODE_COUNT', 'INGEST_NODE_COUNT', 'COORDINATOR_NODE_COUNT')

    app_definition = marathon.get_config(PACKAGE_NAME)
    for count_key in count_keys:
        current_count = int(app_definition['env'][count_key])
        app_definition['env'][count_key] = str(current_count + 1)
    marathon.update_app(PACKAGE_NAME, app_definition)

    # Three counters were bumped by one each.
    tasks.check_running(PACKAGE_NAME, DEFAULT_TASK_COUNT + 3)
예제 #21
0
def test_changing_discovery_replaces_certificate_sans(hello_world_service):
    """
    Update service configuration to change discovery prefix of a task.
    Scheduler should update task and new SANs should be generated.

    The discovery task's ``server.crt`` is read before and after the update;
    each time its Subject Alternative Names must contain the autoip name built
    from the discovery prefix in effect at that point.
    """
    def _certificate_sans(for_task_id):
        # Load end-entity certificate from PEM encoded file and list its SAN values.
        certificate = x509.load_pem_x509_certificate(
            task_exec(for_task_id, 'cat server.crt').encode('ascii'),
            DEFAULT_BACKEND)
        san_extension = certificate.extensions.get_extension_for_oid(
            ExtensionOID.SUBJECT_ALTERNATIVE_NAME)
        # NOTE(review): this reaches into private attributes of the SAN value
        # object; kept as-is to preserve which values are collected.
        return [
            san.value for san in san_extension.value._general_names._general_names]

    def _expected_san(prefix):
        # Name expected in the certificate for pod index 0 of the given prefix.
        return '{name}-0.{service_name}.autoip.dcos.thisdcos.directory'.format(
            name=prefix,
            service_name=config.SERVICE_NAME)

    # Look tasks up by service name, consistent with the lookups made after
    # the update below.
    original_tasks = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'discovery')
    assert len(original_tasks) == 1, 'Expecting exactly one task ID'

    task_id = original_tasks[0]
    assert task_id

    assert _expected_san(DISCOVERY_TASK_PREFIX) in _certificate_sans(task_id)

    # Run task update with new discovery prefix
    new_prefix = DISCOVERY_TASK_PREFIX + '-new'
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    marathon_config['env']['DISCOVERY_TASK_PREFIX'] = new_prefix
    sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'discovery', original_tasks)
    sdk_tasks.check_running(config.SERVICE_NAME, 4)
    new_task_id = sdk_tasks.get_task_ids(config.SERVICE_NAME, "discovery")[0]
    assert task_id != new_task_id

    assert _expected_san(new_prefix) in _certificate_sans(new_task_id)
def test_port_dynamic_to_dynamic_port():
    """Bump BROKER_CPUS by 0.1 and verify that every broker task is replaced.

    NOTE(review): judging by the name, the brokers are expected to be on
    dynamic ports before and after; this block itself only changes the CPU
    value to force an update - confirm against the surrounding test order.
    """
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    pod_prefix = '{}-'.format(DEFAULT_POD_TYPE)
    broker_ids = tasks.get_task_ids(SERVICE_NAME, pod_prefix)

    config = marathon.get_config(SERVICE_NAME)
    # Parse as float: this test writes a fractional value back (e.g. "1.1"),
    # which int() would reject with ValueError on a later read.
    broker_cpus = float(config['env']['BROKER_CPUS'])
    # round() keeps binary floating-point noise (0.30000000000000004) out of
    # the env value; integer inputs still yield e.g. "1.1".
    config['env']['BROKER_CPUS'] = str(round(broker_cpus + 0.1, 3))
    marathon.update_app(SERVICE_NAME, config)

    tasks.check_tasks_updated(SERVICE_NAME, pod_prefix, broker_ids)
    # all tasks are running
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
예제 #23
0
def test_bump_node_counts():
    """Add one data, one ingest and one coordinator node in a single update."""
    # Run this test last, as it changes the task count
    count_keys = ('DATA_NODE_COUNT', 'INGEST_NODE_COUNT', 'COORDINATOR_NODE_COUNT')

    app_definition = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    for count_key in count_keys:
        current_count = int(app_definition['env'][count_key])
        app_definition['env'][count_key] = str(current_count + 1)
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, app_definition)

    # Three counters were bumped by one each.
    expected_task_count = config.DEFAULT_TASK_COUNT + 3
    sdk_tasks.check_running(FOLDERED_SERVICE_NAME, expected_task_count)
예제 #24
0
def test_plugin_install_and_uninstall(default_populated_index):
    """Install a plugin through TASKCFG_ALL_ELASTICSEARCH_PLUGINS, then remove it."""
    plugin_name = 'analysis-phonetic'

    def submit_plugins(value):
        # Fetch a fresh config for every change before editing and submitting it.
        app_definition = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
        app_definition['env']['TASKCFG_ALL_ELASTICSEARCH_PLUGINS'] = value
        sdk_marathon.update_app(FOLDERED_SERVICE_NAME, app_definition)

    submit_plugins(plugin_name)
    config.check_plugin_installed(plugin_name, service_name=FOLDERED_SERVICE_NAME)

    submit_plugins("")
    config.check_plugin_uninstalled(plugin_name, service_name=FOLDERED_SERVICE_NAME)
예제 #25
0
def test_state_refresh_disable_cache():
    '''Toggle state caching off and back on through a scheduler envvar.

    With DISABLE_STATE_CACHE set, the refresh_cache command must eventually
    fail with "409 Conflict"; once the envvar is removed it must succeed
    again. Task ids are expected to stay unchanged across both updates.
    '''
    service_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    refresh_command = 'debug state refresh_cache'
    cache_env_key = 'DISABLE_STATE_CACHE'

    config.check_running(service_name)
    ids_before = sdk_tasks.get_task_ids(service_name, '')

    # caching enabled by default:
    first_output = sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, refresh_command)
    assert "Received cmd: refresh" in first_output

    app_definition = sdk_marathon.get_config(service_name)
    app_definition['env'][cache_env_key] = 'any-text-here'
    sdk_marathon.update_app(service_name, app_definition)

    sdk_tasks.check_tasks_not_updated(service_name, '', ids_before)
    config.check_running(service_name)

    # caching disabled, refresh_cache should fail with a 409 error (eventually, once scheduler is up):
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=120*1000,
        retry_on_result=lambda res: not res)
    def refresh_is_rejected():
        combined_output = sdk_cmd.svc_cli(
            config.PACKAGE_NAME,
            service_name,
            refresh_command,
            return_stderr_in_stdout=True)
        return "failed: 409 Conflict" in combined_output

    refresh_is_rejected()

    app_definition = sdk_marathon.get_config(service_name)
    del app_definition['env'][cache_env_key]
    sdk_marathon.update_app(service_name, app_definition)

    sdk_tasks.check_tasks_not_updated(service_name, '', ids_before)
    config.check_running(service_name)
    shakedown.deployment_wait()  # ensure marathon thinks the deployment is complete too

    # caching reenabled, refresh_cache should succeed (eventually, once scheduler is up):
    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=120*1000,
        retry_on_result=lambda res: not res)
    def refresh_output():
        # A falsy (e.g. empty) output triggers another attempt.
        return sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, refresh_command)

    final_output = refresh_output()
    assert "Received cmd: refresh" in final_output
예제 #26
0
def test_changing_discovery_replaces_certificate_sans():
    """
    Change the discovery prefix of a task through the service configuration.
    The certificate read back after the deployment should carry a SAN built
    from the new prefix.
    """
    san_template = "{name}-0.{service_name}.autoip.dcos.thisdcos.directory"
    cert_task_name = "discovery-0-node"
    read_cert_command = "cat server.crt"

    # Load end-entity certificate from PEM encoded file
    _rc, pem_text, _stderr = sdk_cmd.service_task_exec(
        config.SERVICE_NAME, cert_task_name, read_cert_command
    )
    log.info("first server.crt: {}".format(pem_text))

    pem_bytes = pem_text.encode("ascii")
    log.info("first server.crt ascii encoded: {}".format(pem_bytes))

    original_cert = x509.load_pem_x509_certificate(pem_bytes, DEFAULT_BACKEND)
    original_san_ext = original_cert.extensions.get_extension_for_oid(
        ExtensionOID.SUBJECT_ALTERNATIVE_NAME
    )
    original_sans = [
        entry.value for entry in original_san_ext.value._general_names._general_names
    ]
    assert (
        san_template.format(name=DISCOVERY_TASK_PREFIX, service_name=config.SERVICE_NAME)
        in original_sans
    )

    # Run task update with new discovery prefix
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition["env"]["DISCOVERY_TASK_PREFIX"] = DISCOVERY_TASK_PREFIX + "-new"
    sdk_marathon.update_app(app_definition)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    _rc, pem_text, _stderr = sdk_cmd.service_task_exec(
        config.SERVICE_NAME, cert_task_name, read_cert_command
    )
    log.info("second server.crt: {}".format(pem_text))

    pem_bytes = pem_text.encode("ascii")
    log.info("second server.crt ascii encoded: {}".format(pem_bytes))
    updated_cert = x509.load_pem_x509_certificate(pem_bytes, DEFAULT_BACKEND)

    updated_san_ext = updated_cert.extensions.get_extension_for_oid(
        ExtensionOID.SUBJECT_ALTERNATIVE_NAME
    )
    updated_sans = [
        entry.value for entry in updated_san_ext.value._general_names._general_names
    ]
    assert (
        san_template.format(
            name=DISCOVERY_TASK_PREFIX + "-new", service_name=config.SERVICE_NAME
        )
        in updated_sans
    )
def test_can_adjust_config_from_dynamic_to_static_port():
    """Set BROKER_PORT to '9092' and verify every broker reports that port."""
    pod_prefix = '{}-'.format(DEFAULT_POD_TYPE)

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
    ids_before = tasks.get_task_ids(SERVICE_NAME, pod_prefix)

    app_definition = marathon.get_config(SERVICE_NAME)
    app_definition['env']['BROKER_PORT'] = '9092'
    marathon.update_app(SERVICE_NAME, app_definition)

    tasks.check_tasks_updated(SERVICE_NAME, pod_prefix, ids_before)
    # all tasks are running
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    for broker_index in range(DEFAULT_BROKER_COUNT):
        broker_info = service_cli('broker get {}'.format(broker_index))
        assert broker_info['port'] == 9092
예제 #28
0
def test_updated_placement_constraints_not_applied_with_other_changes():
    """Raise HELLO_COUNT after a placement switch and check where tasks land.

    hello-0-server is expected to stay on the first agent while the newly
    launched hello-1-server is expected on the second one.
    """
    # The task ids returned by the setup helper are not needed here.
    first_agent, second_agent, _hello_ids = setup_constraint_switch()

    # Additionally, modify the task count to be higher.
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition["env"]["HELLO_COUNT"] = "2"
    sdk_marathon.update_app(app_definition)

    # Now, an additional hello-server task will launch
    # where the _new_ constraint will tell it to be.
    sdk_tasks.check_running(config.SERVICE_NAME, 2)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    host_of_first_task = get_task_host("hello-0-server")
    assert host_of_first_task == first_agent
    host_of_second_task = get_task_host("hello-1-server")
    assert host_of_second_task == second_agent
예제 #29
0
def test_bump_node_counts():
    """Add one ingest node and one coordinator node, then wait for the service to settle."""
    global current_expected_task_count

    # Data nodes are deliberately left alone here: they are bumped in the following
    # test, and adding two of them sometimes exceeds the room available on the cluster.
    app_definition = sdk_marathon.get_config(foldered_name)
    env = app_definition['env']
    for count_key in ('INGEST_NODE_COUNT', 'COORDINATOR_NODE_COUNT'):
        env[count_key] = str(int(env[count_key]) + 1)
    sdk_marathon.update_app(foldered_name, app_definition)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Two more tasks are expected from here on (one per bumped node type).
    current_expected_task_count = current_expected_task_count + 2
    sdk_tasks.check_running(foldered_name, current_expected_task_count)
    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
예제 #30
0
def test_pod_replace_then_immediate_config_update():
    """Replace data-0 and push a config update right behind it."""
    plugin_name = 'analysis-phonetic'

    # Prepare the new configuration first so it can be submitted without delay.
    app_definition = sdk_marathon.get_config(foldered_name)
    env = app_definition['env']
    env['TASKCFG_ALL_ELASTICSEARCH_PLUGINS'] = plugin_name
    env['UPDATE_STRATEGY'] = 'parallel'

    sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'pod replace data-0')
    # No waiting in between: the update is issued immediately after the replace.
    sdk_marathon.update_app(foldered_name, app_definition)

    # Every node, data-0 in particular, should come up with the updated config.
    config.check_elasticsearch_plugin_installed(plugin_name, service_name=foldered_name)
    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
예제 #31
0
def test_node_replace_replaces_node():
    """Replace pod node-2 after constraining placement away from its current host."""
    target_pod = 'node-2'
    current_host = get_pod_host(target_pod)
    log.info('avoid host for pod {}: {}'.format(target_pod, current_host))

    service_name = config.PACKAGE_NAME

    # Exclude the current host so the replacement does not land on it again.
    app_definition = sdk_marathon.get_config(service_name)
    constraint = 'hostname:UNLIKE:{}'.format(current_host)
    app_definition['env']['PLACEMENT_CONSTRAINT'] = constraint
    sdk_marathon.update_app(service_name, app_definition)

    sdk_plan.wait_for_completed_deployment(service_name)

    # Trigger the replace, then wait for recovery to start and to finish.
    replace_command = 'cassandra pod replace {}'.format(target_pod)
    cmd.run_cli(replace_command)
    sdk_plan.wait_for_kicked_off_recovery(service_name)
    sdk_plan.wait_for_completed_recovery(
        service_name, timeout_seconds=RECOVERY_TIMEOUT_SECONDS)
예제 #32
0
def test_modify_app_config():
    """Change an HDFS client setting and verify journal, name and data tasks are all updated."""
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'

    # Snapshot the task ids of each pod type before the update. Every later
    # check_tasks_updated call must be given the snapshot of its *own* pod type;
    # the 'data' check previously reused the journal ids by mistake.
    journal_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'journal')
    name_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'name')
    data_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'data')

    config = marathon.get_config(FOLDERED_SERVICE_NAME)
    utils.out('marathon config: ')
    utils.out(config)
    # Bump the value by one so the app definition actually changes.
    expiry_ms = int(config['env'][app_config_field])
    config['env'][app_config_field] = str(expiry_ms + 1)
    marathon.update_app(FOLDERED_SERVICE_NAME, config)

    # All tasks should be updated because hdfs-site.xml has changed
    check_healthy()
    tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'journal', journal_ids)
    tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'name', name_ids)
    tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'data', data_ids)
예제 #33
0
def test_adding_data_nodes_only_restarts_masters():
    """Add a data node and check that only the master tasks are updated."""
    global current_expected_task_count

    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    # Record the task ids of each node type before the change.
    ids_before = {}
    for node_type in ("master", "data", "coordinator"):
        ids_before[node_type] = sdk_tasks.get_task_ids(foldered_name, node_type)

    app_definition = sdk_marathon.get_config(foldered_name)
    env = app_definition['env']
    env['DATA_NODE_COUNT'] = str(int(env['DATA_NODE_COUNT']) + 1)
    sdk_marathon.update_app(foldered_name, app_definition)

    # One more task is expected from here on.
    current_expected_task_count = current_expected_task_count + 1
    sdk_tasks.check_running(foldered_name, current_expected_task_count)

    sdk_tasks.check_tasks_updated(foldered_name, "master", ids_before["master"])
    sdk_tasks.check_tasks_not_updated(foldered_name, "data", ids_before["data"])
    sdk_tasks.check_tasks_not_updated(foldered_name, "coordinator", ids_before["coordinator"])
예제 #34
0
def test_custom_zookeeper():
    """Point Kafka at a custom ZooKeeper path and verify the service reports it."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)
    broker_ids = sdk_tasks.get_task_ids(foldered_name, pod_prefix)

    def list_topics():
        return sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'topic list', json=True)

    # Create a topic while still on the default zk:
    create_topic_command = 'topic create {}'.format(config.DEFAULT_TOPIC_NAME)
    sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, create_topic_command, json=True)
    assert list_topics() == [config.DEFAULT_TOPIC_NAME]

    app_definition = sdk_marathon.get_config(foldered_name)
    env = app_definition['env']
    # An empty/unset envvar means the default path is in use:
    assert env['KAFKA_ZOOKEEPER_URI'] == ''

    # Keep the custom zk path WITHIN the 'dcos-service-' path, so that uninstall cleans it up automatically:
    zk_root = sdk_utils.get_zk_path(foldered_name)
    zk_path = 'master.mesos:2181/{}/CUSTOMPATH'.format(zk_root)
    env['KAFKA_ZOOKEEPER_URI'] = zk_path
    sdk_marathon.update_app(foldered_name, app_definition)

    sdk_tasks.check_tasks_updated(foldered_name, pod_prefix, broker_ids)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Wait for the brokers to finish registering.
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    reported_zk = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'endpoints zookeeper')
    assert reported_zk.rstrip('\n') == zk_path

    # The topic created earlier against the default zk should no longer be present:
    assert list_topics() == []
예제 #35
0
def test_state_refresh_disable_cache():
    '''Toggles state caching off and back on via a scheduler envvar.'''
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(foldered_name)
    task_ids = sdk_tasks.get_task_ids(foldered_name, '')

    def refresh_cache():
        return sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'state refresh_cache')

    # Caching is on by default, so the refresh command goes through:
    assert "Received cmd: refresh" in refresh_cache()

    app_definition = sdk_marathon.get_config(foldered_name)
    app_definition['env']['DISABLE_STATE_CACHE'] = 'any-text-here'
    sdk_marathon.update_app(foldered_name, app_definition)

    sdk_tasks.check_tasks_not_updated(foldered_name, '', task_ids)
    config.check_running(foldered_name)

    # With caching disabled, refresh_cache should fail with a 409 error
    # (eventually, once the scheduler is up):
    def refresh_rejected_with_409():
        try:
            refresh_cache()
        except Exception as err:
            return "failed: 409 Conflict" in err.args[0]
        return False

    shakedown.wait_for(lambda: refresh_rejected_with_409(), timeout_seconds=120.)

    app_definition = sdk_marathon.get_config(foldered_name)
    del app_definition['env']['DISABLE_STATE_CACHE']
    sdk_marathon.update_app(foldered_name, app_definition)

    sdk_tasks.check_tasks_not_updated(foldered_name, '', task_ids)
    config.check_running(foldered_name)
    shakedown.deployment_wait()  # marathon must consider the deployment complete too

    # With caching re-enabled, refresh_cache should succeed
    # (eventually, once the scheduler is up):
    stdout = shakedown.wait_for(lambda: refresh_cache(), timeout_seconds=120.)
    assert "Received cmd: refresh" in stdout
예제 #36
0
def test_state_refresh_disable_cache():
    '''Toggles state caching off and back on via a scheduler envvar.'''
    check_running(FOLDERED_SERVICE_NAME)
    task_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, '')

    def refresh_cache():
        command = 'hello-world --name={} state refresh_cache'.format(FOLDERED_SERVICE_NAME)
        return sdk_cmd.run_cli(command)

    # Caching is on by default, so the refresh command goes through:
    assert "Received cmd: refresh" in refresh_cache()

    app_definition = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    app_definition['env']['DISABLE_STATE_CACHE'] = 'any-text-here'
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, app_definition)

    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, '', task_ids)
    check_running(FOLDERED_SERVICE_NAME)

    # With caching disabled, refresh_cache should fail with a 409 error
    # (eventually, once the scheduler is up):
    def refresh_rejected_with_409():
        try:
            refresh_cache()
        except Exception as err:
            return "failed: 409 Conflict" in err.args[0]
        return False

    shakedown.wait_for(lambda: refresh_rejected_with_409(), timeout_seconds=120.)

    app_definition = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    del app_definition['env']['DISABLE_STATE_CACHE']
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, app_definition)

    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, '', task_ids)
    check_running(FOLDERED_SERVICE_NAME)
    shakedown.deployment_wait()  # marathon must consider the deployment complete too

    # With caching re-enabled, refresh_cache should succeed
    # (eventually, once the scheduler is up):
    stdout = shakedown.wait_for(lambda: refresh_cache(), timeout_seconds=120.)
    assert "Received cmd: refresh" in stdout
예제 #37
0
def test_custom_zookeeper():
    """Point Kafka at a custom ZooKeeper path and verify the service reports it."""
    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)
    broker_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, pod_prefix)

    def kafka_cli(command, **kwargs):
        return test_utils.service_cli(command, service_name=FOLDERED_SERVICE_NAME, **kwargs)

    # Sanity check: brokers should be reinitialized:
    brokers = kafka_cli('broker list')
    expected_brokers = {str(i) for i in range(config.DEFAULT_BROKER_COUNT)}
    assert set(brokers) == expected_brokers

    # Create a topic while still on the default zk:
    kafka_cli('topic create {}'.format(DEFAULT_TOPIC_NAME))
    assert kafka_cli('topic list') == [DEFAULT_TOPIC_NAME]

    app_definition = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    env = app_definition['env']
    # An empty/unset envvar means the default path is in use:
    assert env['KAFKA_ZOOKEEPER_URI'] == ''

    # Keep the custom zk path WITHIN the 'dcos-service-' path, so that uninstall cleans it up automatically:
    zk_path = 'master.mesos:2181/dcos-service-{}/CUSTOMPATH'.format(ZK_SERVICE_PATH)
    env['KAFKA_ZOOKEEPER_URI'] = zk_path
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, app_definition)

    sdk_tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, pod_prefix, broker_ids)
    sdk_plan.wait_for_completed_deployment(FOLDERED_SERVICE_NAME)

    reported_zk = kafka_cli('endpoints zookeeper', get_json=False)
    assert reported_zk.rstrip('\n') == zk_path

    # The topic created earlier against the default zk should no longer be present:
    assert kafka_cli('topic list') == []
예제 #38
0
def test_modify_app_config_rollback():
    """Change an app config value, roll it back mid-update, and check which tasks were touched.

    The journal tasks are expected to be updated by both the change and the
    rollback, while the zkfc and data tasks are expected to keep their
    original task ids throughout.
    """
    check_healthy()
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'

    # Snapshot the task ids of every pod type before anything is changed.
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    name_ids = tasks.get_task_ids(PACKAGE_NAME, 'name')
    zkfc_ids = tasks.get_task_ids(PACKAGE_NAME, 'zkfc')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')
    print('journal ids: ' + str(journal_ids))
    print('name ids: ' + str(name_ids))
    print('zkfc ids: ' + str(zkfc_ids))
    print('data ids: ' + str(data_ids))

    # Fetch the config twice: old_config stays untouched for the rollback,
    # config is the copy that gets modified.
    old_config = marathon.get_config(PACKAGE_NAME)
    config = marathon.get_config(PACKAGE_NAME)
    print('marathon config: ')
    print(config)
    expiry_ms = int(config['env'][app_config_field])
    print('expiry ms: ' + str(expiry_ms))
    config['env'][app_config_field] = str(expiry_ms + 1)
    marathon.update_app(PACKAGE_NAME, config)

    # Wait for journal nodes to be affected by the change
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    # Re-snapshot so the rollback is compared against the post-change journal ids.
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')

    print('old config: ')
    print(old_config)
    # Put the old config back (rollback)
    marathon.update_app(PACKAGE_NAME, old_config)

    # Wait for the journal nodes to return to their old configuration
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    check_healthy()

    # The setting must be back at the value read before the change.
    config = marathon.get_config(PACKAGE_NAME)
    assert int(config['env'][app_config_field]) == expiry_ms

    # ZKFC and Data tasks should not have been affected
    # NOTE(review): name_ids is captured and printed above but never checked here.
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'zkfc', zkfc_ids)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'data', data_ids)
예제 #39
0
def test_adding_data_node_only_restarts_masters():
    """Add one data node; masters are expected to be updated, data and coordinators not."""
    global current_expected_task_count

    # Record the task ids of each node type before the change.
    master_ids_before = sdk_tasks.get_task_ids(foldered_name, "master")
    data_ids_before = sdk_tasks.get_task_ids(foldered_name, "data")
    coordinator_ids_before = sdk_tasks.get_task_ids(foldered_name, "coordinator")

    app_definition = sdk_marathon.get_config(foldered_name)
    env = app_definition["env"]
    env["DATA_NODE_COUNT"] = str(int(env["DATA_NODE_COUNT"]) + 1)
    sdk_marathon.update_app(app_definition)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # One more task is expected from here on.
    current_expected_task_count = current_expected_task_count + 1
    sdk_tasks.check_running(foldered_name, current_expected_task_count)

    sdk_tasks.check_tasks_updated(foldered_name, "master", master_ids_before)
    sdk_tasks.check_tasks_not_updated(foldered_name, "data", data_ids_before)
    sdk_tasks.check_tasks_not_updated(foldered_name, "coordinator", coordinator_ids_before)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
예제 #40
0
def test_custom_decommission():
    """Scale the world pods down twice and check the decommission plan's first step each time."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    def set_world_count(count):
        # The app definition is fetched afresh for every change.
        app_definition = sdk_marathon.get_config(foldered_name)
        app_definition['env']['WORLD_COUNT'] = count
        sdk_marathon.update_app(foldered_name, app_definition)

    def assert_custom_step_in_plan():
        sdk_plan.wait_for_completed_plan(foldered_name, 'decommission')
        plan = sdk_plan.get_decommission_plan(foldered_name)
        log.info("decommission plan: {}".format(plan))

        first_step_name = plan['phases'][0]['steps'][0]['name']
        assert "custom_decomission_step" == first_step_name

    # First decommission: drop to a single world pod.
    set_world_count('1')
    assert_custom_step_in_plan()

    # Scale back up.
    set_world_count('2')
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Decommission a second time.
    set_world_count('1')
    sdk_plan.wait_for_completed_deployment(foldered_name)
    assert_custom_step_in_plan()
예제 #41
0
def test_deploy():
    """Check that tasks fail to launch with the default env, then fix the env and expect success."""
    wait_time = 30
    # taskcfg.yml will initially fail to deploy because several options are missing in the default
    # sdk_marathon.json.mustache. verify that tasks are failing for 30s before continuing.
    log.info(
        'Checking that tasks are failing to launch for at least {}s'.format(
            wait_time))

    # we can get brief blips of TASK_RUNNING but they shouldnt last more than 2-3s:
    consecutive_task_running = 0

    # wait() always returns False and retry_on_result retries on falsy results, so
    # the poll is expected to run until stop_max_delay is exhausted.
    # NOTE(review): wait_fixed/stop_max_delay are presumably milliseconds (1s poll,
    # wait_time seconds total) -- confirm against the retrying documentation.
    @retrying.retry(wait_fixed=1000,
                    stop_max_delay=1000 * wait_time,
                    retry_on_result=lambda res: not res)
    def wait():
        # The counter lives in the enclosing test so it survives across polls.
        nonlocal consecutive_task_running
        svc_tasks = shakedown.get_service_tasks(config.SERVICE_NAME)
        states = [t['state'] for t in svc_tasks]
        log.info('Task states: {}'.format(states))
        if 'TASK_RUNNING' in states:
            consecutive_task_running += 1
            # NOTE(review): confirm how retrying treats an AssertionError raised
            # here (it may be retried rather than propagated immediately).
            assert consecutive_task_running <= 3
        else:
            consecutive_task_running = 0
        return False

    try:
        wait()
    except retrying.RetryError:
        # Running out of retries is the expected outcome of the poll above.
        log.info('Timeout reached as expected')

    # add the needed envvars in marathon and confirm that the deployment succeeds:
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    env = marathon_config['env']
    del env['SLEEP_DURATION']
    env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)

    config.check_running()
예제 #42
0
def test_custom_decommission():
    """Scale the world pods down twice and check the decommission plan's first step each time."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    def set_world_count(count):
        # The app definition is fetched afresh for every change.
        app_definition = sdk_marathon.get_config(foldered_name)
        app_definition["env"]["WORLD_COUNT"] = count
        sdk_marathon.update_app(app_definition)

    def assert_custom_step_in_plan():
        sdk_plan.wait_for_completed_plan(foldered_name, "decommission")
        plan = sdk_plan.get_decommission_plan(foldered_name)
        log.info(sdk_plan.plan_string("decommission", plan))

        first_step_name = plan["phases"][0]["steps"][0]["name"]
        assert "custom_decommission_step" == first_step_name

    # First decommission: drop to a single world pod.
    set_world_count("1")
    assert_custom_step_in_plan()

    # Scale back up.
    set_world_count("2")
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Decommission a second time.
    set_world_count("1")
    sdk_plan.wait_for_completed_deployment(foldered_name)
    assert_custom_step_in_plan()
예제 #43
0
def test_port_static_to_static_port():
    """Move the brokers from static port 9092 to static port 9095 and verify the endpoints."""
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    pod_prefix = '{}-'.format(DEFAULT_POD_TYPE)
    broker_ids = tasks.get_task_ids(SERVICE_NAME, pod_prefix)

    app_definition = marathon.get_config(SERVICE_NAME)
    utils.out('Old Config :{}'.format(app_definition))

    def assert_endpoints_use_port(expected_port):
        # Both the address and the dns listings must cover every broker, and the
        # text after the last ':' of each entry must be the expected port.
        endpoints = service_cli('endpoints broker')
        assert len(endpoints['address']) == DEFAULT_BROKER_COUNT
        assert len(endpoints['dns']) == DEFAULT_BROKER_COUNT

        for entry in endpoints['address']:
            assert int(entry.split(':')[-1]) == expected_port
        for entry in endpoints['dns']:
            assert int(entry.split(':')[-1]) == expected_port

    # Starting point: every broker sits on 9092.
    for broker_id in range(DEFAULT_BROKER_COUNT):
        broker_info = service_cli('broker get {}'.format(broker_id))
        assert broker_info['port'] == 9092
    assert_endpoints_use_port(9092)

    app_definition['env']['BROKER_PORT'] = '9095'
    marathon.update_app(SERVICE_NAME, app_definition)
    utils.out('New Config :{}'.format(app_definition))

    tasks.check_tasks_updated(SERVICE_NAME, pod_prefix, broker_ids)
    # All tasks are running again.
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    assert_endpoints_use_port(9095)
예제 #44
0
def setup_constraint_switch():
    """Install hello with placement on one agent, then switch the placement to another.

    Returns:
        A ``(some_agent, other_agent, hello_ids)`` tuple: the agent used in the
        install-time placement, the agent named in the updated placement, and
        the hello task ids recorded before the placement update.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = shakedown.get_private_agents()
    initial_agent = private_agents[0]
    target_agent = private_agents[1]
    log.info('Agents: %s %s', initial_agent, target_agent)
    assert initial_agent != target_agent

    placement_template = "[[\"hostname\", \"LIKE\", \"{}\"]]"
    hello_options = {
        "count": 1,
        # Start with the pod stuck to initial_agent.
        "placement": placement_template.format(initial_agent),
    }
    options = _escape_placement_for_1_9({
        "service": {"yaml": "marathon_constraint"},
        "hello": hello_options,
        "world": {"count": 0},
    })
    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    # Now switch the placement over to target_agent.
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition['env']['HELLO_PLACEMENT'] = placement_template.format(target_agent)
    sdk_marathon.update_app(config.SERVICE_NAME, app_definition)
    # Let the scheduler come up and settle before handing back to the caller.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return initial_agent, target_agent, hello_ids
예제 #45
0
def test_custom_zookeeper():
    """Point Kafka at a custom ZooKeeper path and verify the service reports it."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    pod_prefix = "{}-".format(config.DEFAULT_POD_TYPE)
    broker_ids = sdk_tasks.get_task_ids(foldered_name, pod_prefix)

    # Create a topic while still on the default zk:
    test_utils.create_topic(config.DEFAULT_TOPIC_NAME, service_name=foldered_name)

    app_definition = sdk_marathon.get_config(foldered_name)
    env = app_definition["env"]
    # An empty/unset envvar means the default path is in use:
    assert env["KAFKA_ZOOKEEPER_URI"] == ""

    # Keep the custom zk path WITHIN the 'dcos-service-' path, so that uninstall cleans it up automatically:
    zk_root = sdk_utils.get_zk_path(foldered_name)
    zk_path = "master.mesos:2181/{}/CUSTOMPATH".format(zk_root)
    env["KAFKA_ZOOKEEPER_URI"] = zk_path
    sdk_marathon.update_app(app_definition)

    sdk_tasks.check_tasks_updated(foldered_name, pod_prefix, broker_ids)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Wait for the brokers to finish registering.
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    reported_zk = sdk_networks.get_endpoint_string(config.PACKAGE_NAME, foldered_name, "zookeeper")
    assert reported_zk == zk_path

    # The topic created earlier against the default zk should no longer be present:
    exit_code, topics_json, _stderr = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, "topic list")
    assert exit_code == 0, "Topic list command failed"

    remaining_topics = json.loads(topics_json)
    test_utils.assert_topic_lists_are_equal_without_automatic_topics([], remaining_topics)
예제 #46
0
def test_port_static_to_static_port():
    """Move the brokers from static port 9092 to static port 9095 and verify the endpoints."""
    service_name = config.SERVICE_NAME
    broker_count = config.DEFAULT_BROKER_COUNT
    sdk_tasks.check_running(service_name, broker_count)

    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)
    broker_ids = sdk_tasks.get_task_ids(service_name, pod_prefix)

    app_definition = sdk_marathon.get_config(service_name)

    def assert_endpoints_use_port(expected_port):
        # Both the address and the dns listings must cover every broker, and the
        # text after the last ':' of each entry must be the expected port.
        endpoints = test_utils.service_cli('endpoints broker')
        assert len(endpoints['address']) == broker_count
        assert len(endpoints['dns']) == broker_count

        for entry in endpoints['address']:
            assert int(entry.split(':')[-1]) == expected_port
        for entry in endpoints['dns']:
            assert int(entry.split(':')[-1]) == expected_port

    # Starting point: every broker sits on 9092.
    for broker_id in range(broker_count):
        broker_info = test_utils.service_cli('broker get {}'.format(broker_id))
        assert broker_info['port'] == 9092
    assert_endpoints_use_port(9092)

    app_definition['env']['BROKER_PORT'] = '9095'
    sdk_marathon.update_app(service_name, app_definition)

    sdk_tasks.check_tasks_updated(service_name, pod_prefix, broker_ids)
    # All tasks are running again.
    sdk_tasks.check_running(service_name, broker_count)

    assert_endpoints_use_port(9095)
def test_config_update_while_partitioned():
    """Update WORLD_CPUS while the world-0-server host is partitioned, then verify after reconnect."""
    service_name = config.SERVICE_NAME
    world_ids = sdk_tasks.get_task_ids(service_name, 'world')
    host = sdk_hosts.system_host(service_name, "world-0-server")
    shakedown.partition_agent(host)

    # Submit the CPU bump without waiting for the deployment, since the host is cut off.
    app_definition = sdk_marathon.get_config(service_name)
    env = app_definition['env']
    updated_cpus = float(env['WORLD_CPUS']) + 0.1
    env['WORLD_CPUS'] = str(updated_cpus)
    sdk_marathon.update_app(service_name, app_definition, wait_for_completed_deployment=False)

    shakedown.reconnect_agent(host)
    sdk_tasks.check_tasks_updated(service_name, 'world', world_ids)
    config.check_running()

    # Collect the running world tasks and check each one carries the new CPU value.
    running_world_tasks = []
    for task in shakedown.get_service_tasks(service_name):
        if not task['name'].startswith('world'):
            continue
        if task['state'] == "TASK_RUNNING":
            running_world_tasks.append(task)

    assert len(running_world_tasks) == config.world_task_count(service_name)
    for task in running_world_tasks:
        assert config.close_enough(task['resources']['cpus'], updated_cpus)
예제 #48
0
def setup_constraint_switch():
    """Install hello with placement on one agent, then switch the placement to another.

    Returns:
        A ``(some_agent, other_agent, hello_ids)`` tuple: the hostname used in
        the install-time placement, the hostname named in the updated placement,
        and the hello task ids recorded before the placement update.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = sdk_agents.get_private_agents()
    initial_agent = private_agents[0]["hostname"]
    target_agent = private_agents[1]["hostname"]
    log.info("Agents: %s %s", initial_agent, target_agent)
    assert initial_agent != target_agent

    placement_template = '[["hostname", "LIKE", "{}"]]'
    hello_options = {
        "count": 1,
        # Start with the pod stuck to initial_agent.
        "placement": placement_template.format(initial_agent),
    }
    options = _escape_placement_for_1_9({
        "service": {"yaml": "marathon_constraint"},
        "hello": hello_options,
        "world": {"count": 0},
    })
    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello")

    # Now switch the placement over to target_agent.
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition["env"]["HELLO_PLACEMENT"] = placement_template.format(target_agent)
    sdk_marathon.update_app(app_definition)
    # Let the scheduler come up and settle before handing back to the caller.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return initial_agent, target_agent, hello_ids
def test_increase_count():
    """Bump BROKER_COUNT and verify the extra broker only starts once the plan is continued."""
    config = marathon.get_config(SERVICE_NAME)
    config['env']['BROKER_COUNT'] = str(int(config['env']['BROKER_COUNT']) + 1)
    marathon.update_app(SERVICE_NAME, config)

    # The additional broker must NOT come up yet, so this check is expected to fail.
    # NOTE(review): this check uses PACKAGE_NAME while every other call here uses
    # SERVICE_NAME -- confirm the two are interchangeable for this test.
    try:
        tasks.check_running(PACKAGE_NAME, DEFAULT_BROKER_COUNT + 1, timeout_seconds=60)
        assert False, "Should not start task now"
    except AssertionError:
        # Re-raise with the original traceback intact.
        raise
    except Exception:
        # Expected: the check fails. Catching Exception (rather than a bare
        # except) lets KeyboardInterrupt/SystemExit still abort the test run.
        pass

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    # The plan and its first phase wait with the pre-existing steps complete
    # and the step for the new broker still waiting.
    pl = service_cli('plan show {}'.format(DEFAULT_PLAN_NAME))
    assert pl['status'] == 'WAITING'
    assert pl['phases'][0]['status'] == 'WAITING'

    for step in range(DEFAULT_BROKER_COUNT):
        assert pl['phases'][0]['steps'][step]['status'] == 'COMPLETE'

    assert pl['phases'][0]['steps'][DEFAULT_BROKER_COUNT]['status'] == 'WAITING'

    # Continue the plan; the new broker should now be launched.
    service_cli('plan continue {} {}'.format(DEFAULT_PLAN_NAME, DEFAULT_PHASE_NAME))

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT + 1)

    broker_count_check(DEFAULT_BROKER_COUNT + 1)

    pl = service_cli('plan show {}'.format(DEFAULT_PLAN_NAME))
    assert pl['status'] == 'COMPLETE'
    assert pl['phases'][0]['status'] == 'COMPLETE'

    for step in range(DEFAULT_BROKER_COUNT + 1):
        assert pl['phases'][0]['steps'][step]['status'] == 'COMPLETE'
예제 #50
0
def test_deploy():
    """Check that tasks fail to launch with the default env, then fix the env and expect success."""
    wait_time = 30
    # taskcfg.yml initially fails to deploy because several options are missing in the
    # default marathon.json.mustache; confirm the tasks keep failing for 30s first.
    sdk_utils.out('Checking that tasks are failing to launch for at least {}s'.format(wait_time))

    # Brief blips of TASK_RUNNING can occur, but they shouldn't last more than 2-3s:
    running_streak = 0

    def poll_task_states():
        nonlocal running_streak
        states = [task['state'] for task in shakedown.get_service_tasks(PACKAGE_NAME)]
        sdk_utils.out('Task states: {}'.format(states))
        if 'TASK_RUNNING' not in states:
            running_streak = 0
        else:
            running_streak += 1
            assert running_streak <= 3
        # Never report success, so the poll runs until the timeout.
        return False

    try:
        shakedown.wait_for(lambda: poll_task_states(), timeout_seconds=wait_time)
    except shakedown.TimeoutExpired:
        sdk_utils.out('Timeout reached as expected')

    # Add the needed envvars in marathon and confirm that the deployment succeeds:
    app_definition = marathon.get_config(PACKAGE_NAME)
    env = app_definition['env']
    del env['SLEEP_DURATION']
    for key, value in (('TASKCFG_ALL_OUTPUT_FILENAME', 'output'),
                       ('TASKCFG_ALL_SLEEP_DURATION', '1000')):
        env[key] = value
    marathon.update_app(PACKAGE_NAME, app_definition)

    check_running()
예제 #51
0
def test_node_replace_replaces_node():
    """Replace the node-2 pod after constraining placement away from its current host."""
    matching_tasks = [
        summary for summary in sdk_tasks.get_summary()
        if summary.name == 'node-2-server'
    ]
    replace_task = matching_tasks[0]
    log.info('avoid host for task {}'.format(replace_task))

    # The pod name is the task name without its '-server' suffix.
    suffix_length = len('-server')
    replace_pod_name = replace_task.name[:-suffix_length]

    service_name = config.SERVICE_NAME

    # Exclude the current host so the replacement does not land on it again.
    app_definition = sdk_marathon.get_config(service_name)
    constraint = '[["hostname", "UNLIKE", "{}"]]'.format(replace_task.host)
    app_definition['env']['PLACEMENT_CONSTRAINT'] = constraint
    sdk_marathon.update_app(service_name, app_definition)

    sdk_plan.wait_for_completed_deployment(service_name)

    # Trigger the replace, then wait for recovery to start and to finish.
    replace_command = 'pod replace {}'.format(replace_pod_name)
    sdk_cmd.svc_cli(config.PACKAGE_NAME, service_name, replace_command)
    sdk_plan.wait_for_kicked_off_recovery(service_name)
    sdk_plan.wait_for_completed_recovery(
        service_name, timeout_seconds=RECOVERY_TIMEOUT_SECONDS)
예제 #52
0
def test_node_replace_replaces_node():
    """Replace pod node-2, then run the verification job against its old address.

    The address is read from the output of the 'task' CLI command: the second
    whitespace-separated field of the last line starting with 'node-2-server'
    (presumably the host IP, judging by how it is used below).
    """
    task_listing = cmd.run_cli('task')
    node_2_rows = [
        row for row in task_listing.split('\n')
        if row.startswith('node-2-server')
    ]
    node_ip = node_2_rows.pop().split()[1]

    # Update the placement constraints so the new node doesn't end up on the
    # same host.
    marathon_config = marathon.get_config(PACKAGE_NAME)
    constraint = 'hostname:UNLIKE:{}'.format(node_ip)
    marathon_config['env']['PLACEMENT_CONSTRAINT'] = constraint
    marathon.update_app(PACKAGE_NAME, marathon_config)

    plan.wait_for_completed_deployment(PACKAGE_NAME)

    # Start the replace and wait for recovery to finish.
    cmd.run_cli('cassandra pods replace node-2')
    plan.wait_for_completed_recovery(PACKAGE_NAME)

    def run_verify_job():
        return try_job(VERIFY_REPLACE_JOB)

    # Install the replace-verification job with the old node IP templated in.
    # Per the job's description, it checks that this IP is absent from the
    # peers list and that the expected number of peers is present, i.e. that
    # the node was replaced from Cassandra's perspective.
    with JobContext([VERIFY_REPLACE_JOB], NODE_IP=node_ip):
        spin.time_wait_noisy(run_verify_job)
예제 #53
0
def test_port_static_to_dynamic_port():
    """Set BROKER_PORT to '0' and check that no broker endpoint uses port 9092.

    NOTE(review): '0' presumably requests a dynamically assigned port and 9092
    is presumably the static default -- confirm against the service config.
    """
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    task_prefix = '{}-'.format(DEFAULT_POD_TYPE)
    ids_before_update = tasks.get_task_ids(SERVICE_NAME, task_prefix)

    marathon_config = marathon.get_config(SERVICE_NAME)
    marathon_config['env']['BROKER_PORT'] = '0'
    marathon.update_app(SERVICE_NAME, marathon_config)

    # Every broker task must have been relaunched and be running again.
    tasks.check_tasks_updated(SERVICE_NAME, task_prefix, ids_before_update)
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    endpoints = service_cli('endpoints broker')
    endpoint_kinds = ('address', 'dns')

    # One entry per broker in both endpoint lists.
    for kind in endpoint_kinds:
        assert len(endpoints[kind]) == DEFAULT_BROKER_COUNT

    # The trailing ':<port>' of every endpoint must not be 9092.
    for kind in endpoint_kinds:
        for endpoint in endpoints[kind]:
            port_text = endpoint.split(':')[-1]
            assert int(port_text) != 9092
예제 #54
0
def test_lock():
    """This test verifies that a second scheduler fails to startup when
    an existing scheduler is running.  Without locking, the scheduler
    would fail during registration, but after writing its config to ZK.
    So in order to verify that the scheduler fails immediately, we ensure
    that the ZK config state is unmodified.

    The Marathon app is always scaled back to one instance with its original
    labels, even when the wait or the assertion fails, so a failure here does
    not leave a second scheduler instance behind.
    """

    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    def get_zk_node_data(node_name):
        """Return the JSON body of the Exhibitor node-data lookup for `node_name`."""
        return sdk_cmd.cluster_request(
            "GET", "/exhibitor/exhibitor/v1/explorer/node-data?key={}".format(
                node_name)).json()

    # Get ZK state from running framework
    zk_path = "{}/ConfigTarget".format(sdk_utils.get_zk_path(foldered_name))
    zk_config_old = get_zk_node_data(zk_path)

    # Get marathon app
    marathon_config = sdk_marathon.get_config(foldered_name)
    old_timestamp = marathon_config.get("lastTaskFailure",
                                        {}).get("timestamp", None)

    # Keep a copy of the labels so they can be restored during cleanup.
    labels = marathon_config["labels"]
    original_labels = labels.copy()
    labels.pop("MARATHON_SINGLE_INSTANCE_APP")

    @retrying.retry(wait_fixed=1000,
                    stop_max_delay=120 * 1000,
                    retry_on_result=lambda res: not res)
    def wait_for_second_scheduler_to_fail():
        """Poll until the app's lastTaskFailure timestamp differs from the initial one."""
        timestamp = (sdk_marathon.get_config(foldered_name).get(
            "lastTaskFailure", {}).get("timestamp", None))
        return timestamp != old_timestamp

    try:
        # Scale to 2 instances (the single-instance label is removed first)
        sdk_marathon.update_app(marathon_config)
        marathon_config["instances"] = 2
        sdk_marathon.update_app(marathon_config,
                                wait_for_completed_deployment=False)

        wait_for_second_scheduler_to_fail()

        # Verify ZK is unchanged
        zk_config_new = get_zk_node_data(zk_path)
        assert zk_config_old == zk_config_new
    finally:
        # In order to prevent the second scheduler instance from obtaining a
        # lock, we undo the "scale-up" operation. This runs on failure too, so
        # the app is not left scaled up for whatever runs next.
        marathon_config["instances"] = 1
        marathon_config["labels"] = original_labels
        sdk_marathon.update_app(marathon_config, force=True)
예제 #55
0
def test_secrets_config_update():
    # 1) install examples/secrets.yml
    # 2) create new Secrets, delete old Secrets
    # 3) update configuration with new Secrets
    # 4) verify secret content (using new Secrets after config update)

    install.uninstall(PACKAGE_NAME)

    secrets_prefix = "{}/".format(PACKAGE_NAME)
    create_secrets(secrets_prefix)

    install.install(
        PACKAGE_NAME,
        NUM_HELLO + NUM_WORLD,
        additional_options=secret_options,
    )

    # launch will fail if secrets are not available or not accessible
    plan.wait_for_completed_deployment(PACKAGE_NAME)

    # tasks will fail if secret file is not created
    tasks.check_running(PACKAGE_NAME, NUM_HELLO + NUM_WORLD)

    # Only the first pod of each type is inspected.
    hello_tasks = tasks.get_task_ids(PACKAGE_NAME, "hello-0")
    world_tasks = tasks.get_task_ids(PACKAGE_NAME, "world-0")

    # The hello commands are identical before and after the config update.
    hello_commands = (
        "bash -c 'echo $HELLO_SECRET1_ENV'",
        "cat HELLO_SECRET1_FILE",
        "cat HELLO_SECRET2_FILE",
    )

    # Before the update every secret must carry the default content.
    world_commands_default = (
        "bash -c 'echo $WORLD_SECRET1_ENV'",
        "cat WORLD_SECRET2_FILE",
        "cat {}/secret3".format(PACKAGE_NAME),
    )
    for command in world_commands_default:
        assert secret_content_default == task_exec(world_tasks[0], command)

    # hello tasks has container image
    for command in hello_commands:
        assert secret_content_default == task_exec(hello_tasks[0], command)

    # clean up and delete secrets (defaults)
    delete_secrets(secrets_prefix)

    # create new secrets with new content -- New Value
    create_secrets(secret_content_arg=secret_content_alternative)

    # Point the service at the newly created secrets.
    marathon_config = marathon.get_config(PACKAGE_NAME)
    secret_env_values = (
        ('HELLO_SECRET1', 'secret1'),
        ('HELLO_SECRET2', 'secret2'),
        ('WORLD_SECRET1', 'secret1'),
        ('WORLD_SECRET2', 'secret2'),
        ('WORLD_SECRET3', 'secret3'),
    )
    for env_name, secret_name in secret_env_values:
        marathon_config['env'][env_name] = secret_name

    # config update
    marathon.update_app(PACKAGE_NAME, marathon_config)

    # wait till plan is complete - pods are supposed to restart
    plan.wait_for_completed_deployment(PACKAGE_NAME)

    # all tasks are running
    tasks.check_running(PACKAGE_NAME, NUM_HELLO + NUM_WORLD)

    # Task ids are fetched again after the update -- again only the first pod.
    hello_tasks = tasks.get_task_ids(PACKAGE_NAME, "hello-0")
    world_tasks = tasks.get_task_ids(PACKAGE_NAME, "world-0")

    # After the update every secret must carry the alternative content.
    world_commands_updated = (
        "bash -c 'echo $WORLD_SECRET1_ENV'",
        "cat WORLD_SECRET2_FILE",
        "cat secret3",
    )
    for command in world_commands_updated:
        assert secret_content_alternative == task_exec(world_tasks[0], command)

    for command in hello_commands:
        assert secret_content_alternative == task_exec(hello_tasks[0], command)

    # clean up and delete secrets
    delete_secrets()
예제 #56
0
def update_app(service_name, options, expected_task_count):
    """Merge `options` into the app's env, redeploy, and wait until it settles.

    After pushing the updated Marathon config this waits for the deployment
    plan to complete and then checks that `expected_task_count` tasks are
    running.
    """
    marathon_config = sdk_marathon.get_config(service_name)
    env = marathon_config['env']
    env.update(options)
    sdk_marathon.update_app(service_name, marathon_config)

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_tasks.check_running(service_name, expected_task_count)
def test_increase_cpu():
    """Raise BROKER_CPUS by 0.1 and walk the resulting plan forward by hand.

    Sequence exercised below:
      * after the Marathon update the plan must report WAITING, with steps 0
        and 1 WAITING and steps 2..DEFAULT_BROKER_COUNT PENDING;
      * the first ``plan continue`` must relaunch the pod-0 task, leaving
        step 0 COMPLETE and step 1 still WAITING;
      * the second ``plan continue`` must relaunch the pod-1 task, after which
        the plan and its first DEFAULT_BROKER_COUNT + 1 steps must be COMPLETE.
    """
    plan_show_cmd = 'plan show {}'.format(DEFAULT_PLAN_NAME)
    plan_continue_cmd = 'plan continue {} {}'.format(
        DEFAULT_PLAN_NAME, DEFAULT_PHASE_NAME)

    def plan_has_status(expected):
        """Return True if the plan reports `expected`; False on any CLI or parsing error."""
        try:
            return service_cli(plan_show_cmd)['status'] == expected
        except Exception:
            # Best-effort poll: the CLI may fail while the scheduler restarts.
            # Only Exception is caught, so KeyboardInterrupt and SystemExit
            # still abort the test run.
            return False

    def plan_waiting():
        return plan_has_status('WAITING')

    def plan_complete():
        return plan_has_status('COMPLETE')

    config = marathon.get_config(SERVICE_NAME)
    config['env']['BROKER_CPUS'] = str(0.1 + float(config['env']['BROKER_CPUS']))
    marathon.update_app(SERVICE_NAME, config)

    spin.time_wait_return(plan_waiting)

    pl = service_cli(plan_show_cmd)
    assert pl['status'] == 'WAITING'
    phase = pl['phases'][0]
    assert phase['status'] == 'WAITING'

    steps = phase['steps']
    assert steps[0]['status'] == 'WAITING'
    assert steps[1]['status'] == 'WAITING'
    for step in range(2, DEFAULT_BROKER_COUNT + 1):
        assert steps[step]['status'] == 'PENDING'

    # all tasks are still running
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT + 1)

    # Continue once and check that the pod-0 task was relaunched.
    pod_0_task = '{}-0-{}'.format(DEFAULT_POD_TYPE, DEFAULT_TASK_NAME)
    broker_ids = tasks.get_task_ids(SERVICE_NAME, pod_0_task)

    service_cli(plan_continue_cmd)

    tasks.check_tasks_updated(SERVICE_NAME, pod_0_task, broker_ids)

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT + 1)

    pl = service_cli(plan_show_cmd)

    assert pl['status'] == 'WAITING'
    phase = pl['phases'][0]
    assert phase['status'] == 'WAITING'

    steps = phase['steps']
    assert steps[0]['status'] == 'COMPLETE'
    assert steps[1]['status'] == 'WAITING'

    for step in range(2, DEFAULT_BROKER_COUNT + 1):
        assert steps[step]['status'] == 'PENDING'

    # Continue again and check that the pod-1 task was relaunched.
    pod_1_task = '{}-1-{}'.format(DEFAULT_POD_TYPE, DEFAULT_TASK_NAME)
    broker_ids = tasks.get_task_ids(SERVICE_NAME, pod_1_task)

    service_cli(plan_continue_cmd)

    tasks.check_tasks_updated(SERVICE_NAME, pod_1_task, broker_ids)

    spin.time_wait_return(plan_complete)

    pl = service_cli(plan_show_cmd)

    assert pl['status'] == 'COMPLETE'
    phase = pl['phases'][0]
    assert phase['status'] == 'COMPLETE'
    steps = phase['steps']
    for step in range(DEFAULT_BROKER_COUNT + 1):
        assert steps[step]['status'] == 'COMPLETE'

    broker_count_check(DEFAULT_BROKER_COUNT + 1)
예제 #58
0
def test_secrets_config_update():
    # 1) install examples/secrets.yml
    # 2) create new Secrets, delete old Secrets
    # 3) update configuration with new Secrets
    # 4) verify secret content (using new Secrets after config update)

    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    secrets_prefix = "{}/".format(config.SERVICE_NAME)
    create_secrets(secrets_prefix)

    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        NUM_HELLO + NUM_WORLD,
        additional_options=secret_options,
    )

    # tasks will fail if secret file is not created
    sdk_tasks.check_running(config.SERVICE_NAME, NUM_HELLO + NUM_WORLD)

    # One task of each pod type is inspected.
    world_task = "world-0-server"
    hello_task = "hello-0-server"

    # The hello commands are identical before and after the config update.
    hello_commands = (
        "bash -c 'echo $HELLO_SECRET1_ENV'",
        "cat HELLO_SECRET1_FILE",
        "cat HELLO_SECRET2_FILE",
    )

    # Before the update every secret must carry the default content.
    world_commands_default = (
        "bash -c 'echo $WORLD_SECRET1_ENV'",
        "cat WORLD_SECRET2_FILE",
        "cat {}/secret3".format(config.SERVICE_NAME),
    )
    for command in world_commands_default:
        assert secret_content_default == read_secret(world_task, command)

    # hello tasks has container image
    for command in hello_commands:
        assert secret_content_default == read_secret(hello_task, command)

    # clean up and delete secrets (defaults)
    delete_secrets(secrets_prefix)

    # create new secrets with new content -- New Value
    create_secrets(secret_content=secret_content_alternative)

    # Point the service at the newly created secrets.
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    secret_env_values = (
        ("HELLO_SECRET1", "secret1"),
        ("HELLO_SECRET2", "secret2"),
        ("WORLD_SECRET1", "secret1"),
        ("WORLD_SECRET2", "secret2"),
        ("WORLD_SECRET3", "secret3"),
    )
    for env_name, secret_name in secret_env_values:
        marathon_config["env"][env_name] = secret_name

    # config update
    sdk_marathon.update_app(marathon_config)

    # wait till plan is complete - pods are supposed to restart
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    # all tasks are running
    sdk_tasks.check_running(config.SERVICE_NAME, NUM_HELLO + NUM_WORLD)

    # After the update every secret must carry the alternative content.
    world_commands_updated = (
        "bash -c 'echo $WORLD_SECRET1_ENV'",
        "cat WORLD_SECRET2_FILE",
        "cat secret3",
    )
    for command in world_commands_updated:
        assert secret_content_alternative == read_secret(world_task, command)

    for command in hello_commands:
        assert secret_content_alternative == read_secret(hello_task, command)

    # clean up and delete secrets
    delete_secrets()
예제 #59
0
def _set_xpack(service_name, is_enabled):
    """Set TASKCFG_ALL_XPACK_ENABLED for `service_name` and wait for the redeploy.

    `is_enabled` is written into the Marathon env unchanged. After the update
    this waits for the deployment plan to complete and checks that
    DEFAULT_TASK_COUNT tasks are running.
    """
    marathon_config = marathon.get_config(service_name)
    env = marathon_config['env']
    env['TASKCFG_ALL_XPACK_ENABLED'] = is_enabled
    marathon.update_app(service_name, marathon_config)

    sdk_plan.wait_for_completed_deployment(service_name)
    tasks.check_running(service_name, DEFAULT_TASK_COUNT)
예제 #60
0
def xpack(is_enabled):
    """Set TASKCFG_ALL_XPACK_ENABLED on PACKAGE_NAME and wait for the redeploy.

    `is_enabled` is written into the Marathon env unchanged. After the update
    this waits for the deployment plan to complete and checks that
    DEFAULT_TASK_COUNT tasks are running.
    """
    marathon_config = marathon.get_config(PACKAGE_NAME)
    env = marathon_config['env']
    env['TASKCFG_ALL_XPACK_ENABLED'] = is_enabled
    marathon.update_app(PACKAGE_NAME, marathon_config)

    sdk_plan.wait_for_completed_deployment(PACKAGE_NAME)
    tasks.check_running(PACKAGE_NAME, DEFAULT_TASK_COUNT)