def test_old_tasks_not_relaunched():
    hello_task_id = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello")
    assert len(hello_task_id) > 0, "Got an empty list of task_ids"
    # Start update plan with options that have list of yaml files to make it launch in multi service mode
    sdk_upgrade.update_or_upgrade_or_downgrade(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        to_package_version=None,
        additional_options={
            "service": {
                "yaml": "",
                "yamls": "svc,foobar_service_name"
            }
        },
        expected_running_tasks=4,
        wait_for_deployment=False,
    )
    # Ensure new tasks are launched but the old task does not relaunch
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME,
                                           multiservice_name="foobar")
    sdk_tasks.check_task_not_relaunched(
        config.SERVICE_NAME,
        "hello-0-server",
        hello_task_id.pop(),
        multiservice_name=config.SERVICE_NAME,
    )
    assert len(sdk_tasks.get_task_ids(config.SERVICE_NAME, "foo")) == 1
Example #2
0
def check_tasks_not_updated(service_name, prefix, old_task_ids):
    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
    task_ids = get_task_ids(service_name, prefix)
    task_sets = "\n- Old tasks: {}\n- Current tasks: {}".format(sorted(old_task_ids), sorted(task_ids))
    log.info('Checking tasks starting with "{}" have not been updated:{}'.format(prefix, task_sets))
    assert set(old_task_ids).issubset(set(task_ids)), "Tasks got updated:{}".format(task_sets)
Example #3
0
def test_custom_decommission():
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    marathon_config = sdk_marathon.get_config(foldered_name)
    marathon_config['env']['WORLD_COUNT'] = '1'
    sdk_marathon.update_app(foldered_name, marathon_config)

    sdk_plan.wait_for_completed_plan(foldered_name, 'decommission')
    decommission_plan = sdk_plan.get_decommission_plan(foldered_name)
    log.info("decommission plan: {}".format(decommission_plan))

    custom_step_name = decommission_plan['phases'][0]['steps'][0]['name']
    assert "custom_decomission_step" == custom_step_name

    # scale back up
    marathon_config = sdk_marathon.get_config(foldered_name)
    marathon_config['env']['WORLD_COUNT'] = '2'
    sdk_marathon.update_app(foldered_name, marathon_config)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Let's decommission again!
    marathon_config = sdk_marathon.get_config(foldered_name)
    marathon_config['env']['WORLD_COUNT'] = '1'
    sdk_marathon.update_app(foldered_name, marathon_config)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    sdk_plan.wait_for_completed_plan(foldered_name, 'decommission')
    decommission_plan = sdk_plan.get_decommission_plan(foldered_name)
    log.info("decommission plan: {}".format(decommission_plan))

    custom_step_name = decommission_plan['phases'][0]['steps'][0]['name']
    assert "custom_decomission_step" == custom_step_name
Example #4
0
def test_custom_zookeeper():
    broker_ids = sdk_tasks.get_task_ids(
        FOLDERED_SERVICE_NAME, '{}-'.format(config.DEFAULT_POD_TYPE))

    # create a topic against the default zk:
    sdk_cmd.svc_cli(
        config.PACKAGE_NAME, FOLDERED_SERVICE_NAME,
        'topic create {}'.format(config.DEFAULT_TOPIC_NAME), json=True)
    assert sdk_cmd.svc_cli(
        config.PACKAGE_NAME, FOLDERED_SERVICE_NAME,
        'topic list', json=True) == [config.DEFAULT_TOPIC_NAME]

    marathon_config = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    # should be using default path when this envvar is empty/unset:
    assert marathon_config['env']['KAFKA_ZOOKEEPER_URI'] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that it's automatically cleaned up in uninstall:
    zk_path = 'master.mesos:2181/{}/CUSTOMPATH'.format(ZK_SERVICE_PATH)
    marathon_config['env']['KAFKA_ZOOKEEPER_URI'] = zk_path
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, marathon_config)

    sdk_tasks.check_tasks_updated(
        FOLDERED_SERVICE_NAME, '{}-'.format(config.DEFAULT_POD_TYPE), broker_ids)
    sdk_plan.wait_for_completed_deployment(FOLDERED_SERVICE_NAME)

    # wait for brokers to finish registering
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=FOLDERED_SERVICE_NAME)

    zookeeper = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, FOLDERED_SERVICE_NAME,
        'endpoints zookeeper')
    assert zookeeper.rstrip('\n') == zk_path

    # topic created earlier against default zk should no longer be present:
    assert sdk_cmd.svc_cli(config.PACKAGE_NAME, FOLDERED_SERVICE_NAME, 'topic list', json=True) == []
def setup_constraint_switch():
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    agents = shakedown.get_private_agents()
    some_agent = agents[0]
    other_agent = agents[1]
    log.info('Agents: %s %s', some_agent, other_agent)
    assert some_agent != other_agent
    options = _escape_placement_for_1_9({
        "service": {
            "yaml": "marathon_constraint"
        },
        "hello": {
            "count": 1,
            # First, we stick the pod to some_agent
            "placement": "[[\"hostname\", \"LIKE\", \"{}\"]]".format(some_agent)
        },
        "world": {
            "count": 0
        }
    })
    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    # Now, stick it to other_agent
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    marathon_config['env']['HELLO_PLACEMENT'] = "[[\"hostname\", \"LIKE\", \"{}\"]]".format(other_agent)
    sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)
    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return some_agent, other_agent, hello_ids
Example #6
0
def kafka_service_tls(service_account):
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        config.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_BROKER_COUNT,
            additional_options={
                "service": {
                    "service_account": service_account,
                    "service_account_secret": service_account,
                    "security": {
                        "transport_encryption": {
                            "enabled": True
                        }
                    }
                }
            }
        )

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        yield service_account
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
Example #7
0
def hdfs_service_tls(service_account):
    try:
        sdk_install.install(PACKAGE_NAME,
                            service_name=SERVICE_NAME,
                            expected_running_tasks=DEFAULT_TASK_COUNT,
                            additional_options={
                                "service": {
                                    "service_account_secret": service_account,
                                    "service_account": service_account,
                                    "tls": {
                                        "enabled": True,
                                    }
                                }
                            })

        sdk_plan.wait_for_completed_deployment(SERVICE_NAME)

        # Wait for service health check to pass
        shakedown.service_healthy(SERVICE_NAME)
    except Exception as error:
        try:
            sdk_install.uninstall(PACKAGE_NAME, SERVICE_NAME)
        except:
            pass
        raise error

    yield

    sdk_install.uninstall(PACKAGE_NAME, SERVICE_NAME)
Example #8
0
def test_custom_zookeeper():
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    broker_ids = sdk_tasks.get_task_ids(foldered_name, "{}-".format(config.DEFAULT_POD_TYPE))

    # create a topic against the default zk:
    test_utils.create_topic(config.DEFAULT_TOPIC_NAME, service_name=foldered_name)

    marathon_config = sdk_marathon.get_config(foldered_name)
    # should be using default path when this envvar is empty/unset:
    assert marathon_config["env"]["KAFKA_ZOOKEEPER_URI"] == ""

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that it's automatically cleaned up in uninstall:
    zk_path = "master.mesos:2181/{}/CUSTOMPATH".format(sdk_utils.get_zk_path(foldered_name))
    marathon_config["env"]["KAFKA_ZOOKEEPER_URI"] = zk_path
    sdk_marathon.update_app(marathon_config)

    sdk_tasks.check_tasks_updated(foldered_name, "{}-".format(config.DEFAULT_POD_TYPE), broker_ids)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # wait for brokers to finish registering
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    _, zookeeper, _ = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, "endpoints zookeeper")
    assert zookeeper.rstrip("\n") == zk_path

    # topic created earlier against default zk should no longer be present:
    _, topic_list_info, _ = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, "topic list", parse_json=True)

    test_utils.assert_topic_lists_are_equal_without_automatic_topics([], topic_list_info)
Example #9
0
def hdfs_service_tls(service_account):
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        sdk_install.install(
            config.PACKAGE_NAME,
            service_name=config.SERVICE_NAME,
            expected_running_tasks=config.DEFAULT_TASK_COUNT,
            additional_options={
                "service": {
                    "service_account": service_account["name"],
                    "service_account_secret": service_account["secret"],
                    "security": {
                        "transport_encryption": {
                            "enabled": True
                        }
                    }
                }
            },
            timeout_seconds=30 * 60)

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        yield service_account
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
Example #10
0
def test_secrets_dcos_space():
    # 1) create secrets in hello-world/somePath, i.e. hello-world/somePath/secret1 ...
    # 2) Tasks with DCOS_SPACE hello-world/somePath
    #       or some DCOS_SPACE path under hello-world/somePath
    #               (for example hello-world/somePath/anotherPath/)
    #    can access these Secrets

    install.uninstall(PACKAGE_NAME)

    # cannot access these secrets because of DCOS_SPACE authorization
    create_secrets("{}/somePath/".format(PACKAGE_NAME))

    try:
        install.install(PACKAGE_NAME,
                        NUM_HELLO + NUM_WORLD,
                        additional_options=options_dcos_space_test)

        plan.wait_for_completed_deployment(PACKAGE_NAME)

        assert False, "Should have failed to install"

    except AssertionError as arg:
        raise arg

    except:
        pass  # expected to fail

    # clean up and delete secrets
    delete_secrets("{}/somePath/".format(PACKAGE_NAME))
Example #11
0
def install(package_name,
            service_name,
            expected_running_tasks,
            additional_options={},
            package_version=None,
            timeout_seconds=TIMEOUT_SECONDS,
            wait_for_deployment=True):
    start = time.time()
    merged_options = get_package_options(additional_options)

    log.info('Installing {}/{} with options={} version={}'.format(
        package_name, service_name, merged_options, package_version))

    # 1. Install package, wait for tasks, wait for marathon deployment
    retried_shakedown_install(package_name, service_name, package_version,
                              merged_options, timeout_seconds,
                              expected_running_tasks)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete state.
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

    log.info('Installed {}/{} after {}'.format(
        package_name, service_name,
        shakedown.pretty_duration(time.time() - start)))
Example #12
0
def test_task_dns_prefix_points_to_all_tasks():
    pod_info = sdk_cmd.service_request('GET', config.SERVICE_NAME,
                                       '/v1/pod/hello-0/info').json()
    # Assert that DiscoveryInfo is correctly set on tasks.
    assert (all(p["info"]["discovery"]["name"] == "hello-0" for p in pod_info))
    # Assert that the hello-0.hello-world.mesos DNS entry points to the right IP.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
def test_disable_quota_role():

    # Add new pods to service which should be launched with the new role.
    marathon_config = sdk_marathon.get_config(SERVICE_NAME)

    # Turn off legacy role.
    marathon_config["env"]["ENABLE_ROLE_MIGRATION"] = "false"

    # Update the app
    sdk_marathon.update_app(marathon_config)

    # Wait for scheduler to restart.
    sdk_plan.wait_for_completed_deployment(SERVICE_NAME)

    # Get the current service state to verify roles have applied.
    service_roles = sdk_utils.get_service_roles(SERVICE_NAME)
    current_task_roles = service_roles["task-roles"]

    # We must have some role!
    assert len(current_task_roles) > 0
    assert len(current_task_roles) == 3

    assert LEGACY_ROLE in current_task_roles.values()
    assert ENFORCED_ROLE not in current_task_roles.values()

    # Ensure we're not MULTI_ROLE, and only using the legacy-role.
    assert service_roles["framework-roles"] is None
    assert service_roles["framework-role"] == LEGACY_ROLE
Example #14
0
def test_custom_decommission():
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    marathon_config = sdk_marathon.get_config(foldered_name)
    marathon_config["env"]["WORLD_COUNT"] = "1"
    sdk_marathon.update_app(marathon_config)

    sdk_plan.wait_for_completed_plan(foldered_name, "decommission")
    decommission_plan = sdk_plan.get_decommission_plan(foldered_name)
    log.info(sdk_plan.plan_string("decommission", decommission_plan))

    custom_step_name = decommission_plan["phases"][0]["steps"][0]["name"]
    assert "custom_decommission_step" == custom_step_name

    # scale back up
    marathon_config = sdk_marathon.get_config(foldered_name)
    marathon_config["env"]["WORLD_COUNT"] = "2"
    sdk_marathon.update_app(marathon_config)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Let's decommission again!
    marathon_config = sdk_marathon.get_config(foldered_name)
    marathon_config["env"]["WORLD_COUNT"] = "1"
    sdk_marathon.update_app(marathon_config)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    sdk_plan.wait_for_completed_plan(foldered_name, "decommission")
    decommission_plan = sdk_plan.get_decommission_plan(foldered_name)
    log.info(sdk_plan.plan_string("decommission", decommission_plan))

    custom_step_name = decommission_plan["phases"][0]["steps"][0]["name"]
    assert "custom_decommission_step" == custom_step_name
Example #15
0
def test_metrics() -> None:
    expected_metrics = [
        "node.data-0-node.fs.total.total_in_bytes",
        "node.data-0-node.jvm.mem.pools.old.peak_used_in_bytes",
        "node.data-0-node.jvm.threads.count",
    ]

    def expected_metrics_exist(emitted_metrics: List[str]) -> bool:
        # Elastic metrics are also dynamic and based on the service name# For eg:
        # elasticsearch.test__integration__elastic.node.data-0-node.thread_pool.listener.completed
        # To prevent this from breaking we drop the service name from the metric name
        # => data-0-node.thread_pool.listener.completed
        metric_names = [".".join(metric_name.split(".")[2:]) for metric_name in emitted_metrics]
        return sdk_metrics.check_metrics_presence(metric_names, expected_metrics)

    sdk_metrics.wait_for_service_metrics(
        package_name,
        service_name,
        "data-0",
        "data-0-node",
        config.DEFAULT_TIMEOUT,
        expected_metrics_exist,
    )

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
Example #16
0
def elastic_service_tls(service_account):
    sdk_install.install(
        PACKAGE_NAME,
        service_name=SERVICE_NAME,
        expected_running_tasks=NO_INGEST_TASK_COUNT,
        additional_options={
            "service": {
                "service_account_secret": service_account,
                "service_account": service_account,
                "tls": True,
            },
            "elasticsearch": {
                "xpack_enabled": True,
            }
        }
    )

    sdk_plan.wait_for_completed_deployment(SERVICE_NAME)

    # Wait for service health check to pass
    shakedown.service_healthy(SERVICE_NAME)

    yield

    sdk_install.uninstall(PACKAGE_NAME, SERVICE_NAME)
Example #17
0
def test_metrics():
    expected_metrics = [
        "node.data-0-node.fs.total.total_in_bytes",
        "node.data-0-node.jvm.mem.pools.old.peak_used_in_bytes",
        "node.data-0-node.jvm.threads.count",
    ]

    def expected_metrics_exist(emitted_metrics):
        # Elastic metrics are also dynamic and based on the service name# For eg:
        # elasticsearch.test__integration__elastic.node.data-0-node.thread_pool.listener.completed
        # To prevent this from breaking we drop the service name from the metric name
        # => data-0-node.thread_pool.listener.completed
        metric_names = [
            ".".join(metric_name.split(".")[2:])
            for metric_name in emitted_metrics
        ]
        return sdk_metrics.check_metrics_presence(metric_names,
                                                  expected_metrics)

    sdk_metrics.wait_for_service_metrics(
        config.PACKAGE_NAME,
        foldered_name,
        "data-0",
        "data-0-node",
        config.DEFAULT_TIMEOUT,
        expected_metrics_exist,
    )

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_old_tasks_get_relaunched_with_new_config():
    hello_task_id = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello")
    assert len(hello_task_id) > 0, "Got an empty list of task_ids"
    # Start update plan with options that have list of yaml files to make it
    # launch in multi service mode with updated config
    sdk_upgrade.update_or_upgrade_or_downgrade(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        to_version=None,
        to_options={
            "service": {
                "yaml": "",
                "yamls": "svc,foobar_service_name"
            },
            "hello": {
                "cpus": 0.2
            },
        },
        expected_running_tasks=4,
        wait_for_deployment=False,
    )
    # Ensure the old task DOES relaunch
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME,
                                           multiservice_name="foobar")
    sdk_tasks.check_task_relaunched("hello-0-server", hello_task_id.pop())
    assert len(sdk_tasks.get_task_ids(config.SERVICE_NAME, "foo")) == 1
Example #19
0
def setup_constraint_switch():
    sdk_install.uninstall(PACKAGE_NAME)

    agents = shakedown.get_private_agents()
    some_agent = agents[0]
    other_agent = agents[1]
    log.info('Agents: %s %s', some_agent, other_agent)
    assert some_agent != other_agent
    options = {
        "service": {
            "spec_file": "examples/marathon_constraint.yml"
        },
        "hello": {
            "count": 1,
            # First, we stick the pod to some_agent
            "placement": 'hostname:LIKE:{}'.format(some_agent)
        },
        "world": {
            "count": 0
        }
    }
    sdk_install.install(PACKAGE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(PACKAGE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(PACKAGE_NAME, 'hello')

    # Now, stick it to other_agent
    config = sdk_marathon.get_config(PACKAGE_NAME)
    config['env']['HELLO_PLACEMENT'] = 'hostname:LIKE:{}'.format(other_agent)
    sdk_marathon.update_app(PACKAGE_NAME, config)
    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(PACKAGE_NAME)

    return some_agent, other_agent, hello_ids
def test_node_replace_replaces_node():
    replace_task = [
        task for task in sdk_tasks.get_summary()
        if task.name == "node-2-server"
    ][0]
    log.info("avoid host for task {}".format(replace_task))

    replace_pod_name = replace_task.name[:-len("-server")]

    # Update the placement constraints so the new node doesn't end up on the same host
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    original_constraint = marathon_config["env"]["PLACEMENT_CONSTRAINT"]
    try:
        marathon_config["env"][
            "PLACEMENT_CONSTRAINT"] = '[["hostname", "UNLIKE", "{}"]]'.format(
                replace_task.host)
        sdk_marathon.update_app(marathon_config)

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        # start replace and wait for it to finish
        sdk_cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME,
                        "pod replace {}".format(replace_pod_name))
        sdk_plan.wait_for_kicked_off_recovery(config.SERVICE_NAME)
        sdk_plan.wait_for_completed_recovery(
            config.SERVICE_NAME, timeout_seconds=RECOVERY_TIMEOUT_SECONDS)

    finally:
        # revert to prior placement setting before proceeding with tests: avoid getting stuck.
        marathon_config["env"]["PLACEMENT_CONSTRAINT"] = original_constraint
        sdk_marathon.update_app(marathon_config)

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
Example #21
0
def cassandra_service_tls(service_account):
    sdk_install.uninstall(package_name=config.PACKAGE_NAME, service_name=config.SERVICE_NAME)
    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        config.DEFAULT_TASK_COUNT,
        additional_options={
            "service": {
                "service_account": service_account["name"],
                "service_account_secret": service_account["secret"],
                "security": {
                    "transport_encryption": {
                        "enabled": True
                    }
                }
            }
        }
    )

    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    # Wait for service health check to pass
    shakedown.service_healthy(config.SERVICE_NAME)

    yield

    sdk_install.uninstall(package_name=config.PACKAGE_NAME, service_name=config.SERVICE_NAME)
Example #22
0
def check_healthy(service_name, count=DEFAULT_TASK_COUNT, recovery_expected=False):
    sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds=25 * 60)
    if recovery_expected:
        # TODO(elezar): See INFINITY-2109 where we need to better handle recovery health checks
        sdk_plan.wait_for_kicked_off_recovery(service_name, timeout_seconds=25 * 60)
    sdk_plan.wait_for_completed_recovery(service_name, timeout_seconds=25 * 60)
    sdk_tasks.check_running(service_name, count)
Example #23
0
def cassandra_service_tls(service_account):
    sdk_install.uninstall(package_name=config.PACKAGE_NAME,
                          service_name=config.SERVICE_NAME)
    sdk_install.install(config.PACKAGE_NAME,
                        config.SERVICE_NAME,
                        config.DEFAULT_TASK_COUNT,
                        additional_options={
                            "service": {
                                "service_account": service_account["name"],
                                "service_account_secret":
                                service_account["secret"],
                                "security": {
                                    "transport_encryption": {
                                        "enabled": True
                                    }
                                }
                            }
                        })

    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    # Wait for service health check to pass
    shakedown.service_healthy(config.SERVICE_NAME)

    yield

    sdk_install.uninstall(package_name=config.PACKAGE_NAME,
                          service_name=config.SERVICE_NAME)
Example #24
0
def test_custom_zookeeper():
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    broker_ids = sdk_tasks.get_task_ids(foldered_name, '{}-'.format(config.DEFAULT_POD_TYPE))

    # create a topic against the default zk:
    test_utils.create_topic(config.DEFAULT_TOPIC_NAME, service_name=foldered_name)

    marathon_config = sdk_marathon.get_config(foldered_name)
    # should be using default path when this envvar is empty/unset:
    assert marathon_config['env']['KAFKA_ZOOKEEPER_URI'] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that it's automatically cleaned up in uninstall:
    zk_path = 'master.mesos:2181/{}/CUSTOMPATH'.format(sdk_utils.get_zk_path(foldered_name))
    marathon_config['env']['KAFKA_ZOOKEEPER_URI'] = zk_path
    sdk_marathon.update_app(foldered_name, marathon_config)

    sdk_tasks.check_tasks_updated(foldered_name, '{}-'.format(config.DEFAULT_POD_TYPE), broker_ids)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # wait for brokers to finish registering
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    zookeeper = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'endpoints zookeeper')
    assert zookeeper.rstrip('\n') == zk_path

    # topic created earlier against default zk should no longer be present:
    topic_list_info = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'topic list', json=True)

    test_utils.assert_topic_lists_are_equal_without_automatic_topics([], topic_list_info)
Example #25
0
def setup_constraint_switch():
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    agents = shakedown.get_private_agents()
    some_agent = agents[0]
    other_agent = agents[1]
    log.info('Agents: %s %s', some_agent, other_agent)
    assert some_agent != other_agent
    options = _escape_placement_for_1_9({
        "service": {
            "yaml": "marathon_constraint"
        },
        "hello": {
            "count": 1,
            # First, we stick the pod to some_agent
            "placement": "[[\"hostname\", \"LIKE\", \"{}\"]]".format(some_agent)
        },
        "world": {
            "count": 0
        }
    })
    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    # Now, stick it to other_agent
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    marathon_config['env']['HELLO_PLACEMENT'] = "[[\"hostname\", \"LIKE\", \"{}\"]]".format(other_agent)
    sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)
    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return some_agent, other_agent, hello_ids
Example #26
0
def check_tasks_not_updated(service_name, prefix, old_task_ids):
    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
    task_ids = get_task_ids(service_name, prefix)
    task_sets = "\n- Old tasks: {}\n- Current tasks: {}".format(sorted(old_task_ids), sorted(task_ids))
    log.info('Checking tasks starting with "{}" have not been updated:{}'.format(prefix, task_sets))
    assert set(old_task_ids).issubset(set(task_ids)), 'Tasks starting with "{}" were updated:{}'.format(prefix, task_sets)
Example #27
0
def check_healthy(service_name, count=DEFAULT_TASK_COUNT, recovery_expected=False):
    sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds=25 * 60)
    if recovery_expected:
        # TODO(elezar): See INFINITY-2109 where we need to better handle recovery health checks
        sdk_plan.wait_for_kicked_off_recovery(service_name, timeout_seconds=25 * 60)
    sdk_plan.wait_for_completed_recovery(service_name, timeout_seconds=25 * 60)
    sdk_tasks.check_running(service_name, count)
Example #28
0
def test_hostname_unique():
    install.uninstall(PACKAGE_NAME)
    options = {
        "service": {
            "spec_file": "examples/marathon_constraint.yml"
        },
        "hello": {
            "count": num_private_agents,
            "placement": "hostname:UNIQUE"
        },
        "world": {
            "count": num_private_agents,
            "placement": "hostname:UNIQUE"
        }
    }

    install.install(PACKAGE_NAME,
                    num_private_agents * 2,
                    additional_options=options)
    # hello deploys first. One "world" task should end up placed with each "hello" task.
    plan.wait_for_completed_deployment(PACKAGE_NAME)

    # ensure "hello" task can still be placed with "world" task
    cmd.run_cli('hello-world pods replace hello-0')
    tasks.check_running(PACKAGE_NAME,
                        num_private_agents * 2 - 1,
                        timeout_seconds=10)
    tasks.check_running(PACKAGE_NAME, num_private_agents * 2)
    ensure_multiple_per_agent(hello=1, world=1)
Example #29
0
def hdfs_service_tls(service_account):
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        sdk_install.install(config.PACKAGE_NAME,
                            service_name=config.SERVICE_NAME,
                            expected_running_tasks=config.DEFAULT_TASK_COUNT,
                            additional_options={
                                "service": {
                                    "service_account":
                                    service_account["name"],
                                    "service_account_secret":
                                    service_account["secret"],
                                    "security": {
                                        "transport_encryption": {
                                            "enabled": True
                                        }
                                    }
                                }
                            },
                            timeout_seconds=30 * 60)

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        yield service_account
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
Example #30
0
def kafka_service_tls(service_account):
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        config.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_BROKER_COUNT,
            additional_options={
                "service": {
                    "service_account": service_account["name"],
                    "service_account_secret": service_account["secret"],
                    "security": {
                        "transport_encryption": {
                            "enabled": True
                        }
                    }
                }
            }
        )

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        yield service_account
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
Example #31
0
def test_more_pods_disable_legacy_role_post_update():
    # Ensure we can scale out more still with legacy role disabled.

    # Add new pods to service which should be launched with the new role.
    marathon_config = sdk_marathon.get_config(SERVICE_NAME)

    # Add an extra pod to each.
    marathon_config["env"]["HELLO_COUNT"] = "3"
    marathon_config["env"]["WORLD_COUNT"] = "4"

    # Update the app
    sdk_marathon.update_app(marathon_config)

    # Wait for scheduler to restart.
    sdk_plan.wait_for_completed_deployment(SERVICE_NAME)

    # Get the current service state to verify roles have applied.
    service_roles = sdk_utils.get_service_roles(SERVICE_NAME)
    current_task_roles = service_roles["task-roles"]

    # We must have some role!
    assert len(current_task_roles) > 0
    assert len(current_task_roles) == 7

    assert LEGACY_ROLE not in current_task_roles.values()
    assert ENFORCED_ROLE in current_task_roles.values()

    # Ensure we're MULTI_ROLE
    assert service_roles["framework-roles"] is None
    assert service_roles["framework-role"] == ENFORCED_ROLE
def test_node_replace_replaces_node():
    replace_task = [
        task for task in sdk_tasks.get_summary()
        if task.name == 'node-2-server'][0]
    log.info('avoid host for task {}'.format(replace_task))

    replace_pod_name = replace_task.name[:-len('-server')]

    # Update the placement constraints so the new node doesn't end up on the same host
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    original_constraint = marathon_config['env']['PLACEMENT_CONSTRAINT']
    try:
        marathon_config['env']['PLACEMENT_CONSTRAINT'] = '[["hostname", "UNLIKE", "{}"]]'.format(replace_task.host)
        sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        # start replace and wait for it to finish
        sdk_cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME, 'pod replace {}'.format(replace_pod_name))
        sdk_plan.wait_for_kicked_off_recovery(config.SERVICE_NAME)
        sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME, timeout_seconds=RECOVERY_TIMEOUT_SECONDS)

    finally:
        # revert to prior placement setting before proceeding with tests: avoid getting stuck.
        marathon_config['env']['PLACEMENT_CONSTRAINT'] = original_constraint
        sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)

        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
Example #33
0
def setup_constraint_switch():
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    agents = sdk_agents.get_private_agents()
    some_agent = agents[0]["hostname"]
    other_agent = agents[1]["hostname"]
    log.info("Agents: %s %s", some_agent, other_agent)
    assert some_agent != other_agent
    options = _escape_placement_for_1_9(
        {
            "service": {"yaml": "marathon_constraint"},
            "hello": {
                "count": 1,
                # First, we stick the pod to some_agent
                "placement": '[["hostname", "LIKE", "{}"]]'.format(some_agent),
            },
            "world": {"count": 0},
        }
    )
    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello")

    # Now, stick it to other_agent
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    marathon_config["env"]["HELLO_PLACEMENT"] = '[["hostname", "LIKE", "{}"]]'.format(other_agent)
    sdk_marathon.update_app(marathon_config)
    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return some_agent, other_agent, hello_ids
Example #34
0
def test_custom_zookeeper():
    broker_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME,
                                    '{}-'.format(DEFAULT_POD_TYPE))

    # sanity check: brokers should be reinitialized:
    brokers = service_cli('broker list', service_name=FOLDERED_SERVICE_NAME)
    assert set(brokers) == set([str(i) for i in range(DEFAULT_BROKER_COUNT)])

    # create a topic against the default zk:
    service_cli('topic create {}'.format(DEFAULT_TOPIC_NAME),
                service_name=FOLDERED_SERVICE_NAME)
    assert service_cli('topic list', service_name=FOLDERED_SERVICE_NAME) == [
        DEFAULT_TOPIC_NAME
    ]

    config = marathon.get_config(FOLDERED_SERVICE_NAME)
    # should be using default path when this envvar is empty/unset:
    assert config['env']['KAFKA_ZOOKEEPER_URI'] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that it's automatically cleaned up in uninstall:
    zk_path = 'master.mesos:2181/dcos-service-test__integration__kafka/CUSTOMPATH'
    config['env']['KAFKA_ZOOKEEPER_URI'] = zk_path
    marathon.update_app(FOLDERED_SERVICE_NAME, config)

    tasks.check_tasks_updated(FOLDERED_SERVICE_NAME,
                              '{}-'.format(DEFAULT_POD_TYPE), broker_ids)
    plan.wait_for_completed_deployment(FOLDERED_SERVICE_NAME)

    zookeeper = service_cli('endpoints zookeeper',
                            get_json=False,
                            service_name=FOLDERED_SERVICE_NAME)
    assert zookeeper.rstrip('\n') == zk_path

    # topic created earlier against default zk should no longer be present:
    assert service_cli('topic list', service_name=FOLDERED_SERVICE_NAME) == []
Example #35
0
def test_changing_discovery_replaces_certificate_sans():
    """
    Update service configuration to change discovery prefix of a task.
    Scheduler should update task and new SANs should be generated.
    """
    original_tasks = sdk_tasks.get_task_ids(config.PACKAGE_NAME, 'discovery')
    assert len(original_tasks) == 1, 'Expecting exactly one task ID'

    task_id = original_tasks[0]
    assert task_id

    # Load end-entity certificate from PEM encoded file
    _, stdout, _ = sdk_cmd.task_exec(task_id, 'cat server.crt')
    log.info('first server.crt: {}'.format(stdout))

    ascii_cert = stdout.encode('ascii')
    log.info('first server.crt ascii encoded: {}'.format(ascii_cert))

    end_entity_cert = x509.load_pem_x509_certificate(ascii_cert,
                                                     DEFAULT_BACKEND)

    san_extension = end_entity_cert.extensions.get_extension_for_oid(
        ExtensionOID.SUBJECT_ALTERNATIVE_NAME)
    sans = [
        san.value for san in san_extension.value._general_names._general_names
    ]

    expected_san = (
        '{name}-0.{service_name}.autoip.dcos.thisdcos.directory'.format(
            name=DISCOVERY_TASK_PREFIX, service_name=config.SERVICE_NAME))
    assert expected_san in sans

    # Run task update with new discovery prefix
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    marathon_config['env'][
        'DISCOVERY_TASK_PREFIX'] = DISCOVERY_TASK_PREFIX + '-new'
    sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    task_id = sdk_tasks.get_task_ids(config.SERVICE_NAME, "discovery")[0]

    _, stdout, _ = sdk_cmd.task_exec(task_id, 'cat server.crt')
    log.info('second server.crt: {}'.format(stdout))

    ascii_cert = stdout.encode('ascii')
    log.info('second server.crt ascii encoded: {}'.format(ascii_cert))
    new_cert = x509.load_pem_x509_certificate(ascii_cert, DEFAULT_BACKEND)

    san_extension = new_cert.extensions.get_extension_for_oid(
        ExtensionOID.SUBJECT_ALTERNATIVE_NAME)
    sans = [
        san.value for san in san_extension.value._general_names._general_names
    ]

    expected_san = (
        '{name}-0.{service_name}.autoip.dcos.thisdcos.directory'.format(
            name=DISCOVERY_TASK_PREFIX + '-new',
            service_name=config.SERVICE_NAME))
    assert expected_san in sans
Example #36
0
def _upgrade_or_downgrade(package_name, to_package_version, service_name,
                          running_task_count, additional_options,
                          timeout_seconds, wait_for_deployment):

    initial_config = get_config(package_name, service_name)
    task_ids = sdk_tasks.get_task_ids(service_name, '')

    if sdk_utils.dcos_version_less_than(
            "1.10") or shakedown.ee_version() is None:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(package_name,
                            service_name,
                            running_task_count,
                            additional_options=additional_options,
                            package_version=to_package_version,
                            timeout_seconds=timeout_seconds,
                            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as opts_f:
                opts_f.write(json.dumps(additional_options).encode('utf-8'))
                opts_f.flush(
                )  # ensure json content is available for the CLI to read below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    'update start --package-version={} --options={}'.format(
                        to_package_version, opts_f.name))
        else:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))
        # we must manually upgrade the package CLI because it's not done automatically in this flow
        # (and why should it? that'd imply the package CLI replacing itself via a call to the main CLI...)
        sdk_cmd.run_cli(
            'package install --yes --cli --package-version={} {}'.format(
                to_package_version, package_name))

    if wait_for_deployment:

        updated_config = get_config(package_name, service_name)

        if updated_config == initial_config:
            log.info(
                'No config change detected. Tasks should not be restarted')
            sdk_tasks.check_tasks_not_updated(service_name, '', task_ids)
        else:
            log.info('Checking that all tasks have restarted')
            sdk_tasks.check_tasks_updated(service_name, '', task_ids)

        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via ONCE tasks, without actually completing deployment
        log.info(
            "Waiting for package={} service={} to finish deployment plan...".
            format(package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)
Example #37
0
def install(
    package_name: str,
    service_name: str,
    expected_running_tasks: int,
    additional_options: dict = {},
    package_version: PackageVersion = PackageVersion.STUB_UNIVERSE,
    timeout_seconds: int = TIMEOUT_SECONDS,
    wait_for_deployment: bool = True,
    insert_strict_options: bool = True,
    wait_for_all_conditions: bool = True,
) -> None:
    start = time.time()

    # If the package is already installed at this point, fail immediately.
    if sdk_marathon.app_exists(service_name):
        raise Exception(
            "Service is already installed: {}".format(service_name))

    if insert_strict_options and sdk_utils.is_strict_mode():
        # strict mode requires correct principal and secret to perform install.
        # see also: sdk_security.py
        options = sdk_utils.merge_dictionaries(
            {
                "service": {
                    "service_account": "service-acct",
                    "principal": "service-acct",
                    "service_account_secret": "secret",
                    "secret_name": "secret",
                }
            },
            additional_options,
        )
    else:
        options = additional_options

    # 1. Install package, wait for tasks, wait for marathon deployment
    _retried_install_impl(
        package_name, service_name,
        expected_running_tasks, package_version.value if isinstance(
            package_version, PackageVersion) else package_version, options,
        timeout_seconds, wait_for_all_conditions)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete
    # state, or if the thing being installed doesn't have a deployment plan (e.g. standalone app)
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info(
            "Waiting for package={} service={} to finish deployment plan...".
            format(package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

    log.info("Installed package={} service={} after {}".format(
        package_name, service_name,
        sdk_utils.pretty_duration(time.time() - start)))

    global _installed_service_names
    _installed_service_names.add(service_name)
Example #38
0
def test_deploy():
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    deployment_plan = sdk_plan.get_deployment_plan(config.SERVICE_NAME)
    log.info(sdk_plan.plan_string("deploy", deployment_plan))

    assert len(deployment_plan["phases"]) == 1
    assert deployment_plan["phases"][0]["name"] == "hello"
    assert len(deployment_plan["phases"][0]["steps"]) == 1
Example #39
0
def test_custom_seccomp_profile():
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)

    # uname will now be dissalowed and svc should crashloop
    marathon_config["env"]["HELLO_SECCOMP_PROFILE_NAME"] = "test_profile.json"
    sdk_marathon.update_app(marathon_config)
    sdk_marathon.wait_for_deployment(config.SERVICE_NAME, 60, None)
def test_enable():
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    sdk_plan.recovery_plan_is_empty(config.SERVICE_NAME)
    sdk_tasks.check_running(config.SERVICE_NAME, 3)
    set_test_boolean('true')
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    sdk_tasks.check_running(config.SERVICE_NAME, 6)
    sdk_plan.recovery_plan_is_empty(config.SERVICE_NAME)
Example #41
0
def test_indexing(default_populated_index: None) -> None:
    indices_stats = config.get_elasticsearch_indices_stats(index_name, service_name=service_name)
    assert indices_stats["_all"]["primaries"]["docs"]["count"] == 1
    doc = config.get_document(index_name, index_type, 1, service_name=service_name)
    assert doc["_source"]["name"] == "Loren"

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
def test_enable():
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    sdk_plan.recovery_plan_is_empty(config.SERVICE_NAME)
    sdk_tasks.check_running(config.SERVICE_NAME, 3, timeout_seconds=30, allow_more=False)
    set_test_boolean("true")
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    sdk_tasks.check_running(config.SERVICE_NAME, 6, timeout_seconds=30, allow_more=False)
    sdk_plan.recovery_plan_is_empty(config.SERVICE_NAME)
Example #43
0
def test_deploy():
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    deployment_plan = sdk_plan.get_deployment_plan(config.SERVICE_NAME)
    log.info("deployment_plan: " + str(deployment_plan))

    assert(len(deployment_plan['phases']) == 1)
    assert(deployment_plan['phases'][0]['name'] == 'hello')
    assert(len(deployment_plan['phases'][0]['steps']) == 1)
Example #44
0
def test_losing_and_regaining_index_health(default_populated_index):
    config.check_elasticsearch_index_health(config.DEFAULT_INDEX_NAME, "green", service_name=foldered_name)
    shakedown.kill_process_on_host(sdk_hosts.system_host(foldered_name, "data-0-node"), "data__.*Elasticsearch")
    config.check_elasticsearch_index_health(config.DEFAULT_INDEX_NAME, "yellow", service_name=foldered_name)
    config.check_elasticsearch_index_health(config.DEFAULT_INDEX_NAME, "green", service_name=foldered_name)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Example #45
0
def test_mesos_v0_api():
    prior_api_version = sdk_marathon.get_mesos_api_version(foldered_name)
    if prior_api_version is not "V0":
        sdk_marathon.set_mesos_api_version(foldered_name, "V0")
        sdk_marathon.set_mesos_api_version(foldered_name, prior_api_version)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Example #46
0
def test_indexing(default_populated_index):
    indices_stats = config.get_elasticsearch_indices_stats(config.DEFAULT_INDEX_NAME, service_name=foldered_name)
    assert indices_stats["_all"]["primaries"]["docs"]["count"] == 1
    doc = config.get_document(config.DEFAULT_INDEX_NAME, config.DEFAULT_INDEX_TYPE, 1, service_name=foldered_name)
    assert doc["_source"]["name"] == "Loren"

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Example #47
0
def test_deploy():
    sdk_plan.wait_for_completed_deployment(config.PACKAGE_NAME)
    deployment_plan = sdk_plan.get_deployment_plan(config.PACKAGE_NAME)
    log.info("deployment_plan: " + str(deployment_plan))

    assert (len(deployment_plan['phases']) == 1)
    assert (deployment_plan['phases'][0]['name'] == 'hello')
    assert (len(deployment_plan['phases'][0]['steps']) == 1)
Example #48
0
def check_permanent_recovery(
    package_name: str,
    service_name: str,
    pod_name: str,
    recovery_timeout_s: int,
    pods_with_updated_tasks: Optional[List[str]] = None,
) -> None:
    """
    Perform a replace (permanent recovery) operation on the specified pod.

    The specified pod AND any additional pods in `pods_with_updated_tasks` are
    checked to ensure that their tasks have been restarted.

    Any remaining pods are checked to ensure that their tasks are not changed.

    For example, performing a pod replace kafka-0 on a Kafka framework should
    result in ONLY the kafa-0-broker task being restarted. In this case,
    pods_with_updated_tasks is specified as None.

    When performing a pod replace operation on a Cassandra seed node (node-0),
    a rolling restart of other nodes is triggered, and
    pods_with_updated_tasks = ["node-0", "node-1", "node-2"]
    (assuming a three node Cassandra ring)
    """
    LOG.info("Testing pod replace operation for %s:%s", service_name, pod_name)

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)

    rc, stdout, _ = sdk_cmd.svc_cli(package_name, service_name, "pod list")
    assert rc == 0, "Pod list failed"
    pod_list = set(json.loads(stdout))

    pods_with_updated_tasks = pods_with_updated_tasks if pods_with_updated_tasks else []
    pods_to_update = set(pods_with_updated_tasks + [pod_name])

    tasks_to_replace = {}
    for pod in pods_to_update:
        tasks_to_replace[pod] = set(sdk_tasks.get_task_ids(service_name, pod_name))

    LOG.info("The following tasks will be replaced: %s", tasks_to_replace)

    tasks_in_other_pods = {}
    for pod in pod_list - pods_to_update:
        tasks_in_other_pods[pod] = set(sdk_tasks.get_task_ids(service_name, pod))

    LOG.info("Tasks in other pods should not be replaced: %s", tasks_in_other_pods)

    sdk_cmd.svc_cli(package_name, service_name, "pod replace {}".format(pod_name))

    sdk_plan.wait_for_kicked_off_recovery(service_name, recovery_timeout_s)
    sdk_plan.wait_for_completed_recovery(service_name, recovery_timeout_s)

    for pod, tasks in tasks_to_replace.items():
        sdk_tasks.check_tasks_updated(service_name, pod, tasks)

    for pod, tasks in tasks_in_other_pods.items():
        sdk_tasks.check_tasks_not_updated(service_name, pod, tasks)
Example #49
0
def test_task_dns_prefix_points_to_all_tasks():
    pod_info = dcos.http.get(
        shakedown.dcos_service_url(config.SERVICE_NAME) +
        "/v1/pod/{}/info".format("hello-0")).json()

    # Assert that DiscoveryInfo is correctly set on tasks.
    assert(all(p["info"]["discovery"]["name"] == "hello-0" for p in pod_info))
    # Assert that the hello-0.hello-world.mesos DNS entry points to the right IP.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
Example #50
0
def test_plugin_install_and_uninstall(default_populated_index):
    plugin_name = 'analysis-phonetic'
    config.update_app(foldered_name, {'TASKCFG_ALL_ELASTICSEARCH_PLUGINS': plugin_name}, current_expected_task_count)
    config.check_elasticsearch_plugin_installed(plugin_name, service_name=foldered_name)

    config.update_app(foldered_name, {'TASKCFG_ALL_ELASTICSEARCH_PLUGINS': ''}, current_expected_task_count)
    config.check_elasticsearch_plugin_uninstalled(plugin_name, service_name=foldered_name)
    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Example #51
0
def test_uninstall():
    config.check_running()

    # add the needed envvar in marathon and confirm that the uninstall "deployment" succeeds:
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    env = marathon_config["env"]
    env["SDK_UNINSTALL"] = "w00t"
    sdk_marathon.update_app(marathon_config)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    sdk_tasks.check_running(config.SERVICE_NAME, 0, allow_more=False)
Example #52
0
def test_endpoints():
    # check that we can reach the scheduler via admin router, and that returned endpoints are sanitized:
    for endpoint in config.ENDPOINT_TYPES:
        endpoints = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'endpoints {}'.format(endpoint), json=True)
        host = endpoint.split('-')[0] # 'coordinator-http' => 'coordinator'
        assert endpoints['dns'][0].startswith(sdk_hosts.autoip_host(foldered_name, host + '-0-node'))
        assert endpoints['vip'].startswith(sdk_hosts.vip_host(foldered_name, host))

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Example #53
0
def test_uninstall():
    config.check_running()

    # add the needed envvar in marathon and confirm that the uninstall "deployment" succeeds:
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    env = marathon_config['env']
    env['SDK_UNINSTALL'] = 'w00t'
    sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    sdk_tasks.check_running(config.SERVICE_NAME, 0)
Example #54
0
def test_master_reelection():
    initial_master = config.get_elasticsearch_master(service_name=foldered_name)
    shakedown.kill_process_on_host(sdk_hosts.system_host(foldered_name, initial_master), "master__.*Elasticsearch")
    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
    config.wait_for_expected_nodes_to_exist(service_name=foldered_name)
    new_master = config.get_elasticsearch_master(service_name=foldered_name)
    assert new_master.startswith("master") and new_master != initial_master

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
Example #55
0
def check_task_not_relaunched(service_name, task_name, old_task_id, timeout_seconds=DEFAULT_TIMEOUT_SECONDS):
    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)

    try:
        task_ids = set([t['id'] for t in shakedown.get_tasks() if t['name'] == task_name])
    except dcos.errors.DCOSHTTPException:
        log.info('Failed to get task ids for service {}'.format(service_name))
        task_ids = set([])

    assert len(task_ids) == 1 and old_task_id in task_ids
Example #56
0
def test_endpoints() -> None:
    # Check that we can reach the scheduler via admin router, and that returned endpoints are
    # sanitized.
    for endpoint in config.ENDPOINT_TYPES:
        endpoints = sdk_networks.get_endpoint(package_name, service_name, endpoint)
        host = endpoint.split("-")[0]  # 'coordinator-http' => 'coordinator'
        assert endpoints["dns"][0].startswith(sdk_hosts.autoip_host(service_name, host + "-0-node"))
        assert endpoints["vip"].startswith(sdk_hosts.vip_host(service_name, host))

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)