Example #1
0
def test_config_update_then_scheduler_died():
    """Bump the world CPUs, kill the scheduler process, then check the 'world' tasks were updated."""
    # Task ids captured before the config change; passed to check_tasks_updated below.
    original_world_ids = tasks.get_task_ids(PACKAGE_NAME, 'world')
    scheduler_host = marathon.get_scheduler_host(PACKAGE_NAME)

    bump_world_cpus()
    tasks.kill_task_with_pattern(
        'helloworld.scheduler.Main',
        scheduler_host,
    )

    tasks.check_tasks_updated(
        PACKAGE_NAME,
        'world',
        original_world_ids,
    )
    check_running()
Example #2
0
def test_config_update_then_scheduler_died():
    """Change the world CPU config, kill the scheduler, then verify the 'world' tasks were updated."""
    service_name = config.SERVICE_NAME

    # Recorded up front: these ids are what check_tasks_updated is given later.
    previous_world_ids = sdk_tasks.get_task_ids(service_name, 'world')
    scheduler_host = sdk_marathon.get_scheduler_host(service_name)

    config.bump_world_cpus()
    sdk_cmd.kill_task_with_pattern('helloworld.scheduler.Main', scheduler_host)

    sdk_tasks.check_tasks_updated(service_name, 'world', previous_world_ids)
    config.check_running()
Example #3
0
def test_unchanged_scheduler_restarts_without_restarting_tasks():
    """Kill the scheduler process and check that the service's task ids were not updated."""
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    ids_before_kill = sdk_tasks.get_task_ids(service, '')

    scheduler_host = sdk_marathon.get_scheduler_host(service)
    shakedown.kill_process_on_host(scheduler_host, "elastic.scheduler.Main")

    sdk_tasks.check_tasks_not_updated(service, '', ids_before_kill)
def test_config_update_then_scheduler_died():
    """Kill the scheduler right after bumping the world CPUs and check the 'world' tasks were updated."""
    # Ids of the 'world' tasks as they are before the config bump.
    ids_before_bump = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    agent = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)

    config.bump_world_cpus()
    sdk_cmd.kill_task_with_pattern(
        'helloworld.scheduler.Main',
        agent,
    )

    sdk_tasks.check_tasks_updated(
        config.SERVICE_NAME,
        'world',
        ids_before_bump,
    )
    config.check_running()
Example #5
0
def test_unchanged_scheduler_restarts_without_restarting_tasks():
    """Kill the scheduler process and check that the "master" task ids were not updated."""
    master_ids_before = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, "master")

    scheduler_host = sdk_marathon.get_scheduler_host(FOLDERED_SERVICE_NAME)
    shakedown.kill_process_on_host(scheduler_host, "elastic.scheduler.Main")

    sdk_tasks.check_tasks_not_updated(
        FOLDERED_SERVICE_NAME,
        "master",
        master_ids_before,
    )
Example #6
0
def test_kill_scheduler():
    """Kill the hello-world scheduler and check that its Marathon task id was updated."""
    prefix = sdk_marathon.get_scheduler_task_prefix(config.SERVICE_NAME)
    ids_before_kill = sdk_tasks.get_task_ids("marathon", prefix)
    assert len(ids_before_kill) == 1, "Expected to find one scheduler task"

    scheduler_host = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern(
        "./hello-world-scheduler/bin/helloworld", "nobody", agent_host=scheduler_host
    )

    # The scheduler's task id under "marathon" is expected to differ after the kill.
    sdk_tasks.check_tasks_updated("marathon", prefix, ids_before_kill)
    check_healthy()
Example #7
0
def test_kill_scheduler():
    """Kill the hdfs scheduler; its Marathon task must change while service tasks stay as they are."""
    service_ids_before = sdk_tasks.get_task_ids(foldered_name, "")
    prefix = sdk_marathon.get_scheduler_task_prefix(foldered_name)
    scheduler_ids_before = sdk_tasks.get_task_ids("marathon", prefix)
    assert len(scheduler_ids_before) == 1, "Expected to find one scheduler task"

    scheduler_host = sdk_marathon.get_scheduler_host(foldered_name)
    sdk_cmd.kill_task_with_pattern(
        "./hdfs-scheduler/bin/hdfs", "nobody", agent_host=scheduler_host
    )

    # Scheduler task is replaced ...
    sdk_tasks.check_tasks_updated("marathon", prefix, scheduler_ids_before)
    # ... whereas the service's own tasks keep their ids.
    sdk_tasks.check_tasks_not_updated(foldered_name, "", service_ids_before)
    config.check_healthy(service_name=foldered_name)
def test_kill_scheduler():
    """Kill the hello-world scheduler, wait for the framework, and check service tasks were not updated."""
    service_ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, "")
    prefix = sdk_marathon.get_scheduler_task_prefix(config.SERVICE_NAME)
    scheduler_ids_before = sdk_tasks.get_task_ids("marathon", prefix)
    assert (
        len(scheduler_ids_before) == 1
    ), "Expected to find ONLY one scheduler task but found {}".format(scheduler_ids_before)

    scheduler_host = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern(
        "./hello-world-scheduler/bin/helloworld", "nobody", agent_host=scheduler_host
    )

    # The scheduler's Marathon task id is expected to change after the kill.
    sdk_tasks.check_tasks_updated("marathon", prefix, scheduler_ids_before)
    sdk_tasks.wait_for_active_framework(config.SERVICE_NAME)
    config.check_running()
    # The service's own tasks must keep the ids recorded before the kill.
    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, "", service_ids_before)
Example #9
0
def test_kill_scheduler():
    """Kill the hdfs scheduler, wait for the framework, and check service tasks were left as-is."""
    service_ids_before = sdk_tasks.get_task_ids(foldered_name, "")
    prefix = sdk_marathon.get_scheduler_task_prefix(foldered_name)
    scheduler_ids_before = sdk_tasks.get_task_ids("marathon", prefix)
    assert len(scheduler_ids_before) == 1, "Expected to find one scheduler task"

    scheduler_host = sdk_marathon.get_scheduler_host(foldered_name)
    sdk_cmd.kill_task_with_pattern(
        "./hdfs-scheduler/bin/hdfs", "nobody", agent_host=scheduler_host
    )

    # The scheduler should come back under a new task id; the service tasks
    # should still carry the ids recorded above.
    sdk_tasks.check_tasks_updated("marathon", prefix, scheduler_ids_before)
    sdk_tasks.wait_for_active_framework(foldered_name)
    sdk_tasks.check_tasks_not_updated(foldered_name, "", service_ids_before)
    config.check_healthy(service_name=foldered_name)
Example #10
0
def test_dispatcher_placement(configure_universe):
    """Install spark with a hostname CLUSTER constraint and check the dispatcher host matches it.

    The target hostname is taken from one of the private agents; spark is
    torn down again whether or not the assertion holds.
    """
    service_name = "spark"
    target_hostname = sdk_agents.get_private_agents().pop()["hostname"]
    constraint = ["hostname", "CLUSTER", target_hostname]

    log.info(
        "Running test: service_name='{}', constraints=[[{}]]".format(
            service_name, ','.join(constraint)
        )
    )

    options = {
        "service": {
            "name": service_name,
            "constraints": [constraint],
        }
    }
    try:
        utils.require_spark(service_name=service_name, additional_options=options)

        dispatcher_host = sdk_marathon.get_scheduler_host(service_name)
        log.info("Dispatcher Host: {}".format(dispatcher_host))
        assert target_hostname == dispatcher_host
    finally:
        utils.teardown_spark(service_name=service_name)
Example #11
0
def test_kill_scheduler():
    """Kill the hello-world scheduler and verify only the scheduler's Marathon task was updated."""
    service_ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, "")
    prefix = sdk_marathon.get_scheduler_task_prefix(config.SERVICE_NAME)
    scheduler_ids_before = sdk_tasks.get_task_ids("marathon", prefix)
    assert (
        len(scheduler_ids_before) == 1
    ), "Expected to find ONLY one scheduler task but found {}".format(scheduler_ids_before)

    scheduler_host = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern(
        "./hello-world-scheduler/bin/helloworld", "nobody", agent_host=scheduler_host
    )

    # Scheduler task id changes; the framework must become active again.
    sdk_tasks.check_tasks_updated("marathon", prefix, scheduler_ids_before)
    sdk_tasks.wait_for_active_framework(config.SERVICE_NAME)
    config.check_running()
    # Service tasks keep the ids they had before the scheduler was killed.
    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, "", service_ids_before)
Example #12
0
def test_scheduler_died():
    """Kill the scheduler process on its host, then run the check_running verification."""
    scheduler_host = marathon.get_scheduler_host(PACKAGE_NAME)
    tasks.kill_task_with_pattern('helloworld.scheduler.Main', scheduler_host)
    check_running()
Example #13
0
def test_scheduler_died():
    """Kill the scheduler process on its host, then run config.check_running."""
    scheduler_host = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern('helloworld.scheduler.Main', scheduler_host)
    config.check_running()
Example #14
0
def test_unchanged_scheduler_restarts_without_restarting_tasks():
    """Kill the scheduler process and check the "master" tasks kept their ids."""
    ids_before_kill = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, "master")

    host = sdk_marathon.get_scheduler_host(FOLDERED_SERVICE_NAME)
    shakedown.kill_process_on_host(host, "elastic.scheduler.Main")

    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, "master", ids_before_kill)
def test_scheduler_died():
    """Kill the process matching 'helloworld.scheduler.Main' on the scheduler host, then check_running."""
    agent = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern(
        'helloworld.scheduler.Main',
        agent,
    )
    config.check_running()
def test_marathon_volume_collission():
    """Check that hello-world leaves a Marathon app's persistent volume intact.

    This test validates that a service registered in a sub-role of
    slave_public will _not_ unreserve Marathon volumes RESERVED
    in the `slave_public` role.

    The Marathon app writes a marker file into its persistent volume. The
    marker is re-read after the app is scaled to 0, after hello-world is
    installed, after hello-world is uninstalled, and after the app is scaled
    back to 1. hello-world is reinstalled and the app destroyed at the end.
    """
    # Uninstall HW first
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    # Install the marathon app
    marathon_app_name = "persistent-test"
    volume_name = "persistent-volume"
    persistent_app = {
        "id": marathon_app_name,
        "mem": 128,
        "user": "******",
        "cmd": "echo 'this is a test' > {}/test && sleep 10000".format(volume_name),
        "container": {
            "type": "MESOS",
            "volumes": [
                {
                    "persistent": {
                        "type": "root",
                        "size": 500,
                        "constraints": []
                    },
                    "mode": "RW",
                    "containerPath": volume_name
                }
            ]
        }
    }
    try:
        sdk_marathon.install_app(persistent_app)

        # Get its persistent Volume
        host = sdk_marathon.get_scheduler_host(marathon_app_name)
        # List only this app's volume directory instead of everything under
        # slave_public: any other volume on the agent would otherwise end up
        # in the output and produce an unusable path below.
        ok, pv_path = sdk_cmd.agent_ssh(
            host,
            "ls -d /var/lib/mesos/slave/volumes/roles/slave_public/{}#{}#*".format(
                marathon_app_name, volume_name))
        assert ok, "Could not locate the persistent volume of '{}'".format(marathon_app_name)

        pv_path = pv_path.strip()

        @retrying.retry(wait_fixed=1000, stop_max_delay=60*1000)
        def check_content():
            # Retried for up to 60s; both the command status and the marker
            # text must be right for the check to pass.
            ok, pv_content = sdk_cmd.agent_ssh(host, "cat {}/test".format(pv_path))
            assert ok and pv_content.strip() == "this is a test"

        check_content()

        # Scale down the Marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config['instances'] = 0
        sdk_marathon.update_app(marathon_app_name, app_config)

        # Install Hello World
        sdk_install.install(config.PACKAGE_NAME,
                            config.SERVICE_NAME,
                            config.DEFAULT_TASK_COUNT,
                            additional_options=pre_reserved_options)

        # Make sure the persistent volume is still there
        check_content()

        # Uninstall Hello World
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        # Make sure the persistent volume is still there
        check_content()

        # Scale back up the marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config['instances'] = 1
        sdk_marathon.update_app(marathon_app_name, app_config)

        # Make sure the persistent volume is still there
        check_content()

    finally:
        # Reinstall hello world
        sdk_install.install(config.PACKAGE_NAME,
                            config.SERVICE_NAME,
                            config.DEFAULT_TASK_COUNT,
                            additional_options=pre_reserved_options)

        sdk_marathon.destroy_app(marathon_app_name)
Example #17
0
def test_unchanged_scheduler_restarts_without_restarting_tasks():
    """Kill the scheduler process and check the "master" task ids were not updated."""
    master_ids_before = tasks.get_task_ids(PACKAGE_NAME, "master")

    scheduler_host = marathon.get_scheduler_host(PACKAGE_NAME)
    shakedown.kill_process_on_host(scheduler_host, "elastic.scheduler.Main")

    tasks.check_tasks_not_updated(PACKAGE_NAME, "master", master_ids_before)
Example #18
0
def test_marathon_volume_collision():
    """Check that hello-world does not disturb a Marathon app's persistent volume.

    Validates that a service registered in a sub-role of slave_public will
    _not_ unreserve Marathon volumes RESERVED in the `slave_public` role.
    A marker file written by the Marathon app is re-read after each step
    (scale to 0, install hello-world, uninstall hello-world, scale to 1).
    """

    def install_hello_world():
        # Same install call is needed mid-test and again during cleanup.
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            PRERESERVED_TASK_COUNT,
            additional_options=pre_reserved_options,
        )

    def scale_app(instances):
        # Fetch the current app definition and push it back with a new instance count.
        app_config = sdk_marathon.get_config(app_id)
        app_config["instances"] = instances
        sdk_marathon.update_app(app_config)

    # hello-world is removed first and reinstalled later in the test.
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    app_id = "persistent-test"
    container_path = "persistent-volume"
    persistent_volume = {
        "persistent": {"type": "root", "size": 500, "constraints": []},
        "mode": "RW",
        "containerPath": container_path,
    }
    app_definition = {
        "id": app_id,
        "mem": 128,
        "user": "******",
        "cmd": "echo 'this is a test' > {}/test && sleep 10000".format(container_path),
        "container": {"type": "MESOS", "volumes": [persistent_volume]},
    }

    try:
        sdk_marathon.install_app(app_definition)

        agent_host = sdk_marathon.get_scheduler_host(app_id)
        # Expected output looks like:
        # "/var/lib/mesos/slave/volumes/roles/slave_public/persistent-test#persistent-volume#76e7bb6d-64fa-11e8-abc5-8e679b292d5e"
        list_cmd = "ls -d /var/lib/mesos/slave/volumes/roles/slave_public/{}#{}#*".format(
            app_id, container_path
        )
        rc, listing, _ = sdk_cmd.agent_ssh(agent_host, list_cmd)

        if rc != 0:
            log.error("Could not get slave_public roles. return-code: '%s'\n", rc)
        assert rc == 0

        volume_path = listing.strip()

        @retrying.retry(wait_fixed=1000, stop_max_delay=60 * 1000)
        def check_content():
            status, marker, _ = sdk_cmd.agent_ssh(
                agent_host, "cat {}/test".format(volume_path)
            )
            assert status == 0
            assert marker.strip() == "this is a test"

        check_content()

        # Stop the Marathon app's only instance.
        scale_app(0)

        install_hello_world()
        # Marker must survive the hello-world install ...
        check_content()

        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        # ... and the hello-world uninstall ...
        check_content()

        scale_app(1)
        # ... and still be there once the app is running again.
        check_content()

    finally:
        # Reinstall hello-world, then remove the Marathon app.
        install_hello_world()

        sdk_marathon.destroy_app(app_id)
Example #19
0
def test_backup_and_restore_to_s3_compatible_storage() -> None:
    """Run the "backup-s3"/"restore-s3" plans against a Minio bucket.

    Installs minio and marathon-lb, creates the bucket through the ``aws``
    CLI using the Minio credentials from ``config``, then delegates to
    ``config.run_backup_and_restore``. Afterwards both packages are
    uninstalled and the caller's ``AWS_ACCESS_KEY_ID`` /
    ``AWS_SECRET_ACCESS_KEY`` environment variables are restored.

    Raises:
        AssertionError: if ``AWS_ACCESS_KEY_ID`` is not set on entry.
    """
    # Capture the caller's credentials before anything can fail: the cleanup
    # below restores them, so both names must be bound on every exit path.
    temp_key_id = os.getenv("AWS_ACCESS_KEY_ID")
    temp_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
    assert (
        temp_key_id
    ), 'AWS credentials are required for this test. Disable test with e.g. TEST_TYPES="sanity and not aws"'

    try:
        sdk_install.install(
            "minio",
            "minio",
            expected_running_tasks=0,
            package_version="0.0.13-RELEASE.2018-10-06T00-15-16Z",
            wait_for_deployment=False,
        )

        # NOTE(review): an empty string (not a dict) is what was passed when
        # not in strict mode; kept as-is - confirm sdk_install accepts both.
        options = ""
        if sdk_utils.is_strict_mode():
            sdk_security.create_service_account(
                service_account_name="marathon-lb-sa",
                service_account_secret="marathon-lb/service-account-secret",
            )
            sdk_cmd.run_cli(
                "security org users grant marathon-lb-sa dcos:service:marathon:marathon:services:/ read"
            )
            sdk_cmd.run_cli(
                'security org users grant marathon-lb-sa dcos:service:marathon:marathon:admin:events read --description "Allows access to Marathon events"'
            )
            options = {
                "marathon-lb": {
                    "secret_name": "marathon-lb/service-account-secret",
                    "marathon-uri": "https://marathon.mesos:8443",
                }
            }

        sdk_install.install(
            "marathon-lb",
            "marathon-lb",
            expected_running_tasks=0,
            additional_options=options,
            package_version="1.14.0",
            wait_for_deployment=False,
        )
        host = sdk_marathon.get_scheduler_host("marathon-lb")
        _, public_node_ip, _ = sdk_cmd.agent_ssh(host, "curl -s ifconfig.co")
        # Strip so a trailing newline in the command output cannot end up
        # inside the endpoint URL.
        minio_endpoint_url = "http://" + public_node_ip.strip() + ":9000"

        # From here on the aws CLI and the plan parameters use the Minio
        # credentials; the originals are put back during cleanup.
        os.environ["AWS_ACCESS_KEY_ID"] = config.MINIO_AWS_ACCESS_KEY_ID
        os.environ["AWS_SECRET_ACCESS_KEY"] = config.MINIO_AWS_SECRET_ACCESS_KEY
        # The exit status of the bucket creation is not checked.
        subprocess.run(
            [
                "aws",
                "s3",
                "mb",
                "s3://" + config.MINIO_BUCKET_NAME,
                "--endpoint",
                minio_endpoint_url,
            ]
        )

        plan_parameters = {
            "AWS_ACCESS_KEY_ID": os.getenv("AWS_ACCESS_KEY_ID"),
            "AWS_SECRET_ACCESS_KEY": os.getenv("AWS_SECRET_ACCESS_KEY"),
            "AWS_REGION": os.getenv("AWS_REGION", "us-west-2"),
            "S3_BUCKET_NAME": config.MINIO_BUCKET_NAME,
            "SNAPSHOT_NAME": str(uuid.uuid1()),
            "CASSANDRA_KEYSPACES": '"testspace1 testspace2"',
            "S3_ENDPOINT_URL": minio_endpoint_url,
        }

        config.run_backup_and_restore(
            config.get_foldered_service_name(),
            "backup-s3",
            "restore-s3",
            plan_parameters,
            config.get_foldered_node_address(),
        )
    finally:
        try:
            sdk_install.uninstall("minio", "minio")
            sdk_install.uninstall("marathon-lb", "marathon-lb")
        finally:
            # Restore the environment even if an uninstall raised.
            # os.environ only accepts strings, so a variable that was unset
            # on entry is removed again instead of being assigned None.
            for name, original in (
                ("AWS_ACCESS_KEY_ID", temp_key_id),
                ("AWS_SECRET_ACCESS_KEY", temp_secret_access_key),
            ):
                if original is None:
                    os.environ.pop(name, None)
                else:
                    os.environ[name] = original
def test_marathon_volume_collision():
    """Check that a Marathon persistent volume survives a hello-world install/uninstall cycle.

    Validates that a service registered in a sub-role of slave_public will
    _not_ unreserve Marathon volumes RESERVED in the `slave_public` role.
    The Marathon app writes a marker file into its volume; the marker is
    re-read after every step of the scenario.
    """

    def reinstall_service():
        # Used once in the scenario and once more during cleanup.
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_TASK_COUNT,
            additional_options=pre_reserved_options,
        )

    def set_instance_count(count):
        # Read the current Marathon config, change only the instance count, push it back.
        current = sdk_marathon.get_config(app_name)
        current["instances"] = count
        sdk_marathon.update_app(current)

    # Start from a cluster without hello-world.
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    app_name = "persistent-test"
    mount_point = "persistent-volume"
    volumes = [
        {
            "persistent": {"type": "root", "size": 500, "constraints": []},
            "mode": "RW",
            "containerPath": mount_point,
        }
    ]
    app_spec = {
        "id": app_name,
        "mem": 128,
        "user": "******",
        "cmd": "echo 'this is a test' > {}/test && sleep 10000".format(mount_point),
        "container": {"type": "MESOS", "volumes": volumes},
    }

    try:
        sdk_marathon.install_app(app_spec)

        agent = sdk_marathon.get_scheduler_host(app_name)
        # Output should look like:
        # "/var/lib/mesos/slave/volumes/roles/slave_public/persistent-test#persistent-volume#76e7bb6d-64fa-11e8-abc5-8e679b292d5e"
        find_volume_cmd = (
            "ls -d /var/lib/mesos/slave/volumes/roles/slave_public/{}#{}#*".format(
                app_name, mount_point
            )
        )
        exit_code, found, _ = sdk_cmd.agent_ssh(agent, find_volume_cmd)

        if exit_code != 0:
            log.error("Could not get slave_public roles. return-code: '%s'\n", exit_code)
        assert exit_code == 0

        volume_dir = found.strip()

        @retrying.retry(wait_fixed=1000, stop_max_delay=60 * 1000)
        def check_content():
            code, text, _ = sdk_cmd.agent_ssh(agent, "cat {}/test".format(volume_dir))
            assert code == 0
            assert text.strip() == "this is a test"

        check_content()

        # Take the Marathon app down to zero instances.
        set_instance_count(0)

        reinstall_service()
        # Volume content must be unchanged after installing hello-world.
        check_content()

        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        # Volume content must be unchanged after uninstalling hello-world.
        check_content()

        set_instance_count(1)
        # Volume content must be unchanged once the app is scaled back up.
        check_content()

    finally:
        # Put hello-world back, then remove the Marathon app.
        reinstall_service()

        sdk_marathon.destroy_app(app_name)
Example #21
0
def test_marathon_volume_collission():
    """Check that hello-world leaves a Marathon app's persistent volume intact.

    This test validates that a service registered in a sub-role of
    slave_public will _not_ unreserve Marathon volumes RESERVED
    in the `slave_public` role.

    The Marathon app writes a marker file into its persistent volume. The
    marker is re-read after the app is scaled to 0, after hello-world is
    installed, after hello-world is uninstalled, and after the app is scaled
    back to 1. hello-world is reinstalled and the app destroyed at the end.
    """
    # Uninstall HW first
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    # Install the marathon app
    marathon_app_name = "persistent-test"
    volume_name = "persistent-volume"
    persistent_app = {
        "id": marathon_app_name,
        "mem": 128,
        "user": "******",
        "cmd": "echo 'this is a test' > {}/test && sleep 10000".format(volume_name),
        "container": {
            "type":
            "MESOS",
            "volumes": [{
                "persistent": {
                    "type": "root",
                    "size": 500,
                    "constraints": []
                },
                "mode": "RW",
                "containerPath": volume_name
            }]
        }
    }
    try:
        sdk_marathon.install_app(persistent_app)

        # Get its persistent Volume
        host = sdk_marathon.get_scheduler_host(marathon_app_name)
        # List only this app's volume directory instead of everything under
        # slave_public: any other volume on the agent would otherwise end up
        # in the output and produce an unusable path below.
        ok, pv_path = shakedown.run_command_on_agent(
            host,
            "ls -d /var/lib/mesos/slave/volumes/roles/slave_public/{}#{}#*".
            format(marathon_app_name, volume_name))
        assert ok, "Could not locate the persistent volume of '{}'".format(
            marathon_app_name)

        pv_path = pv_path.strip()

        @retrying.retry(wait_fixed=1000, stop_max_delay=60 * 1000)
        def check_content():
            # Retried for up to 60s; both the command status and the marker
            # text must be right for the check to pass.
            ok, pv_content = shakedown.run_command_on_agent(
                host, "cat {}/test".format(pv_path))
            assert ok and pv_content.strip() == "this is a test"

        check_content()

        # Scale down the Marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config['instances'] = 0
        sdk_marathon.update_app(marathon_app_name, app_config)

        # Install Hello World
        sdk_install.install(config.PACKAGE_NAME,
                            config.SERVICE_NAME,
                            config.DEFAULT_TASK_COUNT,
                            additional_options=pre_reserved_options)

        # Make sure the persistent volume is still there
        check_content()

        # Uninstall Hello World
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        # Make sure the persistent volume is still there
        check_content()

        # Scale back up the marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config['instances'] = 1
        sdk_marathon.update_app(marathon_app_name, app_config)

        # Make sure the persistent volume is still there
        check_content()

    finally:
        # Reinstall hello world
        sdk_install.install(config.PACKAGE_NAME,
                            config.SERVICE_NAME,
                            config.DEFAULT_TASK_COUNT,
                            additional_options=pre_reserved_options)

        sdk_marathon.destroy_app(marathon_app_name)