Ejemplo n.º 1
0
def test_event_channel():
    """Check that deployment and kill events appear on the Marathon event channel.

    Events are read back from an ``events.txt`` file on the master node.
    According to the original notes, a fixture (not part of this block)
    streams the events into that file and removes it before and after the
    test.  The test deploys an app and looks for the attach and deployment
    events, then removes the app and looks for a ``KILLED`` event.
    """
    definition = apps.mesos_app()
    deployed_id = definition['id']

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)
    shakedown.deployment_wait()

    master = shakedown.master_ip()

    def recorded_events():
        # Only the command output matters here; the status flag is ignored.
        _, output = shakedown.run_command(master, 'cat events.txt')
        return output

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_deployment_message():
        events = recorded_events()
        assert 'event_stream_attached' in events, "event_stream_attached event has not been found"
        assert 'deployment_info' in events, "deployment_info event has not been found"
        assert 'deployment_step_success' in events, "deployment_step_success has not been found"

    check_deployment_message()

    marathon_client.remove_app(deployed_id, True)
    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_kill_message():
        assert 'KILLED' in recorded_events(), "KILLED event has not been found"

    check_kill_message()
Ejemplo n.º 2
0
def test_incremental_groups_scale():
    """Keep adding groups to Marathon in batches.

    Every iteration asks ``exponential_decay`` (defined outside this block)
    for the next batch size, creates that many groups with one app each via a
    single ``PUT /v2/apps`` and waits up to 15 minutes for the deployment.
    The loop has no exit condition of its own; it only ends when one of the
    calls raises.
    """

    client = marathon.create_client()

    batch_size_for = exponential_decay(start=40, decay=0.01)
    total = 0
    for step in itertools.count(start=0):
        batch_size = batch_size_for(step)
        # Number the groups from the running total.  The previous
        # ``step * batch_size + i`` scheme reused IDs of earlier batches as
        # soon as the batch size changed between steps, so fewer groups
        # existed than the reported total.
        first_index = total
        total += batch_size
        shakedown.echo("Add {} groups totaling {}".format(batch_size, total))

        group_ids = ("/group-{0:0>4}".format(index)
                     for index in range(first_index, total))
        app_ids = ("{}/app-1".format(g) for g in group_ids)
        app_definitions = [app_def(app_id) for app_id in app_ids]

        # There is no app id. We simply PUT /v2/apps to create groups in
        # batches.
        client.update_app('', app_definitions)
        shakedown.deployment_wait(
                timeout=timedelta(minutes=15).total_seconds())

        shakedown.echo("done.")
Ejemplo n.º 3
0
def test_incremental_group_nesting():
    """Deploy an app into ever deeper nested groups.

    The nesting depth grows by the increment that ``exponential_decay``
    (defined outside this block) returns for each step.  The loop has no exit
    condition of its own; it ends when one of the calls raises.
    """

    marathon_client = marathon.create_client()
    next_increment = exponential_decay(start=5, decay=0.1)
    wait_limit = timedelta(minutes=15).total_seconds()

    nesting = 0
    for iteration in itertools.count(start=0):
        nesting += next_increment(iteration)
        shakedown.echo("Create a group with a nesting of {}".format(nesting))

        # Every step reuses the same outer groups and only extends the path.
        path = '/'.join("group-{0:0>3}".format(level) for level in range(nesting))
        nested_app_id = '/{0}/app-1'.format(path)

        marathon_client.add_app(app_def(nested_app_id))
        shakedown.deployment_wait(timeout=wait_limit)

        shakedown.echo("done.")
Ejemplo n.º 4
0
def test_incremental_apps_per_group_scale():
    """Create one group per step, each holding a batch of apps.

    The batch size for every step comes from ``exponential_decay`` (defined
    outside this block).  The loop has no exit condition of its own; it ends
    when one of the calls raises.
    """

    marathon_client = marathon.create_client()
    size_of_batch = exponential_decay(start=500, decay=0.3)
    wait_limit = timedelta(minutes=15).total_seconds()

    for iteration in itertools.count(start=0):
        count = size_of_batch(iteration)
        shakedown.echo("Add {} apps".format(count))

        definitions = []
        for index in range(count):
            definitions.append(app_def("app-{0:0>4}".format(index)))

        marathon_client.create_group({
            "apps": definitions,
            "dependencies": [],
            "id": "/batch-{0:0>3}".format(iteration)
        })
        shakedown.deployment_wait(timeout=wait_limit)

        shakedown.echo("done.")
Ejemplo n.º 5
0
def test_lock():
    '''Verify that a second scheduler instance does not touch the ZK config.

    Per the original notes: without locking, a second scheduler would only
    fail during registration, after it had already written its config to ZK.
    The test therefore scales the scheduler app to two instances, waits for a
    new task failure to be recorded and then checks that the ZK config node
    still holds the same data as before.
    '''

    client = dcos.marathon.create_client()

    # Snapshot the config written by the running framework.
    config_path = "dcos-service-{}/ConfigTarget".format(PACKAGE_NAME)
    config_before = shakedown.get_zk_node_data(config_path)

    scheduler_id = "/{}".format(PACKAGE_NAME)
    scheduler_app = client.get_app(scheduler_id)

    def last_failure_timestamp(app):
        # None when no task failure has been recorded for the app.
        return app.get("lastTaskFailure", {}).get("timestamp", None)

    failure_before = last_failure_timestamp(scheduler_app)

    # The single-instance label has to go before the app can be scaled to 2.
    remaining_labels = scheduler_app["labels"]
    remaining_labels.pop("MARATHON_SINGLE_INSTANCE_APP")
    client.update_app(scheduler_id, {"labels": remaining_labels})
    shakedown.deployment_wait()
    client.update_app(scheduler_id, {"instances": 2})

    # A changed failure timestamp means the second scheduler has failed.
    def new_failure_recorded():
        return last_failure_timestamp(client.get_app(scheduler_id)) != failure_before

    spin.time_wait_noisy(lambda: new_failure_recorded())

    config_after = shakedown.get_zk_node_data(config_path)
    assert config_before == config_after
Ejemplo n.º 6
0
def test_mom_when_mom_process_killed():
    """Launch a task from MoM, kill the MoM process and expect the same task afterwards."""

    sleeper = apps.sleep_app()
    sleeper_id = sleeper["id"]
    target_host = common.ip_other_than_mom()
    common.pin_to_host(sleeper, target_host)

    with shakedown.marathon_on_marathon():
        mom_client = marathon.create_client()
        mom_client.add_app(sleeper)
        shakedown.deployment_wait()
        task_id_before = mom_client.get_tasks(sleeper_id)[0]['id']

        # Kill the MoM process, then wait for its task and endpoint to return.
        shakedown.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            current = mom_client.get_tasks(sleeper_id)
            assert current[0]['id'] == task_id_before, "The task ID has changed"

        check_task_is_back()
Ejemplo n.º 7
0
def test_pod_file_based_secret(secret_fixture):
    """Deploy a pod that mounts a secret as a volume and serve its content over HTTP.

    ``secret_fixture`` is unpacked into ``(secret_name, secret_value)``.  The
    pod's shell command appends the secret file to ``<name>_file`` and starts
    a Python HTTP server; the test then curls that file from the master and
    compares the output with ``secret_value``.
    """
    secret_name, secret_value = secret_fixture
    file_stem = secret_name.replace('/', '')

    pod_id = '/{}'.format(uuid.uuid4().hex)

    shell_command = "cat {} >> {}_file && /opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTP".format(
        file_stem, file_stem)

    container = {
        "name": "container-1",
        "resources": {"cpus": 0.1, "mem": 64},
        "endpoints": [{"name": "http", "hostPort": 0, "protocol": ["tcp"]}],
        "exec": {"command": {"shell": shell_command}},
        "volumeMounts": [{"name": "vol", "mountPath": secret_name}],
    }
    pod_def = {
        "id": pod_id,
        "containers": [container],
        "networks": [{"mode": "host"}],
        "volumes": [{"name": "vol", "secret": "secret1"}],
        "secrets": {"secret1": {"source": secret_name}},
    }

    marathon_client = marathon.create_client()
    marathon_client.add_pod(pod_def)
    shakedown.deployment_wait()

    instances = marathon_client.show_pod(pod_id)['instances']
    assert len(instances) == 1, 'Failed to start the file based secret pod'

    instance = instances[0]
    port = instance['containers'][0]['endpoints'][0]['allocatedHostPort']
    host = instance['networks'][0]['addresses'][0]
    cmd = "curl {}:{}/{}_file".format(host, port, file_stem)
    status, data = shakedown.run_command_on_master(cmd)

    assert status, "{} did not succeed".format(cmd)
    assert data.rstrip() == secret_value, "Got an unexpected secret data"
0
def test_marathon_when_disconnected_from_zk():
    """Launch an app, block access to ZK (port 2181) for a while and verify
    that the task is still the same one afterwards.

    The port is blocked on the host the app is pinned to, for ten seconds,
    inside ``shakedown.iptable_rules``.
    """
    app_def = app('zk-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/zk-failure')
    original_task_id = tasks[0]['id']

    with shakedown.iptable_rules(host):
        block_port(host, 2181)
        # duration of the ZK block
        time.sleep(10)

    # After access to ZK is restored the original task must still be there.
    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_task_is_back():
        tasks = client.get_tasks('/zk-failure')
        # The comparison used to be a bare expression whose result was thrown
        # away, so this check could never fail.
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()
Ejemplo n.º 9
0
def test_health_failed_check():
    """Deploy a pod, block the port of its first container for a few seconds
    and verify that the pod's tasks come back with new task IDs.
    """
    client = marathon.create_client()

    # The template previously had no placeholder, so the generated uuid was
    # silently dropped and every run used the same pod ID.
    pod_id = "/pod-ken-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    # NOTE(review): this takes the first pod Marathon lists, which is only
    # the pod created above when no other pods exist -- confirm.
    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    save_iptables(host)
    try:
        block_port(host, port)
        time.sleep(7)
    finally:
        # Always put the saved rules back, even if blocking or sleeping
        # fails, so the host is not left with the port blocked.
        restore_iptables(host)
    shakedown.deployment_wait()

    initial_ids = (initial_id1, initial_id2)
    for task in get_pod_tasks(pod_id):
        assert task['id'] not in initial_ids, \
            "Task {} was not replaced after the port was blocked".format(task['id'])
Ejemplo n.º 10
0
def test_docker_dns_mapping(marathon_service_name):
    """Check that a running docker task can be pinged by its DNS name.

    A deliberately wrong name must not answer; the name
    ``<app_id>.<marathon_service_name>.mesos`` must answer within 30 attempts.
    """

    app_id = uuid.uuid4().hex
    marathon_client = marathon.create_client()
    marathon_client.add_app(app_docker(app_id))
    shakedown.deployment_wait()

    running = marathon_client.get_tasks(app_id)
    # The host is not used afterwards; the lookup fails if there is no task.
    host = running[0]['host']

    # Negative check: a bogus name must not be pingable.
    reachable, _ = shakedown.run_command_on_master('ping -c 1 docker-test.marathon-user.mesos-bad')
    assert not reachable

    dns_name = '{}.{}.mesos'.format(app_id, marathon_service_name)
    ping_cmd = 'ping -c 1 {}.{}.mesos'.format(app_id, marathon_service_name)

    @retrying.retry(stop_max_attempt_number=30)
    def check_dns():
        shakedown.wait_for_dns(dns_name)
        reachable, _ = shakedown.run_command_on_master(ping_cmd)
        assert reachable

    check_dns()
Ejemplo n.º 11
0
def test_launch_docker_graceperiod(marathon_service_name):
    """Exercise 'taskKillGracePeriodSeconds' with a docker app.

    The app is scaled to zero; its task must still be reported just after the
    3 second window the test treats as the default grace period, and must be
    gone once the configured 20 second grace period has passed as well.
    """

    app_id = uuid.uuid4().hex
    definition = app_docker(app_id)
    definition['container']['docker']['image'] = 'kensipe/python-test'
    default_graceperiod, graceperiod = 3, 20
    definition['taskKillGracePeriodSeconds'] = graceperiod
    definition['cmd'] = 'python test.py'

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)
    shakedown.deployment_wait()

    def service_task():
        return shakedown.get_service_task(marathon_service_name, app_id)

    assert service_task() is not None

    marathon_client.scale_app(app_id, 0)
    assert service_task() is not None

    # Still present after the default grace period ...
    time.sleep(default_graceperiod + 1)
    assert service_task() is not None

    # ... but gone after the configured one.
    time.sleep(graceperiod)
    assert service_task() is None
Ejemplo n.º 12
0
def test_marathon_when_disconnected_from_zk():
    """Launch an app, block port 2181 (ZK) on its host for ten seconds and
       verify that the task keeps its ID.
    """

    sleeper = apps.sleep_app()
    pinned_host = common.ip_other_than_mom()
    common.pin_to_host(sleeper, pinned_host)

    marathon_client = marathon.create_client()
    marathon_client.add_app(sleeper)

    shakedown.deployment_wait()
    task_id_before = marathon_client.get_tasks(sleeper["id"])[0]['id']

    common.block_iptable_rules_for_seconds(
        pinned_host, 2181, sleep_seconds=10, block_input=True, block_output=False)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        current = marathon_client.get_tasks(sleeper["id"])
        assert current[0]['id'] == task_id_before, \
            "The task {} got replaced with {}".format(task_id_before, current[0]['id'])

    check_task_is_back()
Ejemplo n.º 13
0
def test_install_marathon():
    """Install, uninstall and reinstall the Marathon package for DC/OS.

    A further install attempt while the package is already installed is
    expected to raise.
    """

    # Install
    shakedown.install_package_and_wait(PACKAGE_NAME)
    assert shakedown.package_installed(PACKAGE_NAME), 'Package failed to install'

    # Poll once a second until the service is registered and healthy, or
    # until WAIT_TIME_IN_SECS has elapsed.
    deadline = time.time() + WAIT_TIME_IN_SECS
    registered = False
    while time.time() < deadline:
        registered = shakedown.get_service(PACKAGE_NAME) is not None
        if not (registered and shakedown.service_healthy(SERVICE_NAME)):
            time.sleep(1)
            continue
        break

    assert registered, 'Service did not register with DCOS'
    shakedown.deployment_wait()

    # Uninstall
    uninstall('marathon-user')
    shakedown.deployment_wait()

    # Reinstall
    shakedown.install_package_and_wait(PACKAGE_NAME)
    assert shakedown.package_installed(PACKAGE_NAME), 'Package failed to reinstall'

    # Installing again on top of the existing installation must fail.
    install_rejected = False
    try:
        shakedown.install_package(PACKAGE_NAME)
    except Exception:
        install_rejected = True

    # No exception raised -> exit code was 0
    assert install_rejected, "Error: CLI returns 0 when asked to install Marathon"
Ejemplo n.º 14
0
def test_event_channel():
    """Test the Marathon event channel for pod events.

    Events are read from ``test.txt`` on the master.  The test creates a pod
    and expects the attach, pod-created and deployment-success events, then
    scales the pod to three instances and expects a pod-updated event.
    """
    client = marathon.create_client()
    pod_id = "/pod-create"

    pod_json = _pods_json()
    pod_json["id"] = pod_id
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    # look for created
    @retrying.retry(wait_fixed=1000, stop_max_delay=10000)
    def check_deployment_message():
        status, stdout = shakedown.run_command_on_master('cat test.txt')
        assert 'event_stream_attached' in stdout, "event_stream_attached event has not been found"
        assert 'pod_created_event' in stdout, "pod_created_event has not been found"
        assert 'deployment_step_success' in stdout, "deployment_step_success has not been found"

    # The check used to be defined but never invoked, so nothing was verified.
    check_deployment_message()

    pod_json["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_json)
    shakedown.deployment_wait()

    # look for updated
    @retrying.retry(wait_fixed=1000, stop_max_delay=10000)
    def check_update_message():
        status, stdout = shakedown.run_command_on_master('cat test.txt')
        assert 'pod_updated_event' in stdout, "pod_updated_event has not been found"

    check_update_message()
Ejemplo n.º 15
0
def test_launch_container_with_persistent_volume():
    """Launch an app with a persistent volume and read ``/data/foo`` over HTTP.

    The file is expected to hold one ``hello`` line after the first start and
    two after the app has been restarted.
    """
    definition = persistent_volume_app()
    target = definition['id']
    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)
    shakedown.deployment_wait()

    def read_volume_file():
        # Exactly one task must be running; fetch the file it serves.
        running = marathon_client.get_tasks(target)
        assert len(running) == 1

        port = running[0]['ports'][0]
        host = running[0]['host']
        cmd = "curl {}:{}/data/foo".format(host, port)
        succeeded, body = shakedown.run_command_on_master(cmd)

        assert succeeded, "{} did not succeed".format(cmd)
        return body

    data = read_volume_file()
    assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)

    marathon_client.restart_app(target)
    shakedown.deployment_wait()

    data = read_volume_file()
    assert data == 'hello\nhello\n', "'{}' was not equal to hello\\nhello\\n".format(data)
Ejemplo n.º 16
0
def test_vip_mesos_cmd(marathon_service_name):
    """ Tests the creation of a VIP from a python command NOT in a docker.  The
        test validates the creation of an app with the VIP label and the accessibility
        of the service via the VIP.
    """
    vip_name = 'vip-service'
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name, marathon_service_name)
    app_def = python_http_app()
    app_def['portDefinitions'] = [
        {
            "port": 0,
            "protocol": "tcp",
            "name": "{}".format(vip_name),
            "labels": {
                "VIP_0": "/{}:10000".format(vip_name)
            }
        }
    ]
    app_def['id'] = vip_name
    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    # The attempt limit has to be given to the decorator.  It used to be a
    # default parameter of the wrapped function, which the bare
    # ``@retrying.retry`` ignores, so a failing check was retried forever.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30)
    def http_output_check():
        common.assert_http_code('{}:{}'.format(fqn, 10000))

    http_output_check()
Ejemplo n.º 17
0
def test_launch_mesos_grace_period(marathon_service_name):
    """Exercise 'taskKillGracePeriodSeconds' with a Mesos container app.

       The app is scaled to zero; its task must still be reported just after
       the 3 second window the test treats as the default grace period, and
       must be gone once the configured 20 second grace period has passed as
       well.  The original notes refer to
       `test_root_marathon.py::test_launch_mesos_root_marathon_grace_period` for details.
    """

    definition = apps.mesos_app()

    default_grace_period, grace_period = 3, 20

    definition['fetch'] = [{"uri": "https://downloads.mesosphere.com/testing/test.py"}]
    definition['cmd'] = '/opt/mesosphere/bin/python test.py'
    definition['taskKillGracePeriodSeconds'] = grace_period
    app_id = definition['id'].lstrip('/')

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)
    shakedown.deployment_wait(app_id=app_id)

    def service_task():
        return shakedown.get_service_task(marathon_service_name, app_id)

    assert service_task() is not None

    marathon_client.scale_app(app_id, 0)
    assert service_task() is not None

    # Still present after the default grace period ...
    time.sleep(default_grace_period + 1)
    assert service_task() is not None

    # ... but gone after the configured one.
    time.sleep(grace_period)
    assert service_task() is None
Ejemplo n.º 18
0
def test_scale_app_in_group():
    """Scale one app of a two-app group and check that only that app grows."""

    group_def = groups.sleep_group()
    inner_group_id = group_def["groups"][0]["id"]

    marathon_client = marathon.create_client()
    marathon_client.create_group(group_def)

    shakedown.deployment_wait()

    deployed = marathon_client.get_group(inner_group_id)['apps']
    assert len(deployed) == 2, "The number of apps is {}, but 2 was expected".format(len(deployed))

    first_id = group_def["groups"][0]["apps"][0]["id"]
    second_id = group_def["groups"][0]["apps"][1]["id"]

    def task_counts():
        # Task counts of both apps, first app first.
        first = marathon_client.get_tasks(first_id)
        second = marathon_client.get_tasks(second_id)
        return len(first), len(second)

    count1, count2 = task_counts()
    assert count1 == 1, "The number of tasks #1 is {} after deployment, but 1 was expected".format(count1)
    assert count2 == 1, "The number of tasks #2 is {} after deployment, but 1 was expected".format(count2)

    # Scale only the first app.
    marathon_client.scale_app(first_id, 2)
    shakedown.deployment_wait()

    count1, count2 = task_counts()
    assert count1 == 2, "The number of tasks #1 is {} after scale, but 2 was expected".format(count1)
    assert count2 == 1, "The number of tasks #2 is {} after scale, but 1 was expected".format(count2)
Ejemplo n.º 19
0
def test_pinned_task_does_not_scale_to_unpinned_host():
    """Scale a host-pinned app beyond what its host can hold.

       The app requests enough CPU that only one task is meant to fit on the
       pinned host.  Five seconds after scaling to two instances the test
       expects one deployment still in flight and one task running.
    """

    sleeper = apps.sleep_app()
    sleeper_id = sleeper['id']

    pinned_host = common.ip_other_than_mom()
    print('Constraint set to host: {}'.format(pinned_host))
    # Request more than half of the agent's CPUs so a second task cannot
    # land on the same node.
    agent_cpus = common.cpus_on_agent(pinned_host)
    sleeper['cpus'] = max(0.6, agent_cpus - 0.5)
    common.pin_to_host(sleeper, pinned_host)

    marathon_client = marathon.create_client()
    marathon_client.add_app(sleeper)

    shakedown.deployment_wait(app_id=sleeper_id)
    marathon_client.scale_app(sleeper_id, 2)

    time.sleep(5)
    deployment_count = len(marathon_client.get_deployments(app_id=sleeper_id))
    task_count = len(marathon_client.get_tasks(sleeper_id))

    # The scale-up must still be deploying.
    assert deployment_count == 1, "The number of deployments is {}, but 1 was expected".format(deployment_count)
    assert task_count == 1, "The number of tasks is {}, but 1 was expected".format(task_count)
Ejemplo n.º 20
0
def test_vip_docker_bridge_mode(marathon_service_name):
    """Create a docker app whose port mapping carries a VIP label and check
       that the service answers over HTTP on the VIP name, port 10000.
    """

    definition = apps.docker_http_server()

    vip_name = definition["id"].lstrip("/")
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name, marathon_service_name)

    definition['id'] = vip_name
    vip_mapping = {
        "containerPort": 8080,
        "hostPort": 0,
        "labels": {
            "VIP_0": "/{}:10000".format(vip_name)
        },
        "protocol": "tcp",
        "name": "{}".format(vip_name)
    }
    definition['container']['docker']['portMappings'] = [vip_mapping]

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)

    shakedown.deployment_wait()

    vip_address = '{}:{}'.format(fqn, 10000)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def http_output_check():
        time.sleep(1)
        common.assert_http_code(vip_address)

    http_output_check()
Ejemplo n.º 21
0
def test_launch_and_scale_group():
    """Launch a two-app group, scale the whole group and check both apps grow."""

    group_def = groups.sleep_group()
    inner_group_id = group_def["groups"][0]["id"]

    marathon_client = marathon.create_client()
    marathon_client.create_group(group_def)

    shakedown.deployment_wait()

    deployed = marathon_client.get_group(inner_group_id)['apps']
    assert len(deployed) == 2, "The number of apps is {}, but 2 was expected".format(len(deployed))

    first_id = group_def["groups"][0]["apps"][0]["id"]
    second_id = group_def["groups"][0]["apps"][1]["id"]

    def task_counts():
        # Task counts of both apps, first app first.
        first = marathon_client.get_tasks(first_id)
        second = marathon_client.get_tasks(second_id)
        return len(first), len(second)

    count1, count2 = task_counts()
    assert count1 == 1, "The number of tasks #1 is {} after deployment, but 1 was expected".format(count1)
    assert count2 == 1, "The number of tasks #2 is {} after deployment, but 1 was expected".format(count2)

    # Scale the entire group by a factor of 2.
    marathon_client.scale_group(inner_group_id, 2)
    shakedown.deployment_wait()

    count1, count2 = task_counts()
    assert count1 == 2, "The number of tasks #1 is {} after scale, but 2 was expected".format(count1)
    assert count2 == 2, "The number of tasks #2 is {} after scale, but 2 was expected".format(count2)
Ejemplo n.º 22
0
def test_vip_mesos_cmd(marathon_service_name):
    """Create an app whose port definition carries a VIP label and check that
       the service answers over HTTP on the VIP name, port 10000.
    """

    definition = apps.http_server()

    vip_name = definition["id"].lstrip("/")
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name, marathon_service_name)

    vip_port = {
        "port": 0,
        "protocol": "tcp",
        "name": "{}".format(vip_name),
        "labels": {
            "VIP_0": "/{}:10000".format(vip_name)
        }
    }
    definition['portDefinitions'] = [vip_port]

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)

    shakedown.deployment_wait()

    vip_address = '{}:{}'.format(fqn, 10000)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def http_output_check():
        time.sleep(1)
        common.assert_http_code(vip_address)

    http_output_check()
Ejemplo n.º 23
0
def test_launch_docker_grace_period(marathon_service_name):
    """Exercise 'taskKillGracePeriodSeconds' with a Docker container app.

       The app is scaled to zero; its task must still be reported just after
       the 3 second window the test treats as the default grace period, and
       must be gone once the configured 20 second grace period has passed as
       well.  The original notes refer to
       `test_root_marathon.py::test_launch_mesos_root_marathon_grace_period` for details.
    """

    definition = apps.docker_http_server()
    definition['container']['docker']['image'] = 'kensipe/python-test'

    default_grace_period, grace_period = 3, 20
    definition['taskKillGracePeriodSeconds'] = grace_period
    definition['cmd'] = 'python test.py'
    app_id = definition['id'].lstrip('/')

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)
    shakedown.deployment_wait(app_id=app_id)

    def service_task():
        return shakedown.get_service_task(marathon_service_name, app_id)

    assert service_task() is not None

    marathon_client.scale_app(app_id, 0)
    assert service_task() is not None

    # Still present after the default grace period ...
    time.sleep(default_grace_period + 1)
    assert service_task() is not None

    # ... but gone after the configured one.
    time.sleep(grace_period)
    assert service_task() is None
Ejemplo n.º 24
0
def test_event_channel():
    """ Tests the event channel.  The way events are verified is by streaming the events
        to a test.txt file.   According to the original notes a fixture (not part of this
        block) ensures the file is removed before and after the test.
        Events checked are connecting, deploying a good task and killing a task.
    """
    app_def = common.app_mesos()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000, stop_max_delay=10000)
    def check_deployment_message():
        status, stdout = shakedown.run_command_on_master('cat test.txt')
        assert 'event_stream_attached' in stdout, "event_stream_attached event has not been found"
        assert 'deployment_info' in stdout, "deployment_info event has not been found"
        assert 'deployment_step_success' in stdout, "deployment_step_success has not been found"

    # The check used to be defined but never invoked, so nothing was verified.
    check_deployment_message()

    client.remove_app(app_id, True)
    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000, stop_max_delay=10000)
    def check_kill_message():
        status, stdout = shakedown.run_command_on_master('cat test.txt')
        assert 'Killed' in stdout, "Killed event has not been found"

    check_kill_message()
Ejemplo n.º 25
0
def test_vip_docker_bridge_mode(marathon_service_name):
    """ Tests the creation of a VIP from a python command in a docker image using bridge mode.
        The test validates the creation of an app with the VIP label and the accessibility
        of the service via the VIP.
    """
    vip_name = 'vip-docker-service'
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name, marathon_service_name)
    app_def = app_docker()
    app_def['container']['docker']['portMappings'] = [
        {
            "containerPort": 8080,
            "hostPort": 0,
            "labels": {
                "VIP_0": "/{}:10000".format(vip_name)
            },
            "protocol": "tcp",
            "name": "{}".format(vip_name)
        }
    ]
    app_def['id'] = vip_name
    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    # The attempt limit has to be given to the decorator.  It used to be a
    # default parameter of the wrapped function, which the bare
    # ``@retrying.retry`` ignores, so a failing check was retried forever.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30)
    def http_output_check():
        common.assert_http_code('{}:{}'.format(fqn, 10000))

    http_output_check()
Ejemplo n.º 26
0
def test_marathon_with_master_process_failure(marathon_service_name):
    """Launch an app, restart the master and expect the service endpoint to
       come back with the app still running as the same single task.
    """

    sleeper = apps.sleep_app()
    pinned_host = common.ip_other_than_mom()
    common.pin_to_host(sleeper, pinned_host)

    marathon_client = marathon.create_client()
    marathon_client.add_app(sleeper)
    shakedown.deployment_wait()

    task_id_before = marathon_client.get_tasks(sleeper["id"])[0]['id']

    common.systemctl_master('restart')
    shakedown.wait_for_service_endpoint(marathon_service_name)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_recovery():
        current = marathon_client.get_tasks(sleeper["id"])
        count = len(current)
        assert count == 1, "The number of tasks is {} after master restart, but 1 was expected".format(count)
        assert current[0]['id'] == task_id_before, \
            "Task {} has not recovered, it got replaced with another one: {}".format(task_id_before, current[0]['id'])

    check_task_recovery()
Ejemplo n.º 27
0
def test_pod_secret_env_var(secret_fixture):
    """Deploy a pod that receives a secret through an environment variable.

    ``secret_fixture`` is unpacked into ``(secret_name, secret_value)``.  The
    pod's shell command writes ``$SECRET_ENV`` to ``secret-env`` in its
    sandbox and starts a Python HTTP server; the test curls that file from
    the master and compares the output with ``secret_value``.
    """
    # The enterprise CLI is needed to create secrets.
    if not common.is_enterprise_cli_package_installed():
        common.install_enterprise_cli_package()

    secret_name, secret_value = secret_fixture

    pod_id = '/{}'.format(uuid.uuid4().hex)

    shell_command = "echo $SECRET_ENV && echo $SECRET_ENV >> $MESOS_SANDBOX/secret-env && /opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTP"
    container = {
        "name": "container-1",
        "resources": {"cpus": 0.1, "mem": 64},
        "endpoints": [{"name": "http", "hostPort": 0, "protocol": ["tcp"]}],
        "exec": {"command": {"shell": shell_command}},
    }
    pod_def = {
        "id": pod_id,
        "containers": [container],
        "environment": {"SECRET_ENV": {"secret": "secret1"}},
        "networks": [{"mode": "host"}],
        "secrets": {"secret1": {"source": secret_name}},
    }

    marathon_client = marathon.create_client()
    marathon_client.add_pod(pod_def)
    shakedown.deployment_wait()

    instances = marathon_client.show_pod(pod_id)['instances']
    assert len(instances) == 1, 'Failed to start the secret environment variable pod'

    instance = instances[0]
    port = instance['containers'][0]['endpoints'][0]['allocatedHostPort']
    host = instance['networks'][0]['addresses'][0]
    cmd = "curl {}:{}/secret-env".format(host, port)
    status, data = shakedown.run_command_on_master(cmd)

    assert status, "{} did not succeed".format(cmd)
    assert data.rstrip() == secret_value
Ejemplo n.º 28
0
def clear_pods():
    """Remove every pod Marathon lists and wait for the deployments, best effort.

    Any ``Exception`` raised along the way is swallowed.
    """
    try:
        marathon_client = marathon.create_client()
        for listed in marathon_client.list_pod():
            marathon_client.remove_pod(listed["id"], True)
        shakedown.deployment_wait()
    except Exception:
        # Deliberately ignored: cleanup must not fail the caller.
        pass
Ejemplo n.º 29
0
def clear_pods():
    """Remove every pod Marathon lists and wait for the deployments, best effort.

    Ordinary errors are swallowed so that cleanup never fails the caller.
    """
    try:
        client = marathon.create_client()
        pods = client.list_pod()
        for pod in pods:
            client.remove_pod(pod["id"], True)
        shakedown.deployment_wait()
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit and made the cleanup impossible to interrupt.
        pass
Ejemplo n.º 30
0
def test_install_universe_package(package):
    """Install ``package`` and confirm that its service becomes healthy.

    After the install, deployments are given up to five minutes to finish
    before the health of the service is checked.
    """

    shakedown.install_package_and_wait(package)
    installed = shakedown.package_installed(package)
    assert installed, 'Package failed to install'

    five_minutes = timedelta(minutes=5).total_seconds()
    shakedown.deployment_wait(timeout=five_minutes)
    assert shakedown.service_healthy(package)
Ejemplo n.º 31
0
def test_app_update_rollback():
    """Verify that rolling back an update restores the previous instance count.

    The app is deployed and expected to run one task, then updated to two
    instances. A further update back to one instance is started and rolled
    back right away; two tasks are expected to remain afterwards.
    """

    definition = apps.readiness_and_health_app()
    app_id = definition["id"]

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)
    shakedown.deployment_wait(app_id=app_id)

    task_count = len(marathon_client.get_tasks(app_id))
    assert task_count == 1, \
        "The number of tasks is {} after deployment, but 1 was expected".format(task_count)

    definition['instances'] = 2
    marathon_client.update_app(app_id, definition)
    shakedown.deployment_wait(app_id=app_id)

    task_count = len(marathon_client.get_tasks(app_id))
    assert task_count == 2, \
        "The number of tasks is {} after update, but 2 was expected".format(task_count)

    # A longer readiness interval slows the deployment down, leaving time to
    # request the rollback while it is still in progress.
    readiness_check = definition['readinessChecks'][0]
    readiness_check['intervalSeconds'] = 30
    definition['instances'] = 1
    pending_deployment = marathon_client.update_app(app_id, definition)
    marathon_client.rollback_deployment(pending_deployment)
    shakedown.deployment_wait(app_id=app_id)

    # The update to 1 instance was rolled back, so 2 tasks must still exist.
    task_count = len(marathon_client.get_tasks(app_id))
    assert task_count == 2, \
        "The number of tasks is {} after rollback, but 2 was expected".format(task_count)
Ejemplo n.º 32
0
def test_scale_app_in_group():
    """Scale a single app inside a group and check its sibling is unaffected.

    Runs inside the ``marathon_on_marathon()`` context. Any pre-existing
    ``/test-group`` is removed on a best-effort basis before the group is
    created afresh.
    """
    scaled_app = '/test-group/sleep/goodnight'
    untouched_app = '/test-group/sleep/goodnight2'

    with marathon_on_marathon():
        client = marathon.create_client()

        # Best-effort cleanup of a previous run; errors are ignored on purpose.
        try:
            client.remove_group('/test-group', True)
            shakedown.deployment_wait()
        except Exception:
            pass

        client.create_group(group())
        shakedown.deployment_wait()

        sleep_group = client.get_group('/test-group/sleep')
        assert len(sleep_group['apps']) == 2

        scaled_tasks = client.get_tasks(scaled_app)
        untouched_tasks = client.get_tasks(untouched_app)
        assert len(scaled_tasks) == 1
        assert len(untouched_tasks) == 1

        # Scale only one app of the group.
        client.scale_app(scaled_app, 2)
        shakedown.deployment_wait()

        scaled_tasks = client.get_tasks(scaled_app)
        untouched_tasks = client.get_tasks(untouched_app)
        assert len(scaled_tasks) == 2
        assert len(untouched_tasks) == 1
Ejemplo n.º 33
0
def test_network_pinger(test_type, get_pinger_app, dns_format, marathon_service_name):
    """ This test runs a pinger app and a relay app. It retrieves the python app from the
    master via the new http service (which will be moving into shakedown). Then a curl call
    to the relay will invoke a call to the 2nd pinger app and return back pong to the relay
    then back to curl.

    It tests that 1 task can network communicate to another task on the given network
    It tests inbound and outbound connectivity

    test_type param is not used.  It is passed so that it is clear which parametrized test
    is running or may be failing.
    """
    client = marathon.create_client()
    pinger_app = get_pinger_app('pinger')
    relay_app = get_pinger_app('relay')
    pinger_dns = dns_format.format('pinger', marathon_service_name)
    relay_dns = dns_format.format('relay', marathon_service_name)

    # test pinger app to master
    shakedown.copy_file_to_master(fixture_dir() + "/pinger.py")

    with shakedown.master_http_service():
        # need to add app with http service in place or it will fail to fetch
        client.add_app(pinger_app)
        client.add_app(relay_app)
        shakedown.deployment_wait()
        shakedown.wait_for_dns(relay_dns)

    relay_url = 'http://{}:7777/relay-ping?url={}:7777'.format(
        relay_dns, pinger_dns
    )

    # The retry limits must be passed to the decorator. Previously
    # `stop_max_attempt_number` was a default argument of the wrapped function,
    # which `retrying` never sees, so the check retried forever without delay.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30)
    def http_output_check():
        status, output = shakedown.run_command_on_master('curl {}'.format(relay_url))
        assert status
        assert 'Pong /pinger' in output
        assert 'Relay from /relay' in output

    http_output_check()
Ejemplo n.º 34
0
def test_event_channel():
    """Check that deployment and kill events appear on the event channel.

    Events are verified by reading ``events.txt`` on the Marathon leader. The
    events checked are attaching to the stream, deploying a good task and
    killing a task. Before inspecting events, ``events.exitcode`` is read and
    must be empty; otherwise the SSE stream is reported as disconnected.
    """
    app_def = apps.mesos_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait(app_id=app_id)

    leader_ip = shakedown.marathon_leader_ip()

    # (event name, failure message) pairs expected after the deployment.
    deployment_expectations = (
        ('event_stream_attached', "event_stream_attached event has not been found"),
        ('deployment_info', "deployment_info event has not been found"),
        ('deployment_step_success', "deployment_step_success has not been found"),
    )

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_deployment_message():
        _, exit_code = shakedown.run_command(leader_ip, 'cat events.exitcode')
        assert str(exit_code).strip() == '', \
            "SSE stream disconnected (CURL exit code is {})".format(exit_code.strip())

        _, events = shakedown.run_command(leader_ip, 'cat events.txt')
        for event_name, failure_message in deployment_expectations:
            assert event_name in events, failure_message

    check_deployment_message()
    client.remove_app(app_id, True)
    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_kill_message():
        _, events = shakedown.run_command(leader_ip, 'cat events.txt')
        assert 'KILLED' in events, "KILLED event has not been found"

    check_kill_message()
Ejemplo n.º 35
0
def test_update_app_rollback():
    """Update an app, then roll a subsequent update back.

    The app gets a random id, is expected to start with one task and to run
    two after the first update. The second update (back to one instance) is
    rolled back immediately, so two tasks are expected at the end.
    """
    unique_id = uuid.uuid4().hex
    definition = readiness_and_health_app()
    definition['id'] = unique_id

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)
    shakedown.deployment_wait()

    # Initial deployment: a single task.
    assert len(marathon_client.get_tasks(unique_id)) == 1

    definition['instances'] = 2
    marathon_client.update_app(unique_id, definition)
    shakedown.deployment_wait()

    # The update took effect: two tasks.
    assert len(marathon_client.get_tasks(unique_id)) == 2

    # A longer readiness interval provides a delay to roll back from.
    readiness_check = definition['readinessChecks'][0]
    readiness_check['intervalSeconds'] = 30
    definition['instances'] = 1
    pending_deployment = marathon_client.update_app(unique_id, definition)
    marathon_client.rollback_deployment(pending_deployment)

    shakedown.deployment_wait()

    # The update to 1 instance was rolled back to 2.
    assert len(marathon_client.get_tasks(unique_id)) == 2
def test_vip_docker_bridge_mode(marathon_service_name):
    """Create a VIP for a Docker HTTP server app and reach the app through it.

    The app definition gets a single port mapping carrying a ``VIP_0`` label.
    After deployment, the service is requested via its
    ``<vip>.<marathon service>.l4lb.thisdcos.directory`` name on port 10000.
    """

    app_def = apps.docker_http_server()

    vip_name = app_def["id"].lstrip("/")
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name, marathon_service_name)

    vip_port_mapping = {
        "containerPort": 8080,
        "hostPort": 0,
        "labels": {
            "VIP_0": "/{}:10000".format(vip_name)
        },
        "protocol": "tcp",
        "name": "{}".format(vip_name)
    }
    app_def['id'] = vip_name
    app_def['container']['docker']['portMappings'] = [vip_port_mapping]

    marathon_client = marathon.create_client()
    marathon_client.add_app(app_def)

    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def http_output_check():
        # Extra pause on top of the retry wait, kept from the original test.
        time.sleep(1)
        common.assert_http_code('{}:{}'.format(fqn, 10000))

    http_output_check()
Ejemplo n.º 37
0
def test_lock():
    '''Verify that a second scheduler fails to start while one is running.

    Without locking, the second scheduler would fail during registration, but
    only after writing its config to ZK. To verify that it fails immediately,
    the test checks that the ZK config state is unmodified after the second
    instance has failed.'''

    service_path = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    client = dcos.marathon.create_client()

    # Snapshot the ZK config of the running framework.
    config_target_path = "dcos-service-{}/ConfigTarget".format(service_path)
    config_before = shakedown.get_zk_node_data(config_target_path)

    # Remember the last task failure timestamp (None when there is none yet).
    app = client.get_app(service_path)
    failure_before = app.get("lastTaskFailure", {}).get("timestamp", None)

    # Scale to 2 instances; the single-instance label is dropped first.
    labels = app["labels"]
    original_labels = labels.copy()
    labels.pop("MARATHON_SINGLE_INSTANCE_APP")
    client.update_app(service_path, {"labels": labels})
    shakedown.deployment_wait()
    client.update_app(service_path, {"instances": 2})

    # A changed failure timestamp signals that the second scheduler failed.
    def failure_timestamp_changed():
        latest_app = client.get_app(service_path)
        latest = latest_app.get("lastTaskFailure", {}).get("timestamp", None)
        return latest != failure_before

    shakedown.wait_for(lambda: failure_timestamp_changed())

    # The ZK config must be unchanged.
    config_after = shakedown.get_zk_node_data(config_target_path)
    assert config_before == config_after

    # Undo the scale-up so the second scheduler instance cannot obtain the lock.
    restored_state = {"labels": original_labels, "instances": 1}
    client.update_app(service_path, restored_state, force=True)
    shakedown.deployment_wait()
Ejemplo n.º 38
0
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Abdicate the Marathon leader with backup and restore and check app state.

    Backup and restore testing is done with only one master, since the new
    master has to be able to read the backup file created by the previous
    master, and that is easiest to test when there is a single master.
    """

    backup_dir = '/tmp'
    backup_file = 'backup.tar'
    backup_url = 'file://{}/{}'.format(backup_dir, backup_file)

    # Deploy a simple app; it is expected to survive the leader re-election.
    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait(app_id=app_id)

    app_before = client.get_app(app_id)
    assert app_before['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app_before["tasksRunning"])
    task_id = app_before['tasks'][0]['id']

    # Abdicate the leader, requesting both a backup and a restore.
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    # Wait for the new leader (on the same master server) to be up and ready.
    endpoint_timeout = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(marathon_service_name, endpoint_timeout)

    app_after = client.get_app(app_id)
    assert app_after['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app_after["tasksRunning"])
    assert task_id == app_after['tasks'][0]['id'], "Task has a different ID after restore"

    # The backup archive must exist and list at least one entry.
    cmd = 'tar -tf {}/{} | wc -l'.format(backup_dir, backup_file)
    status, entry_count = shakedown.run_command_on_master(cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    assert int(entry_count.rstrip()) > 0, "Backup file is empty"
Ejemplo n.º 39
0
def test_scale_group():
    """Scale a whole group and check that both of its apps are scaled.

    Any pre-existing ``/test-group`` is removed on a best-effort basis before
    the group is created afresh.
    """
    first_app = '/test-group/sleep/goodnight'
    second_app = '/test-group/sleep/goodnight2'

    client = marathon.create_client()

    # Best-effort cleanup of a previous run; errors are ignored on purpose.
    try:
        client.remove_group('/test-group', True)
        shakedown.deployment_wait()
    except Exception:
        pass

    client.create_group(group())
    shakedown.deployment_wait()

    sleep_group = client.get_group('/test-group/sleep')
    assert len(sleep_group['apps']) == 2

    first_tasks = client.get_tasks(first_app)
    second_tasks = client.get_tasks(second_app)
    assert len(first_tasks) == 1
    assert len(second_tasks) == 1

    # Scale the entire group by a factor of 2.
    client.scale_group('/test-group/sleep', 2)
    shakedown.deployment_wait()

    first_tasks = client.get_tasks(first_app)
    second_tasks = client.get_tasks(second_app)
    assert len(first_tasks) == 2
    assert len(second_tasks) == 2
Ejemplo n.º 40
0
def test_update_app_poor_health():
    """Tests updating an app that never becomes healthy and rolling it back.

    The app is deployed with one instance, then updated to two instances with
    a health check path that does not exist. When waiting for that deployment
    fails, the deployment is rolled back and a single task is expected again.
    """
    app_id = uuid.uuid4().hex
    app_def = readiness_and_health_app()
    app_def['id'] = app_id

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    # start with 1
    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1

    # point the health check at a missing path so the update cannot get healthy
    app_def['healthChecks'][0]['path'] = '/non-existant'
    app_def['instances'] = 2
    deployment_id = client.update_app(app_id, app_def)

    # The wait is expected to fail (the original note says after ~2 minutes).
    # Only Exception subclasses are handled, so KeyboardInterrupt/SystemExit
    # are no longer swallowed and turned into a rollback.
    try:
        shakedown.deployment_wait()
    except Exception:
        client.rollback_deployment(deployment_id)
        shakedown.deployment_wait()

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1
Ejemplo n.º 41
0
def test_event_channel_for_pods():
    """Tests the Marathon event channel specific to pod events.

    A simple pod is created and then updated to three instances; after each
    step ``events.txt`` on the master is read and must contain the
    corresponding pod events.
    """

    pod_def = pods.simple_pod()

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write files.
    if shakedown.ee_version() == 'strict':
        # The value was a masked placeholder ('******'), which is not a usable
        # user name. Root matches the ACL helper called on the next line.
        # NOTE(review): confirm 'root' is the intended user.
        pod_def['user'] = 'root'
        common.add_dcos_marathon_root_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    shakedown.deployment_wait()

    # look for created
    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_deployment_message():
        status, stdout = shakedown.run_command_on_master('cat events.txt')
        assert 'event_stream_attached' in stdout, "event_stream_attached event has not been produced"
        assert 'pod_created_event' in stdout, "pod_created_event event has not been produced"
        assert 'deployment_step_success' in stdout, "deployment_step_success event has not beed produced"

    check_deployment_message()

    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_def["id"], pod_def)
    shakedown.deployment_wait()

    # look for updated
    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_update_message():
        status, stdout = shakedown.run_command_on_master('cat events.txt')
        # The message now names the event that is actually checked.
        assert 'pod_updated_event' in stdout, 'pod_updated_event event has not been produced'

    check_update_message()
def test_unhealthy_app_can_be_rolled_back():
    """Check that an update which never becomes healthy can be rolled back.

    After the initial deployment (one task expected) the app is updated to two
    instances with a health check path of ``/non-existent``. If waiting for
    that deployment raises, the deployment is rolled back; one task is
    expected at the end.
    """

    definition = apps.readiness_and_health_app()
    app_id = definition["id"]

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)
    shakedown.deployment_wait()

    task_count = len(marathon_client.get_tasks(app_id))
    assert task_count == 1, \
        "The number of tasks is {} after deployment, but 1 was expected".format(task_count)

    health_check = definition['healthChecks'][0]
    health_check['path'] = '/non-existent'
    definition['instances'] = 2
    pending_deployment = marathon_client.update_app(app_id, definition)

    try:
        shakedown.deployment_wait()
    except Exception:
        marathon_client.rollback_deployment(pending_deployment)
        shakedown.deployment_wait()

    task_count = len(marathon_client.get_tasks(app_id))
    assert task_count == 1, \
        "The number of tasks is {} after rollback, but 1 was expected".format(task_count)
Ejemplo n.º 43
0
def test_network_pinger(test_type, get_pinger_app, dns_format, marathon_service_name):
    """Run a pinger app and a relay app and check they can talk to each other.

       The python app is retrieved from the master via the http service. A curl
       call to the relay makes the relay call the pinger app, which returns a
       pong to the relay and from there back to curl.

       This tests that one task can communicate with another task on the given
       network, covering inbound and outbound connectivity.

       The test_type param is not used. It is passed so that it is clear which
       parametrized test is running or may be failing.
    """
    pinger_app = get_pinger_app()
    relay_app = get_pinger_app()
    relay_app["id"] = relay_app["id"].replace("pinger", "relay")

    pinger_name = pinger_app["id"].lstrip("/")
    relay_name = relay_app["id"].lstrip("/")
    pinger_dns = dns_format.format(pinger_name, marathon_service_name)
    relay_dns = dns_format.format(relay_name, marathon_service_name)

    # Copy the pinger script to the master so the apps can fetch it.
    pinger_script = os.path.join(scripts.scripts_dir(), "pinger.py")
    shakedown.copy_file_to_master(pinger_script)

    client = marathon.create_client()

    with shakedown.master_http_service():
        # The apps must be added while the http service is up, or the fetch fails.
        for app_definition in (pinger_app, relay_app):
            client.add_app(app_definition)
        shakedown.deployment_wait()
        shakedown.wait_for_dns(relay_dns)

    relay_url = 'http://{}:7777/relay-ping?url={}:7777'.format(relay_dns, pinger_dns)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=300, retry_on_exception=common.ignore_exception)
    def http_output_check():
        succeeded, response = shakedown.run_command_on_master('curl {}'.format(relay_url))
        assert succeeded, "curl {} failed on master with {}".format(relay_url, response)
        assert 'Pong {}'.format(pinger_app["id"]) in response
        assert 'Relay from {}'.format(relay_app["id"]) in response

    http_output_check()
Ejemplo n.º 44
0
def test_scale_app_in_group():
    """Scale a single app inside a group and check its sibling is unaffected.

    Any pre-existing ``/test-group`` is removed on a best-effort basis before
    the group is created afresh.
    """
    scaled_app = '/test-group/sleep/goodnight'
    untouched_app = '/test-group/sleep/goodnight2'

    client = marathon.create_client()

    # Best-effort cleanup of a previous run; errors are ignored on purpose.
    try:
        client.remove_group('/test-group', True)
        shakedown.deployment_wait()
    except Exception:
        pass

    client.create_group(group())
    shakedown.deployment_wait()

    group_app_count = len(client.get_group('/test-group/sleep')['apps'])
    assert group_app_count == 2, "Num of Apps: {} is not 2".format(group_app_count)

    scaled_tasks = client.get_tasks(scaled_app)
    untouched_tasks = client.get_tasks(untouched_app)
    assert len(scaled_tasks) == 1, "Num of tasks 1: {} is not 1 after deployment".format(len(scaled_tasks))
    assert len(untouched_tasks) == 1, "Num of tasks 2: {} is not 1 after deployment".format(len(untouched_tasks))

    # Scale only one app of the group.
    client.scale_app(scaled_app, 2)
    shakedown.deployment_wait()

    scaled_tasks = client.get_tasks(scaled_app)
    untouched_tasks = client.get_tasks(untouched_app)
    assert len(scaled_tasks) == 2, "Num of tasks 1: {} is not 2 after scale".format(len(scaled_tasks))
    assert len(untouched_tasks) == 1, "Num of tasks 2: {} is not 1 after scale".format(len(untouched_tasks))
Ejemplo n.º 45
0
def test_incremental_scale():
    """
    Scale instances of an app in steps until the first error, e.g. a timeout,
    is reached.

    The app is created inside the ``marathon_on_marathon()`` context and each
    new size comes from ``incremental_steps(linear_step_function(step_size=1000))``.
    """
    ensure_mom_version('1.4.0-RC7')

    cluster_info()
    print(available_resources())

    # A tiny, never-ending app so that many instances can be started.
    app_def = {
        "id": "cap-app",
        "instances": 1,
        "cmd": "for (( ; ; )); do sleep 100000000; done",
        "cpus": 0.001,
        "mem": 8,
        "disk": 0,
        "backoffFactor": 1.0,
        "backoffSeconds": 0,
    }

    with marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)

        ten_minutes = timedelta(minutes=10).total_seconds()
        step_sizes = incremental_steps(linear_step_function(step_size=1000))

        for new_size in step_sizes:
            shakedown.echo("Scaling to {}".format(new_size))
            # Let any deployment still in flight finish before scaling.
            shakedown.deployment_wait(app_id='cap-app', timeout=ten_minutes)

            # Scale to the next size and wait for that deployment.
            client.scale_app('/cap-app', new_size)
            shakedown.deployment_wait(app_id='cap-app', timeout=ten_minutes)
            shakedown.echo("done.")
Ejemplo n.º 46
0
def test_pinned_task_scales_on_host_only():
    """Check that scaling a pinned app only starts tasks on the pinned host.

    The app is pinned to a host returned by ``ip_other_than_mom()``, deployed
    with one task and then scaled to ten; every task must report that host.
    """
    pinned_app = app('pinned')
    pinned_host = ip_other_than_mom()
    pin_to_host(pinned_app, pinned_host)

    marathon_client = marathon.create_client()
    marathon_client.add_app(pinned_app)
    shakedown.deployment_wait()

    initial_tasks = marathon_client.get_tasks('/pinned')
    assert len(initial_tasks) == 1
    assert initial_tasks[0]['host'] == pinned_host

    marathon_client.scale_app('pinned', 10)
    shakedown.deployment_wait()

    scaled_tasks = marathon_client.get_tasks('/pinned')
    assert len(scaled_tasks) == 10
    for scaled_task in scaled_tasks:
        assert scaled_task['host'] == pinned_host
Ejemplo n.º 47
0
def test_create_pod_with_private_image():
    """Deploy a pod whose image needs Docker credentials stored in a secret.

    Credentials are read from the ``DOCKER_HUB_USERNAME`` and
    ``DOCKER_HUB_PASSWORD`` environment variables (a ``KeyError`` is raised if
    either is missing). The pull config is stored as the ``dockerPullConfig``
    secret, which is deleted again even if the deployment fails.
    """
    import json

    docker_user = os.environ['DOCKER_HUB_USERNAME']
    docker_password = os.environ['DOCKER_HUB_PASSWORD']

    secret_name = "dockerPullConfig"
    pull_config = common.create_docker_pull_config_json(docker_user, docker_password)
    secret_value = json.dumps(pull_config)

    client = marathon.create_client()
    common.create_secret(secret_name, secret_value)

    try:
        pod_def = common.private_docker_pod(secret_name)
        client.add_pod(pod_def)
        five_minutes = timedelta(minutes=5).total_seconds()
        shakedown.deployment_wait(timeout=five_minutes)
        deployed_pod = client.show_pod(pod_def["id"])
        assert deployed_pod is not None
    finally:
        common.delete_secret(secret_name)
Ejemplo n.º 48
0
def test_pin_pod():
    """Check that a pod can be pinned to a specific host.

    The pod is expected to produce two tasks, and the first instance of the
    first listed pod must report the pinned host as its agent hostname.
    """

    pod_def = pods.ports_pod()

    pinned_host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, pinned_host)

    marathon_client = marathon.create_client()
    marathon_client.add_pod(pod_def)
    shakedown.deployment_wait()

    task_count = len(common.get_pod_tasks(pod_def["id"]))
    assert task_count == 2, \
        "The number of tasks is {} after deployment, but 2 was expected".format(task_count)

    first_pod = marathon_client.list_pod()[0]
    agent_hostname = first_pod['instances'][0]['agentHostname']
    assert agent_hostname == pinned_host, "The pod didn't get pinned to {}".format(pinned_host)
Ejemplo n.º 49
0
async def test_event_channel(sse_events):
    """Check the event channel by asserting the order of certain events.

    Expected events are awaited one after another on ``sse_events`` through
    ``common.assert_event``: stream attachment first, then the deployment
    events after adding an app, then the termination event after removing it.
    Per the original notes, unknown events are skipped.
    """

    await common.assert_event('event_stream_attached', sse_events)

    definition = apps.mesos_app()
    app_id = definition['id']

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)
    shakedown.deployment_wait(app_id=app_id)

    for deployment_event in ('deployment_info', 'deployment_step_success'):
        await common.assert_event(deployment_event, sse_events)

    marathon_client.remove_app(app_id, True)
    shakedown.deployment_wait(app_id=app_id)

    await common.assert_event('app_terminated_event', sse_events)
Ejemplo n.º 50
0
def test_marathon_with_master_process_failure(marathon_service_name):
    """ Launches an app from Marathon and restarts the master.
        It is expected that the service endpoint will come back and that the
        task_id is the original task_id
    """

    app_def = app('master-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/master-failure')
    original_task_id = tasks[0]['id']
    common.systemctl_master()
    shakedown.wait_for_service_endpoint(marathon_service_name)

    # Retry for up to 10 seconds, one second apart, until the task is reported
    # with its original id.
    @retrying.retry(wait_fixed=1000, stop_max_delay=10000)
    def check_task_recovery():
        tasks = client.get_tasks('/master-failure')
        # Previously a bare comparison with no `assert`, so nothing was checked.
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    # The check was defined but never invoked; run it so the test verifies recovery.
    check_task_recovery()
Ejemplo n.º 51
0
def test_neo4j_universe_package_install(neo_package):
    """Install the neo4j package and check the health of its ``neo4j/core`` tasks.

    Neo4j used to be one of the universe packages tested with the others,
    largely because of a short-lived Marathon bug that surfaced through it.
    It differs too much to be tested that way: it is NOT a framework, so
    framework health checks do not work, and task health check results are
    inspected instead.
    """
    shakedown.install_package(neo_package)
    shakedown.deployment_wait(timeout=timedelta(minutes=5).total_seconds(),
                              app_id='neo4j/core')

    assert shakedown.package_installed(neo_package), 'Package failed to install'

    marathon_client = marathon.create_client()
    core_tasks = marathon_client.get_tasks('neo4j/core')

    for core_task in core_tasks:
        first_result = core_task['healthCheckResults'][0]
        assert first_result['lastSuccess'] is not None, 'Healthcheck was not successful'
        assert first_result['consecutiveFailures'] == 0, 'Healthcheck had consecutive failures'
Ejemplo n.º 52
0
def test_docker_dns_mapping(marathon_service_name):
    """Check that a running Docker task can be reached through its DNS name.

    A ping to a deliberately bogus name must fail first; then the app's
    ``<app id>.<marathon service>.mesos`` name is pinged from the master,
    with retries.
    """

    app_def = apps.docker_http_server(app_id='docker-dns-mapping-app')

    marathon_client = marathon.create_client()
    marathon_client.add_app(app_def)
    shakedown.deployment_wait(app_id=app_def["id"])

    # Negative control: a name that should not resolve.
    bad_cmd = 'ping -c 1 docker-test.marathon-user.mesos-bad'
    bad_status, _ = shakedown.run_command_on_master(bad_cmd)
    assert not bad_status

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_dns():
        app_name = app_def["id"].lstrip('/')
        dnsname = '{}.{}.mesos'.format(app_name, marathon_service_name)
        ping_cmd = 'ping -c 1 {}'.format(dnsname)
        shakedown.wait_for_dns(dnsname)
        ping_status, _ = shakedown.run_command_on_master(ping_cmd)
        assert ping_status, "ping failed for app using DNS lookup: {}".format(dnsname)

    check_dns()
Ejemplo n.º 53
0
def _retried_install_impl(
        package_name,
        service_name,
        expected_running_tasks,
        options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS,
        install_cli=True):
    '''Cleaned up version of shakedown's package_install().

    Installs the package unless a Marathon app named ``service_name`` already
    exists, optionally installs the package CLI, then waits for the expected
    number of running tasks and for the Marathon deployment to complete.

    :param package_name: name of the package to install
    :param service_name: Marathon app name used for the existence check and
        for the running-task wait
    :param expected_running_tasks: number of tasks to wait for; the task wait
        is skipped when this is not greater than 0
    :param options: package options; ``None`` (the default) means no options
    :param package_version: version to install; when ``None`` the resolved
        version is only looked up for logging
    :param timeout_seconds: timeout passed to the task and deployment waits
    :param install_cli: whether to install the package CLI when the package
        defines one
    '''
    # A fresh dict per call: the former mutable `{}` default was shared between
    # calls and handed to external code that could have modified it.
    if options is None:
        options = {}

    package_manager = dcos.packagemanager.PackageManager(dcos.cosmos.get_cosmos_url())
    pkg = package_manager.get_package_version(package_name, package_version)

    if package_version is None:
        # Get the resolved version for logging below
        package_version = 'auto:{}'.format(pkg.version())

    log.info('Installing package={} service={} with options={} version={}'.format(
        package_name, service_name, options, package_version))

    # Trigger package install, but only if it's not already installed.
    # We expect upstream to have confirmed that it wasn't already installed beforehand.
    if sdk_marathon.app_exists(service_name):
        log.info('Marathon app={} exists, skipping package install call'.format(service_name))
    else:
        package_manager.install_app(pkg, options)

    # Install CLI while package starts to install
    if install_cli and pkg.cli_definition():
        log.info('Installing CLI for package={}'.format(package_name))
        dcos.subcommand.install(pkg)

    # Wait for expected tasks to come up
    if expected_running_tasks > 0:
        shakedown.wait_for_service_tasks_running(
            service_name, expected_running_tasks, timeout_seconds)

    # Wait for completed marathon deployment
    app_id = pkg.marathon_json(options).get('id')
    shakedown.deployment_wait(timeout_seconds, app_id)
Ejemplo n.º 54
0
def test_marathon_when_task_agent_bounced():
    """Restart the agent a task runs on and check the task keeps its id.

    The app is pinned to a host returned by ``common.ip_other_than_mom()``;
    after that agent is restarted, the first task of the app must still carry
    the original task id.
    """

    definition = apps.sleep_app()
    agent_host = common.ip_other_than_mom()
    common.pin_to_host(definition, agent_host)

    marathon_client = marathon.create_client()
    marathon_client.add_app(definition)

    shakedown.deployment_wait()
    task_id_before = marathon_client.get_tasks(definition["id"])[0]['id']
    shakedown.restart_agent(agent_host)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        current_tasks = marathon_client.get_tasks(definition["id"])
        assert current_tasks[0]['id'] == task_id_before, \
            "The task {} got replaced with {}".format(task_id_before, current_tasks[0]['id'])

    check_task_is_back()
Ejemplo n.º 55
0
def test_pin_pod():
    """Check that a pod can be pinned to a host.

    The pod is built from ``pod-ports.json`` with a random id. Two tasks are
    expected after deployment, and the first instance of the first listed pod
    must report the pinned host as its agent hostname.
    """
    marathon_client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_definition = _pods_json('pod-ports.json')
    pod_definition["id"] = pod_id

    pinned_host = ip_other_than_mom()
    pin_pod_to_host(pod_definition, pinned_host)
    marathon_client.add_pod(pod_definition)
    shakedown.deployment_wait()

    task_count = len(get_pod_tasks(pod_id))
    assert task_count == 2, "Num of tasks: {} is not 2 after deployment".format(task_count)

    first_pod = marathon_client.list_pod()[0]
    assert first_pod['instances'][0]['agentHostname'] == pinned_host
Ejemplo n.º 56
0
def test_pod_port_communication():
    """Check that one container can open a socket to the other container of its pod.

    The second container's command sleeps for 2 seconds and then curls the
    first container. If that fails with curl exit code 7 the command exits,
    leaving one container running; otherwise it starts its own HTTP server,
    so two tasks are expected.
    """
    marathon_client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_definition = _pods_json('pod-ports.json')
    pod_definition["id"] = pod_id

    second_container_command = pod_definition['containers'][1]['exec']['command']
    second_container_command['shell'] = 'sleep 2; curl -m 2 localhost:$ENDPOINT_HTTPENDPOINT; if [ $? -eq 7 ]; then exit; fi; /opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTPENDPOINT2'  # NOQA
    marathon_client.add_pod(pod_definition)
    shakedown.deployment_wait()

    task_count = len(get_pod_tasks(pod_id))
    assert task_count == 2, "Num of tasks: {} is not 2 after deployment".format(task_count)
Ejemplo n.º 57
0
def test_default_user():
    """Check that a task without an explicit user runs as root by default.

    The ``unique-sleep`` app is deployed, its ``user`` field must be ``None``,
    and a ``ps``/``awk`` pipeline on the task's agent exits non-zero if a
    matching sleep process is owned by anyone other than root.
    """

    # Launch unique-sleep and wait for the deployment to finish.
    resource_path = "{}/unique-sleep.json".format(fixture_dir())
    application_json = get_resource(resource_path)
    client = marathon.create_client()
    client.add_app(application_json)
    shakedown.deployment_wait()

    deployed_app = client.get_app(application_json['id'])
    assert deployed_app['user'] is None

    # Find the agent the task landed on.
    agent_host = client.get_tasks("unique-sleep")[0]['host']

    # NOTE(review): if run_command_on_agent returns a (status, output) tuple,
    # this assert is always true - confirm against the shakedown version in use.
    assert shakedown.run_command_on_agent(
        agent_host, "ps aux | grep '[s]leep ' | awk '{if ($1 !=\"root\") exit 1;}'")

    client = marathon.create_client()
    client.remove_app("/unique-sleep")
Ejemplo n.º 58
0
def test_launch_container_with_persistent_volume():
    """ Tests launching a task with PV.  It will write to a file in the PV.
        The app is killed and restarted and we can still read from the PV.

        The file is fetched over HTTP from the task before and after the
        restart; it must contain one ``hello`` line before and two after.
    """
    app_def = persistent_volume_app()
    app_id = app_def['id']
    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1

    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/data/foo".format(host, port)
    run, data = shakedown.run_command_on_master(cmd)

    assert run, "{} did not succeed".format(cmd)
    assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)

    client.restart_app(app_id)
    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000, stop_max_delay=10000)
    def check_task_recovery():
        recovered_tasks = client.get_tasks(app_id)
        assert len(recovered_tasks) == 1
        return recovered_tasks

    # Use the task list fetched AFTER the restart. Previously the helper's
    # result stayed local and the stale pre-restart host/port were reused.
    tasks = check_task_recovery()

    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/data/foo".format(host, port)
    run, data = shakedown.run_command_on_master(cmd)

    assert run, "{} did not succeed".format(cmd)
    assert data == 'hello\nhello\n', "'{}' was not equal to hello\\nhello\\n".format(
        data)
def test_framework_unavailable_on_mom():
    """ Launches an app that has elements necessary to create a service endpoint in DCOS.
        This test confirms that the endpoint is not created when launched with MoM.
    """
    # If the 'pyfw' service is already available, remove the app behind it
    # first so that a leftover endpoint cannot influence the result.
    if shakedown.service_available_predicate('pyfw'):
        client = marathon.create_client()
        client.remove_app('python-http', True)
        shakedown.deployment_wait()
        shakedown.wait_for_service_endpoint_removal('pyfw')

    with shakedown.marathon_on_marathon():
        delete_all_apps_wait()
        client = marathon.create_client()
        client.add_app(common.fake_framework_app())
        shakedown.deployment_wait()

    # Waiting for the endpoint is expected to fail by raising. The failing
    # assertion lives in the `else` branch: it used to sit inside the `try`,
    # where the bare `except:` swallowed it and the test could never fail.
    try:
        shakedown.wait_for_service_endpoint('pyfw', 15)
    except Exception:
        # Expected: no service endpoint appeared within the timeout.
        pass
    else:
        assert False, 'MoM shoud NOT create a service endpoint'
Ejemplo n.º 60
0
def test_incremental_app_scale():
    """
    Scale the number of apps in steps until the first error, e.g. a timeout,
    is reached. The apps are created in the root group.

    One app is added per step and the test waits up to 15 minutes for the
    deployment before continuing with the next step.
    """

    cluster_info()
    print(available_resources())

    client = marathon.create_client()
    # Remove the root group before adding apps (presumably to start from an
    # empty cluster - confirm).
    client.remove_group('/')

    # Unbounded counter: the loop only ends when a call below raises.
    for step in itertools.count(start=1):
        shakedown.echo("Add new apps")

        # Zero-padded, four-character step number, e.g. "app-0001".
        app_id = "app-{0:0>4}".format(step)
        client.add_app(app_def(app_id))

        shakedown.deployment_wait(timeout=timedelta(
            minutes=15).total_seconds())

        shakedown.echo("done.")