Example #1
0
def test_mom_with_network_failure_bounce_master():
    """Verify a MoM task keeps its task ID across a network partition and a master bounce.

    Marathon on Marathon (MoM) test for DC/OS. A sleep app is launched through
    MoM, the hosts of MoM and of the task are partitioned, the
    ``dcos-mesos-master`` service is restarted, and the hosts are reconnected.
    The test passes when the first task reported for the app afterwards still
    carries the original task ID.
    """

    # get MoM ip
    mom_ip = common.ip_of_mom()
    logger.info("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        logger.info("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    try:
        # wait for a min
        time.sleep(timedelta(minutes=1).total_seconds())

        # bounce master
        run_command_on_master("sudo systemctl restart dcos-mesos-master")
    finally:
        # bring the net up, even when the wait or the restart raised, so a
        # failing run does not leave the agents partitioned for later tests
        reconnect_agent(mom_ip)
        reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    common.wait_for_service_endpoint('marathon-user',
                                     timedelta(minutes=10).total_seconds(),
                                     path="ping")

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'),
                                timedelta(minutes=10).total_seconds())

        # Up to 30 attempts, 1000 ms apart.
        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0][
                'id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Example #2
0
def start_master_http_service(port=7777, pid_file='python_http.pid'):
    """Launch a background HTTP server on the master leader.

    The main purpose is to serve up artifacts for launched test applications.
    It is commonly used in combination with copying tests or artifacts to the
    leader and then configuring the Mesos task to fetch them, e.g. from
    http://master.mesos:7777/artifact.tar (be careful in a multi-master
    environment).

    :param port: port to use for the http service
    :param pid_file: file the shell writes the background process PID to
    :return: pid_file
    """
    launch_cmd = (
        'nohup /opt/mesosphere/bin/python -m http.server {} > http.log 2>&1 & '
        'echo $! > {}'
    ).format(port, pid_file)
    run_command_on_master(launch_cmd)
    return pid_file
Example #3
0
def test_docker_dns_mapping(marathon_service_name):
    """Check that a running Docker task can be pinged through its ``.mesos`` DNS name.

    :param marathon_service_name: middle component of the DNS name that is
        built as ``<app id>.<marathon_service_name>.mesos``
    """

    docker_app = apps.docker_http_server(app_id='/docker-dns-mapping-app')
    app_id = docker_app["id"]

    marathon.create_client().add_app(docker_app)
    deployment_wait(service_id=app_id)

    # Negative check first: pinging this name is expected to fail.
    bad_status, _ = run_command_on_master(
        'ping -c 1 docker-test.marathon-user.mesos-bad')
    assert not bad_status

    # Up to 30 attempts, 1000 ms apart.
    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_dns():
        short_id = app_id.lstrip('/')
        dnsname = '{}.{}.mesos'.format(short_id, marathon_service_name)
        ping_cmd = 'ping -c 1 {}'.format(dnsname)
        wait_for_dns(dnsname)
        reachable, _ = run_command_on_master(ping_cmd)
        assert reachable, "ping failed for app using DNS lookup: {}".format(
            dnsname)

    check_dns()
Example #4
0
def assert_http_code(url, http_code='200'):
    """Assert that fetching ``url`` with curl on the master yields ``http_code``.

    curl is run silently with the body discarded, so the command output is
    only the HTTP status code.

    :param url: URL appended verbatim to the curl command line
    :param http_code: expected status code; a string or an int is accepted
    :raises AssertionError: if the command fails or the code differs
    """
    cmd = r'curl -s -o /dev/null -w "%{http_code}"'
    cmd = cmd + ' {}'.format(url)
    status, output = run_command_on_master(cmd)

    assert status, "{} failed".format(cmd)
    # The command output is compared as text; normalise the expected code so
    # a caller passing the int 200 is not rejected for a type mismatch.
    expected = str(http_code)
    assert output == expected, "Got {} status code".format(output)
Example #5
0
def assert_http_code(url, http_code='200'):
    """Check, from the master, that ``url`` answers with the expected HTTP status.

    The curl invocation is silent and discards the response body, printing
    only the status code, which is then compared with ``http_code``.

    :param url: URL appended verbatim to the curl command line
    :param http_code: expected status code, given as a string or an int
    :raises AssertionError: if the command fails or the code differs
    """
    curl_cmd = r'curl -s -o /dev/null -w "%{http_code}"' + ' {}'.format(url)
    status, output = run_command_on_master(curl_cmd)

    assert status, "{} failed".format(curl_cmd)
    # Compare as text: without the str() an int such as 200 could never
    # equal the string output of the command.
    assert output == str(http_code), "Got {} status code".format(output)
Example #6
0
 def check_dns():
     """Wait for the app's ``.mesos`` name, then assert a ping from the master succeeds.

     ``app_id`` and ``marathon_service_name`` are not parameters; they are
     resolved from the surrounding scope.
     """
     short_id = app_id.lstrip('/')
     dnsname = '{}.{}.mesos'.format(short_id, marathon_service_name)
     ping_cmd = 'ping -c 1 {}'.format(dnsname)
     wait_for_dns(dnsname)
     reachable, _ = run_command_on_master(ping_cmd)
     assert reachable, "ping failed for app using DNS lookup: {}".format(
         dnsname)
def test_mom_with_network_failure_bounce_master():
    """Verify a MoM task keeps its task ID across a network partition and a master bounce.

    Marathon on Marathon (MoM) test for DC/OS. A sleep app is launched through
    MoM, the hosts of MoM and of the task are partitioned, the
    ``dcos-mesos-master`` service is restarted, and the hosts are reconnected.
    The test passes when the first task reported for the app afterwards still
    carries the original task ID.
    """

    # get MoM ip
    mom_ip = common.ip_of_mom()
    logger.info("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with marathon_on_marathon() as client:
        client.add_app(app_def)
        wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        logger.info("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    try:
        # wait for a min
        time.sleep(timedelta(minutes=1).total_seconds())

        # bounce master
        run_command_on_master("sudo systemctl restart dcos-mesos-master")
    finally:
        # bring the net up, even when the wait or the restart raised, so a
        # failing run does not leave the agents partitioned for later tests
        reconnect_agent(mom_ip)
        reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    wait_for_service_endpoint('marathon-user', timedelta(minutes=10).total_seconds(), path="ping")

    with marathon_on_marathon() as client:
        wait_for_task("marathon-user", app_id.lstrip('/'), timedelta(minutes=10).total_seconds())

        # Up to 30 attempts, 1000 ms apart.
        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Example #8
0
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Check that an app and its task survive a leader abdication with backup and restore.

    Backup and restore testing is done with only one master, since the new
    master has to be able to read the backup file that was created by the
    previous master, and the easiest way to test that is with a single master.

    :param marathon_service_name: service whose ``ping`` endpoint is awaited
        after the leader has abdicated
    """

    backup_dir = '/tmp'
    backup_file = 'backup.tar'
    backup_url = 'file://{}/{}'.format(backup_dir, backup_file)

    def single_task_id(app_info):
        """Assert exactly one task is running and return the first task's ID."""
        running = app_info['tasksRunning']
        assert running == 1, \
            "The number of running tasks is {}, but 1 was expected".format(running)
        return app_info['tasks'][0]['id']

    # Deploy a simple test app. It is expected to be there after leader reelection
    sleep_app = apps.sleep_app()
    app_id = sleep_app['id']

    client = marathon.create_client()
    client.add_app(sleep_app)
    common.deployment_wait(service_id=app_id)

    task_id = single_task_id(client.get_app(app_id))

    # Abdicate the leader with backup and restore
    original_leader = marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    params = '?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE /v2/leader{}'.format(params))
    common.abdicate_marathon_leader(params)

    # Wait for new leader (but same master server) to be up and ready
    ping_timeout = timedelta(minutes=5).total_seconds()
    common.wait_for_service_endpoint(marathon_service_name,
                                     ping_timeout,
                                     path="ping")
    restored_task_id = single_task_id(client.get_app(app_id))
    assert task_id == restored_task_id, "Task has a different ID after restore"

    # Check that the backup file exists and is valid
    list_cmd = 'tar -tf {}/{} | wc -l'.format(backup_dir, backup_file)
    status, data = run_command_on_master(list_cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    entry_count = int(data.rstrip())
    assert entry_count > 0, "Backup file is empty"
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Check that an app and its task survive a leader abdication with backup and restore.

    Backup and restore testing is done with only one master, since the new
    master has to be able to read the backup file that was created by the
    previous master, and the easiest way to test that is with a single master.

    :param marathon_service_name: service whose ``ping`` endpoint is awaited
        after the leader has abdicated
    """

    backup_dir = '/tmp'
    backup_file = 'backup.tar'
    backup_url = 'file://{}/{}'.format(backup_dir, backup_file)

    def single_task_id(app_info):
        """Assert exactly one task is running and return the first task's ID."""
        running = app_info['tasksRunning']
        assert running == 1, "The number of running tasks is {}, but 1 was expected".format(running)
        return app_info['tasks'][0]['id']

    # Deploy a simple test app. It is expected to be there after leader reelection
    sleep_app = apps.sleep_app()
    app_id = sleep_app['id']

    client = marathon.create_client()
    client.add_app(sleep_app)
    deployment_wait(service_id=app_id)

    task_id = single_task_id(client.get_app(app_id))

    # Abdicate the leader with backup and restore
    original_leader = marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    params = '?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE /v2/leader{}'.format(params))
    common.abdicate_marathon_leader(params)

    # Wait for new leader (but same master server) to be up and ready
    ping_timeout = timedelta(minutes=5).total_seconds()
    wait_for_service_endpoint(marathon_service_name, ping_timeout, path="ping")
    restored_task_id = single_task_id(client.get_app(app_id))
    assert task_id == restored_task_id, "Task has a different ID after restore"

    # Check that the backup file exists and is valid
    list_cmd = 'tar -tf {}/{} | wc -l'.format(backup_dir, backup_file)
    status, data = run_command_on_master(list_cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    entry_count = int(data.rstrip())
    assert entry_count > 0, "Backup file is empty"
def test_docker_dns_mapping(marathon_service_name):
    """Check that a running Docker task can be pinged through its ``.mesos`` DNS name.

    :param marathon_service_name: middle component of the DNS name that is
        built as ``<app id>.<marathon_service_name>.mesos``
    """

    docker_app = apps.docker_http_server(app_id='/docker-dns-mapping-app')
    app_id = docker_app["id"]

    marathon.create_client().add_app(docker_app)
    deployment_wait(service_id=app_id)

    # Negative check first: pinging this name is expected to fail.
    bad_status, _ = run_command_on_master('ping -c 1 docker-test.marathon-user.mesos-bad')
    assert not bad_status

    # Up to 30 attempts, 1000 ms apart.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_dns():
        short_id = app_id.lstrip('/')
        dnsname = '{}.{}.mesos'.format(short_id, marathon_service_name)
        ping_cmd = 'ping -c 1 {}'.format(dnsname)
        wait_for_dns(dnsname)
        reachable, _ = run_command_on_master(ping_cmd)
        assert reachable, "ping failed for app using DNS lookup: {}".format(dnsname)

    check_dns()
Example #11
0
 def check_data(port, path, expected):
     """Curl ``<host>:<port>/<path>/foo`` from the master and assert ``expected`` is in the reply.

     ``host`` is not a parameter; it is resolved from the surrounding scope.

     :param port: port placed after the host in the URL
     :param path: path segment placed before ``/foo``
     :param expected: substring that must occur in the command output
     """
     cmd = "curl {}:{}/{}/foo".format(host, port, path)
     run, data = run_command_on_master(cmd)
     assert run, "{} did not succeed".format(cmd)
     # The original message ended in a stray "n" after the closing quote.
     assert expected in data, "{} not found in '{}'".format(expected, data)
Example #12
0
 def value_check():
     """Run ``cmd`` on the master and assert its output matches ``secret_value``.

     ``cmd`` and ``secret_value`` are not parameters; they are resolved from
     the surrounding scope. Trailing whitespace in the output is ignored.
     """
     status, data = run_command_on_master(cmd)
     assert status, "{} did not succeed".format(cmd)
     # Give the comparison a failure message so a mismatch is distinguishable
     # from a failed command; the values themselves are left out of it.
     assert data.rstrip() == secret_value, "Got an unexpected secret data"
 def http_output_check():
     """Curl ``relay_url`` from the master and assert both apps appear in the reply.

     ``relay_url``, ``pinger_app`` and ``relay_app`` are not parameters; they
     are resolved from the surrounding scope.
     """
     curl_cmd = 'curl {}'.format(relay_url)
     status, output = run_command_on_master(curl_cmd)
     assert status, "curl {} failed on master with {}".format(relay_url, output)

     pong_marker = 'Pong {}'.format(pinger_app["id"])
     assert pong_marker in output
     relay_marker = 'Relay from {}'.format(relay_app["id"])
     assert relay_marker in output
Example #14
0
def restart_master_node():
    """Reboot the master node by running ``shutdown -r now`` on it with sudo."""
    reboot_cmd = "sudo /sbin/shutdown -r now"
    run_command_on_master(reboot_cmd)
Example #15
0
def systemctl_master(command='restart'):
    """Apply a ``systemctl`` action to the ``dcos-mesos-master`` unit on the master.

    :param command: systemctl verb to run; defaults to ``'restart'``
    """
    systemctl_cmd = 'sudo systemctl {} dcos-mesos-master'.format(command)
    run_command_on_master(systemctl_cmd)
Example #16
0
 def value_check():
     """Run ``cmd`` on the master and assert its output matches ``secret_value``.

     ``cmd`` and ``secret_value`` are not parameters; they are resolved from
     the surrounding scope. Trailing whitespace in the output is ignored.
     """
     status, data = run_command_on_master(cmd)
     assert status, "{} did not succeed".format(cmd)
     # Give the comparison a failure message so a mismatch is distinguishable
     # from a failed command; the values themselves are left out of it.
     assert data.rstrip() == secret_value, "Got an unexpected secret data"
Example #17
0
 def value_check():
     """Run ``cmd`` on the master and assert its output matches ``secret_value``.

     ``cmd`` and ``secret_value`` are not parameters; they are resolved from
     the surrounding scope. Trailing whitespace in the output is ignored.
     """
     status, data = run_command_on_master(cmd)
     failure_msg = "{} did not succeed. status = {}, data = {}".format(
         cmd, status, data)
     assert status, failure_msg
     received = data.rstrip()
     assert received == secret_value, "Got an unexpected secret data"
 def check_dns():
     """Wait for the app's ``.mesos`` name, then assert a ping from the master succeeds.

     ``app_id`` and ``marathon_service_name`` are not parameters; they are
     resolved from the surrounding scope.
     """
     short_id = app_id.lstrip('/')
     dnsname = '{}.{}.mesos'.format(short_id, marathon_service_name)
     ping_cmd = 'ping -c 1 {}'.format(dnsname)
     wait_for_dns(dnsname)
     reachable, _ = run_command_on_master(ping_cmd)
     assert reachable, "ping failed for app using DNS lookup: {}".format(dnsname)
Example #19
0
def systemctl_master(command='restart'):
    """Start, stop or restart the master process via ``systemctl``.

    :param command: systemctl verb applied to the ``dcos-mesos-master`` unit;
        defaults to ``'restart'``
    """
    systemctl_cmd = 'sudo systemctl {} dcos-mesos-master'.format(command)
    run_command_on_master(systemctl_cmd)
Example #20
0
def systemctl_master(command='restart'):
    """Apply a ``systemctl`` action to the ``dcos-mesos-master`` unit on the master.

    :param command: systemctl verb to run; defaults to ``'restart'``
    """
    systemctl_cmd = 'sudo systemctl {} dcos-mesos-master'.format(command)
    run_command_on_master(systemctl_cmd)
    def check_task(cmd, target_data):
        """Run ``cmd`` on the master and assert ``target_data`` occurs in its output.

        :param cmd: command line to execute on the master
        :param target_data: substring that must be present in the output
        """
        succeeded, output = run_command_on_master(cmd)
        assert succeeded, "{} did not succeed".format(cmd)

        missing_msg = "'{}' not found in {}".format(target_data, output)
        assert target_data in output, missing_msg
Example #22
0
 def check_http_endpoint(port, path):
     """Curl ``<host>:<port>/<path>/foo`` from the master and assert the reply is ``hello\\n``.

     ``host`` is not a parameter; it is resolved from the surrounding scope.

     :param port: port placed after the host in the URL
     :param path: path segment placed before ``/foo``
     """
     curl_cmd = "curl {}:{}/{}/foo".format(host, port, path)
     succeeded, body = run_command_on_master(curl_cmd)
     assert succeeded, "{} did not succeed".format(curl_cmd)
     mismatch_msg = "'{}' was not equal to hello\\n".format(body)
     assert body == 'hello\n', mismatch_msg
Example #23
0
 def http_output_check():
     """Curl ``relay_url`` from the master and assert both apps appear in the reply.

     ``relay_url``, ``pinger_app`` and ``relay_app`` are not parameters; they
     are resolved from the surrounding scope.
     """
     curl_cmd = 'curl {}'.format(relay_url)
     status, output = run_command_on_master(curl_cmd)
     assert status, "curl {} failed on master with {}".format(relay_url, output)

     pong_marker = 'Pong {}'.format(pinger_app["id"])
     assert pong_marker in output
     relay_marker = 'Relay from {}'.format(relay_app["id"])
     assert relay_marker in output
Example #24
0
 def value_check():
     """Run ``cmd`` on the master and assert its output matches ``secret_value``.

     ``cmd`` and ``secret_value`` are not parameters; they are resolved from
     the surrounding scope. Trailing whitespace in the output is ignored.
     """
     status, data = run_command_on_master(cmd)
     failure_msg = "{} did not succeed. status = {}, data = {}".format(cmd, status, data)
     assert status, failure_msg
     received = data.rstrip()
     assert received == secret_value, "Got an unexpected secret data"
    def check_task(cmd, target_data):
        """Run ``cmd`` on the master and assert its output equals ``target_data``.

        :param cmd: command line to execute on the master
        :param target_data: exact output the command is expected to produce
        """
        succeeded, output = run_command_on_master(cmd)
        assert succeeded, "{} did not succeed".format(cmd)

        mismatch_msg = "'{}' was not equal to {}".format(output, target_data)
        assert output == target_data, mismatch_msg
Example #26
0
 def check_data(port, path):
     """Curl ``<host>:<port>/<path>/foo`` from the master and assert it contains two ``hello`` lines.

     ``host`` is not a parameter; it is resolved from the surrounding scope.

     :param port: port placed after the host in the URL
     :param path: path segment placed before ``/foo``
     """
     cmd = "curl {}:{}/{}/foo".format(host, port, path)
     run, data = run_command_on_master(cmd)
     assert run, "{} did not succeed".format(cmd)
     # The original message ended in a stray "n" after the closing quote.
     assert 'hello\nhello\n' in data, "'hello\nhello\n' not found in '{}'".format(
         data)
Example #27
0
def restart_master_node():
    """Reboot the master node by running ``shutdown -r now`` on it with sudo."""
    reboot_cmd = "sudo /sbin/shutdown -r now"
    run_command_on_master(reboot_cmd)
Example #28
0
    def check_task(cmd, target_data):
        """Run ``cmd`` on the master and assert its output equals ``target_data``.

        :param cmd: command line to execute on the master
        :param target_data: exact output the command is expected to produce
        """
        succeeded, output = run_command_on_master(cmd)
        assert succeeded, "{} did not succeed".format(cmd)

        mismatch_msg = "'{}' was not equal to {}".format(output, target_data)
        assert output == target_data, mismatch_msg
Example #29
0
    def check_task(cmd, target_data):
        """Run ``cmd`` on the master and assert ``target_data`` occurs in its output.

        :param cmd: command line to execute on the master
        :param target_data: substring that must be present in the output
        """
        succeeded, output = run_command_on_master(cmd)
        assert succeeded, "{} did not succeed".format(cmd)

        missing_msg = "'{}' not found in {}".format(target_data, output)
        assert target_data in output, missing_msg
Example #30
0
 def check_http_endpoint(port, path, expected):
     """Curl ``<host>:<port>/<path>/foo`` from the master and assert ``expected`` is in the reply.

     ``host`` is not a parameter; it is resolved from the surrounding scope.

     :param port: port placed after the host in the URL
     :param path: path segment placed before ``/foo``
     :param expected: substring that must occur in the command output
     """
     cmd = "curl {}:{}/{}/foo".format(host, port, path)
     run, data = run_command_on_master(cmd)
     assert run, "{} did not succeed".format(cmd)
     # Arguments follow the wording of the message: the missing value first,
     # then the text it was searched in (the original had them swapped).
     assert expected in data, "'{}' was not found in '{}'".format(expected, data)