Example 1
def get_marathon_leader_not_on_master_leader_node():
    marathon_leader = shakedown.marathon_leader_ip()
    master_leader = shakedown.master_leader_ip()
    print('marathon: {}'.format(marathon_leader))
    print('leader: {}'.format(master_leader))

    if marathon_leader == master_leader:
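        # Marathon's leader is on the same node as the Mesos master leader: force it to
        # abdicate (a DELETE on v2/leader) so a different Marathon instance can take over.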
        delete_marathon_path('v2/leader')
        shakedown.wait_for_service_endpoint('marathon', timedelta(minutes=5).total_seconds())
        marathon_leadership_changed(marathon_leader)
        marathon_leader = shakedown.marathon_leader_ip()
        print('switched leader to: {}'.format(marathon_leader))

    return marathon_leader
Example 2
def get_marathon_leader_not_on_master_leader_node():
    marathon_leader = shakedown.marathon_leader_ip()
    master_leader = shakedown.master_leader_ip()
    print('marathon: {}'.format(marathon_leader))
    print('leader: {}'.format(master_leader))

    if marathon_leader == master_leader:
        delete_marathon_path('v2/leader')
        shakedown.wait_for_service_endpoint('marathon', timedelta(minutes=5).total_seconds())
        new_leader = shakedown.marathon_leader_ip()
        assert new_leader != marathon_leader, "A new Marathon leader has not been elected"
        marathon_leader = new_leader
        print('switched leader to: {}'.format(marathon_leader))

    return marathon_leader
Example 3
def get_marathon_leader_not_on_master_leader_node():
    marathon_leader = shakedown.marathon_leader_ip()
    master_leader = shakedown.master_leader_ip()
    print('marathon: {}'.format(marathon_leader))
    print('leader: {}'.format(master_leader))

    if marathon_leader == master_leader:
        delete_marathon_path('v2/leader')
        shakedown.wait_for_service_endpoint('marathon', timedelta(minutes=5).total_seconds())
        new_leader = shakedown.marathon_leader_ip()
        assert new_leader != marathon_leader, "A new Marathon leader has not been elected"
        marathon_leader = new_leader
        print('switched leader to: {}'.format(marathon_leader))

    return marathon_leader
Example 4
def __marathon_leadership_changed_in_mesosDNS(original_leader):
    """ This method uses mesosDNS to verify that the leadership changed.
        We have to retry because mesosDNS checks for changes only every 30s.
    """
    current_leader = shakedown.marathon_leader_ip()
    print('leader according to MesosDNS: {}'.format(current_leader))
    assert original_leader != current_leader
Example 5
def events_to_file():
    leader_ip = shakedown.marathon_leader_ip()
    print("entering events_to_file fixture")
    shakedown.run_command(leader_ip, 'rm events.txt')

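    # Keep a background curl attached to Marathon's /v2/events SSE endpoint, streaming
    # everything it emits into events.txt on the leader; curl's exit code is written to
    # events.exitcode so the tests can later detect a dropped stream.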
    # In strict mode marathon runs in SSL mode on port 8443 and requires authentication
    if shakedown.ee_version() == 'strict':
        shakedown.run_command(
            leader_ip,
            '(curl --compressed -H "Cache-Control: no-cache" -H "Accept: text/event-stream" '
            + '-H "Authorization: token={}" '.format(
                shakedown.dcos_acs_token()) +
            '-o events.txt -k https://marathon.mesos:8443/v2/events; echo $? > events.exitcode) &'
        )

    # Otherwise Marathon runs in HTTP mode on port 8080
    else:
        shakedown.run_command(
            leader_ip,
            '(curl --compressed -H "Cache-Control: no-cache" -H "Accept: text/event-stream" '
            '-o events.txt http://marathon.mesos:8080/v2/events; echo $? > events.exitcode) &'
        )

    yield
    shakedown.kill_process_on_host(leader_ip, '[c]url')
    shakedown.run_command(leader_ip, 'rm events.txt')
    shakedown.run_command(leader_ip, 'rm events.exitcode')
    print("exiting events_to_file fixture")
Example 6
def test_marathon_delete_leader(marathon_service_name):
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    marathon_leadership_changed(original_leader)
Example 7
def test_marathon_delete_leader_and_check_apps(marathon_service_name):

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # start an app
    app_def = common.app(id=uuid.uuid4().hex)
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name,
                                        timedelta(minutes=5).total_seconds())

    @retrying.retry(stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        app = client.get_app(app_id)
        assert app['tasksRunning'] == expected_instances

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    client.remove_app(app_id)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 0

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name,
                                        timedelta(minutes=5).total_seconds())

    # wait until leader changed
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
Example 8
def run_command_on_marathon_leader(command,
                                   username=None,
                                   key_path=None,
                                   noisy=True):
    """ Run a command on the Marathon leader
    """

    return run_command(shakedown.marathon_leader_ip(), command, username,
                       key_path, noisy)
Example 9
def __marathon_leadership_changed_in_mesosDNS(original_leader):
    """ This method uses mesosDNS to verify that the leadership changed.
        We have to retry because mesosDNS checks for changes only every 30s.
    """
    current_leader = shakedown.marathon_leader_ip()
    print(f'leader according to MesosDNS: {current_leader}, original leader: {original_leader}') # NOQA E999
    error = f'Current leader did not change: original={original_leader}, current={current_leader}' # NOQA E999
    assert original_leader != current_leader, error
    return current_leader
Example 10
def run_command_on_marathon_leader(
        command,
        username=None,
        key_path=None,
        noisy=True
):
    """ Run a command on the Marathon leader
    """

    return run_command(shakedown.marathon_leader_ip(), command, username, key_path, noisy)
Example 11
def test_marathon_delete_leader(marathon_service_name):
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    common.abdicate_marathon_leader()

    common.wait_for_service_endpoint(marathon_service_name,
                                     timedelta(minutes=5).total_seconds(),
                                     path="ping")

    common.assert_marathon_leadership_changed(original_leader)
Example 12
def test_marathon_delete_leader_and_check_apps(marathon_service_name):

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # start an app
    app_def = common.app(id=uuid.uuid4().hex)
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(stop_max_attempt_number=30)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(stop_max_attempt_number=30)
    def check_app_existence(expected_instances):
        app = client.get_app(app_id)
        assert app['tasksRunning'] == expected_instances

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    client.remove_app(app_id)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 0

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
Example 13
def __marathon_leadership_changed_in_mesosDNS(original_leader):
    """ This method uses mesosDNS to verify that the leadership changed.
        We have to retry because mesosDNS checks for changes only every 30s.
    """
    current_leader = shakedown.marathon_leader_ip()
    print(f'leader according to MesosDNS: {current_leader}, original leader: {original_leader}') # NOQA E999

    assert current_leader, "MesosDNS returned empty string for Marathon leader ip."
    error = f'Current leader did not change: original={original_leader}, current={current_leader}' # NOQA E999
    assert original_leader != current_leader, error
    return current_leader
Example 14
def get_marathon_leader_not_on_master_leader_node():
    marathon_leader = shakedown.marathon_leader_ip()
    master_leader = shakedown.master_leader_ip()
    logger.info('marathon leader: {}'.format(marathon_leader))
    logger.info('mesos leader: {}'.format(master_leader))

    if marathon_leader == master_leader:
        delete_marathon_path('v2/leader')
        wait_for_service_endpoint('marathon', timedelta(minutes=5).total_seconds(), path="ping")
        marathon_leader = assert_marathon_leadership_changed(marathon_leader)
        logger.info('switched leader to: {}'.format(marathon_leader))

    return marathon_leader
Example 15
def get_marathon_leader_not_on_master_leader_node():
    marathon_leader = shakedown.marathon_leader_ip()
    master_leader = shakedown.master_leader_ip()
    print('marathon leader: {}'.format(marathon_leader))
    print('mesos leader: {}'.format(master_leader))

    if marathon_leader == master_leader:
        delete_marathon_path('v2/leader')
        shakedown.wait_for_service_endpoint('marathon', timedelta(minutes=5).total_seconds())
        marathon_leader = assert_marathon_leadership_changed(marathon_leader)
        print('switched leader to: {}'.format(marathon_leader))

    return marathon_leader
Example 16
def test_marathon_master_partition_leader_change(marathon_service_name):

    original_leader = common.get_marathon_leader_not_on_master_leader_node()

    # blocking outbound connection to mesos master
    with shakedown.iptable_rules(original_leader):
        block_port(original_leader, 5050, direction='OUTPUT')
        #  time of the master block
        time.sleep(timedelta(minutes=1.5).total_seconds())

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    current_leader = shakedown.marathon_leader_ip()
    assert original_leader != current_leader
Example 17
def test_marathon_master_partition_leader_change(marathon_service_name):

    original_leader = common.get_marathon_leader_not_on_master_leader_node()

    # blocking outbound connection to mesos master
    with shakedown.iptable_rules(original_leader):
        common.block_port(original_leader, 5050, direction='OUTPUT')
        #  time of the master block
        time.sleep(timedelta(minutes=1.5).total_seconds())

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    current_leader = shakedown.marathon_leader_ip()
    assert original_leader != current_leader, "A new Marathon leader has not been elected"
Example 18
def test_marathon_delete_leader(marathon_service_name):
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    marathon_leadership_changed()
Example 19
def test_marathon_delete_leader(marathon_service_name):
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    marathon_leadership_changed()
Example 20
def test_marathon_zk_partition_leader_change(marathon_service_name):

    original_leader = common.get_marathon_leader_not_on_master_leader_node()

    # blocking zk on marathon leader (not master leader)
    with shakedown.iptable_rules(original_leader):
        common.block_port(original_leader, 2181, direction='INPUT')
        common.block_port(original_leader, 2181, direction='OUTPUT')
        #  time of the zk block, the duration must be greater than all the default ZK timeout values in Marathon
        time.sleep(20)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    current_leader = shakedown.marathon_leader_ip()
    assert original_leader != current_leader, "A new Marathon leader has not been elected"
Example 21
def test_marathon_zk_partition_leader_change(marathon_service_name):

    original_leader = common.get_marathon_leader_not_on_master_leader_node()

    # blocking zk on marathon leader (not master leader)
    with shakedown.iptable_rules(original_leader):
        common.block_port(original_leader, 2181, direction='INPUT')
        common.block_port(original_leader, 2181, direction='OUTPUT')
        #  time of the zk block, the duration must be greater than all the default ZK timeout values in Marathon
        time.sleep(20)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    current_leader = shakedown.marathon_leader_ip()
    assert original_leader != current_leader, "A new Marathon leader has not been elected"
Example 22
def test_marathon_zk_partition_leader_change(marathon_service_name):

    original_leader = common.get_marathon_leader_not_on_master_leader_node()

    # blocking zk on marathon leader (not master leader)
    with shakedown.iptable_rules(original_leader):
        block_port(original_leader, 2181, direction='INPUT')
        block_port(original_leader, 2181, direction='OUTPUT')
        #  time of the zk block
        time.sleep(5)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    current_leader = shakedown.marathon_leader_ip()
    assert original_leader != current_leader
Example 23
def test_event_channel_for_pods():
    """Tests the Marathon event channel specific to pod events."""

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write files.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    leader_ip = shakedown.marathon_leader_ip()

    # look for created
    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_deployment_message():
        status, stdout = shakedown.run_command(leader_ip, 'cat events.exitcode')
        assert str(stdout).strip() == '', \
            "SSE stream disconnected (CURL exit code is {})".format(stdout.strip())
        status, stdout = shakedown.run_command(leader_ip, 'cat events.txt')
        assert 'event_stream_attached' in stdout, "event_stream_attached event has not been produced"
        assert 'pod_created_event' in stdout, "pod_created_event event has not been produced"
        assert 'deployment_step_success' in stdout, "deployment_step_success event has not been produced"

    check_deployment_message()

    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_def)
    common.deployment_wait(service_id=pod_id)

    # look for updated
    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_update_message():
        status, stdout = shakedown.run_command(leader_ip, 'cat events.txt')
        assert 'pod_updated_event' in stdout, 'pod_updated_event event has not been produced'

    check_update_message()
Example 24
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Backup and restore meeting is done with only one master since new master has to be able
       to read the backup file that was created by the previous master and the easiest way to
       test it is when there is 1 master
    """

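    # The backup target is a file:// URL on the master's local filesystem; with a single
    # master, the re-elected leader can read the file written by the previous leader.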
    backup_file = 'backup.tar'
    backup_dir = '/tmp'
    backup_url = 'file://{}/{}'.format(backup_dir, backup_file)

    # Deploy a simple test app. It is expected to be there after leader reelection
    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait(app_id=app_id)

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])
    task_id = app['tasks'][0]['id']

    # Abdicate the leader with backup and restore
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    params = '?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE /v2/leader{}'.format(params))
    common.abdicate_marathon_leader(params)

    # Wait for new leader (but same master server) to be up and ready
    common.wait_for_service_endpoint(marathon_service_name,
                                     timedelta(minutes=5).total_seconds(),
                                     path="ping")
    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])
    assert task_id == app['tasks'][0]['id'], "Task has a different ID after restore"

    # Check if the backup file exists and is valid
    cmd = 'tar -tf {}/{} | wc -l'.format(backup_dir, backup_file)
    status, data = shakedown.run_command_on_master(cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    assert int(data.rstrip()) > 0, "Backup file is empty"
Example 25
def test_marathon_backup_and_restore_leader(marathon_service_name):

    backup_file = 'backup.tar'
    backup_dir = '/tmp'
    backup_url = 'file://{}/{}'.format(backup_dir, backup_file)

    # Deploy a simple test app. It is expected to be there after leader reelection
    client = marathon.create_client()
    app_def = {
        "id": "/sleep",
        "instances": 1,
        "cpus": 0.01,
        "mem": 32,
        "cmd": "sleep 100000"
    }

    app_id = app_def['id']
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1
    task_id = app['tasks'][0]['id']

    # Abdicate the leader with backup and restore
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    # Wait for new leader (but same master server) to be up and ready
    shakedown.wait_for_service_endpoint(marathon_service_name,
                                        timedelta(minutes=5).total_seconds())
    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1
    assert task_id == app['tasks'][0]['id'], "Task has a different ID after restore"

    # Check if the backup file exists and is valid
    cmd = 'tar -tf {}/{} | wc -l'.format(backup_dir, backup_file)
    run, data = shakedown.run_command_on_master(cmd)
    assert run, 'Failed to validate backup file {}'.format(backup_url)
    assert int(data.rstrip()) > 0, "Backup file is empty"
Example 26
def test_marathon_backup_and_restore_leader(marathon_service_name):

    backup_file = 'backup.tar'
    backup_dir = '/tmp'
    backup_url = 'file://{}/{}'.format(backup_dir, backup_file)

    # Deploy a simple test app. It is expected to be there after leader reelection
    client = marathon.create_client()
    app_def = {
        "id": "/sleep",
        "instances": 1,
        "cpus": 0.01,
        "mem": 32,
        "cmd": "sleep 100000"
    }

    app_id = app_def['id']
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1
    task_id = app['tasks'][0]['id']

    # Abdicate the leader with backup and restore
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    # Wait for new leader (but same master server) to be up and ready
    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())
    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1
    assert task_id == app['tasks'][0]['id'], "Task has a different ID after restore"

    # Check if the backup file exists and is valid
    cmd = 'tar -tf {}/{} | wc -l'.format(backup_dir, backup_file)
    status, data = shakedown.run_command_on_master(cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    assert int(data.rstrip()) > 0, "Backup file is empty"
Example 27
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Backup and restore meeting is done with only one master since new master has to be able
       to read the backup file that was created by the previous master and the easiest way to
       test it is when there is 1 master
    """

    backup_file = 'backup.tar'
    backup_dir = '/tmp'
    backup_url = 'file://{}/{}'.format(backup_dir, backup_file)

    # Deploy a simple test app. It is expected to be there after leader reelection
    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])
    task_id = app['tasks'][0]['id']

    # Abdicate the leader with backup and restore
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    # Wait for new leader (but same master server) to be up and ready
    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())
    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])
    assert task_id == app['tasks'][0]['id'], "Task has a different ID after restore"

    # Check if the backup file exists and is valid
    cmd = 'tar -tf {}/{} | wc -l'.format(backup_dir, backup_file)
    status, data = shakedown.run_command_on_master(cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    assert int(data.rstrip()) > 0, "Backup file is empty"
Example 28
def test_event_channel():
    """ Tests the event channel.  The way events are verified is by streaming the events
        to a events.txt file.   The fixture ensures the file is removed before and after the test.
        events checked are connecting, deploying a good task and killing a task.
    """
    app_def = apps.mesos_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait(app_id=app_id)

    leader_ip = shakedown.marathon_leader_ip()

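    # events.txt on the leader is written by an event-streaming fixture (presumably
    # events_to_file above), which keeps a curl attached to /v2/events; retry until the
    # expected deployment events appear in it.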
    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_deployment_message():
        status, stdout = shakedown.run_command(leader_ip, 'cat events.exitcode')
        assert str(stdout).strip() == '', \
            "SSE stream disconnected (CURL exit code is {})".format(stdout.strip())
        status, stdout = shakedown.run_command(leader_ip, 'cat events.txt')
        assert 'event_stream_attached' in stdout, "event_stream_attached event has not been found"
        assert 'deployment_info' in stdout, "deployment_info event has not been found"
        assert 'deployment_step_success' in stdout, "deployment_step_success has not been found"

    check_deployment_message()
    client.remove_app(app_id, True)
    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_kill_message():
        status, stdout = shakedown.run_command(leader_ip, 'cat events.txt')
        assert 'KILLED' in stdout, "KILLED event has not been found"

    check_kill_message()
Example 29
def test_marathon_delete_leader_and_check_apps(marathon_service_name):
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        if original_leader == current_leader:
            common.delete_marathon_path('v2/leader')
        assert original_leader != current_leader, "A new Marathon leader has not been elected"

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        app = client.get_app(app_id)
        assert app['tasksRunning'] == expected_instances, \
            "The number of running tasks is {}, but {} was expected".format(app["tasksRunning"], expected_instances)

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def remove_app(app_id):
        client.remove_app(app_id)

    remove_app(app_id)
    shakedown.deployment_wait()

    try:
        _ = client.get_app(app_id)
    except:
        pass
    else:
        assert False, "The application resurrected"

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    marathon_leadership_changed()

    # check if app definition is still not there
    try:
        _ = client.get_app(app_id)
    except:
        pass
    else:
        assert False, "The application resurrected"
Example 30
def marathon_leadership_changed():
    current_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(current_leader))
    assert original_leader != current_leader, "A new Marathon leader has not been elected"
Example 31
def test_marathon_backup_and_check_apps(marathon_service_name):

    backup_file1 = 'backup1.tar'
    backup_file2 = 'backup2.tar'
    backup_dir = '/tmp'

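    # Clean up stale backup files from previous runs on every master before starting.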
    for master_ip in shakedown.get_all_master_ips():
        _ = shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file1))
        _ = shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file2))

    backup_url1 = 'file://{}/{}'.format(backup_dir, backup_file1)
    backup_url2 = 'file://{}/{}'.format(backup_dir, backup_file2)

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url1)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader, "A new Marathon leader has not been elected"

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        try:
            app = client.get_app(app_id)
        except Exception as e:
            if expected_instances != 0:
                raise e
        else:
            if expected_instances == 0:
                assert False, "The application resurrected"
            else:
                assert app['tasksRunning'] == expected_instances, \
                    "The number of running tasks is {}, but {} was expected".format(
                        app["tasksRunning"], expected_instances)

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    # then remove
    client.remove_app(app_id)
    shakedown.deployment_wait()

    check_app_existence(0)

    # Do a second backup. Before MARATHON-7525, doing a backup after an app was deleted
    # could leave Marathon unable to restart, because the second backup failed constantly.

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url2)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    # if leader changed, this means that marathon was able to start again, which is great :-).
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
Example 32
def marathon_leadership_changed():
    current_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(current_leader))
    if original_leader == current_leader:
        common.delete_marathon_path('v2/leader')
    assert original_leader != current_leader, "A new Marathon leader has not been elected"
Example 33
def marathon_leadership_changed():
    current_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(current_leader))
    if original_leader == current_leader:
        common.delete_marathon_path('v2/leader')
    assert original_leader != current_leader, "A new Marathon leader has not been elected"
Example 34
def marathon_leadership_changed():
    current_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(current_leader))
    assert original_leader != current_leader, "A new Marathon leader has not been elected"
Example 35
def test_marathon_delete_leader_and_check_apps(marathon_service_name):
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        if original_leader == current_leader:
            common.delete_marathon_path('v2/leader')
        assert original_leader != current_leader, "A new Marathon leader has not been elected"

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        app = client.get_app(app_id)
        assert app['tasksRunning'] == expected_instances, \
            "The number of running tasks is {}, but {} was expected".format(app["tasksRunning"], expected_instances)

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def remove_app(app_id):
        client.remove_app(app_id)

    remove_app(app_id)
    shakedown.deployment_wait()

    try:
        _ = client.get_app(app_id)
    except:
        pass
    else:
        assert False, "The application resurrected"

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    marathon_leadership_changed()

    # check if app definition is still not there
    try:
        _ = client.get_app(app_id)
    except:
        pass
    else:
        assert False, "The application resurrected"
Example 36
def test_marathon_backup_and_check_apps(marathon_service_name):

    backup_file1 = 'backup1.tar'
    backup_file2 = 'backup2.tar'
    backup_dir = '/tmp'
    backup_url1 = 'file://{}/{}'.format(backup_dir, backup_file1)
    backup_url2 = 'file://{}/{}'.format(backup_dir, backup_file2)

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # start an app
    app_def = common.app(id=uuid.uuid4().hex)
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url1)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name,
                                        timedelta(minutes=5).total_seconds())

    @retrying.retry(stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        app = client.get_app(app_id)
        assert app['tasksRunning'] == expected_instances

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    # then remove
    client.remove_app(app_id)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 0

    # Do a second backup. Before MARATHON-7525, doing a backup after an app was deleted
    # could leave Marathon unable to restart, because the second backup failed constantly.

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url2)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name,
                                        timedelta(minutes=5).total_seconds())

    # wait until leader changed
    # if leader changed, this means that marathon was able to start again, which is great :-).
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
Example 37
def marathon_leadership_changed():
    current_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(current_leader))
    assert original_leader != current_leader
Example 38
def marathon_leadership_changed():
    current_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(current_leader))
    assert original_leader != current_leader
Example 39
def test_marathon_backup_and_check_apps(marathon_service_name):

    backup_file1 = 'backup1.tar'
    backup_file2 = 'backup2.tar'
    backup_dir = '/tmp'

    for master_ip in shakedown.get_all_master_ips():
        _ = shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file1))
        _ = shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file2))

    backup_url1 = 'file://{}/{}'.format(backup_dir, backup_file1)
    backup_url2 = 'file://{}/{}'.format(backup_dir, backup_file2)

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url1)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader, "A new Marathon leader has not been elected"

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        try:
            app = client.get_app(app_id)
        except Exception as e:
            if expected_instances != 0:
                raise e
        else:
            if expected_instances == 0:
                assert False, "The application resurrected"
            else:
                assert app['tasksRunning'] == expected_instances, \
                    "The number of running tasks is {}, but {} was expected".format(
                        app["tasksRunning"], expected_instances)

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    # then remove
    client.remove_app(app_id)
    shakedown.deployment_wait()

    check_app_existence(0)

    # Do a second backup. Before MARATHON-7525, doing a backup after an app was deleted
    # could leave Marathon unable to restart, because the second backup failed constantly.

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url2)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    # if leader changed, this means that marathon was able to start again, which is great :-).
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)