Exemple #1
0
def test_master_driver_shutdown_sequence(kind, master_cmd, service_cmd,
                                         client, tmpdir):
    """Check the final state reached for each master/service exit scenario.

    ``kind`` labels the scenario; a label ending in ``succeeds`` expects the
    application to finish ``SUCCEEDED``, anything else expects ``FAILED``.
    ``master_cmd`` is used as the master script and ``service_cmd`` as the
    script of the single service. ``tmpdir`` is accepted but not used here.
    """
    expected_state = 'FAILED'
    if kind.endswith('succeeds'):
        expected_state = 'SUCCEEDED'

    app_name = "test_master_driver_shutdown_sequence_%s" % kind
    master = skein.Master(script=master_cmd)
    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script=service_cmd
    )
    spec = skein.ApplicationSpec(name=app_name,
                                 master=master,
                                 services={'service': service})

    if kind != 'service_succeeds':
        # service_fails results in immediate failure
        # driver_succeeds results in immediate success
        # driver_fails results in immediate failure
        with run_application(client, spec=spec, connect=False) as app_id:
            assert wait_for_completion(client, app_id) == expected_state
        return

    with run_application(client, spec=spec) as app:
        wait_for_containers(app, 1, states=['SUCCEEDED'])
        assert len(app.get_containers()) == 0
        # App hangs around until driver completes
        app.shutdown()
        assert wait_for_completion(client, app.id) == expected_state
Exemple #2
0
def test_memory_limit_exceeded(kind, client):
    """Check that exceeding the memory limit fails the application.

    When ``kind`` is ``'master'`` the memory-hungry script runs as the
    application master; otherwise it runs as a service. The logs must contain
    a kind-specific message, and for the master case the application report's
    diagnostics must mention the memory limit as well.
    """
    # Allocate noticeably more memory than the 128 MB limit
    hog_script = 'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'
    limits = skein.Resources(memory=128, vcores=1)

    runs_in_master = kind == 'master'
    if runs_in_master:
        master = skein.Master(resources=limits, script=hog_script)
        services = None
        search_txt = "memory limit"
    else:
        master = None
        services = {
            'service': skein.Service(resources=limits, script=hog_script)
        }
        search_txt = "memory used"

    app_name = "test_memory_limit_exceeded_%s" % kind
    spec = skein.ApplicationSpec(name=app_name,
                                 queue="default",
                                 master=master,
                                 services=services)

    with run_application(client, spec=spec, connect=False) as app_id:
        final_state = wait_for_completion(client, app_id)
        assert final_state == "FAILED"

    logs = get_logs(app_id)
    assert search_txt in logs

    if runs_in_master:
        diagnostics = client.application_report(app_id).diagnostics
        assert 'memory limit' in diagnostics
Exemple #3
0
def test_node_locality(client, strict):
    """Check that locality settings round-trip through a running application.

    With ``strict`` set, locality is not relaxed and a specific worker node is
    requested. Otherwise locality is relaxed and placeholder node and rack
    names are requested. The specification read back from the application
    must report the same nodes, racks and relax_locality flag.
    """
    relax_locality = not strict
    if relax_locality:
        nodes = ['not.a.real.host.name']
        racks = ['not.a.real.rack.name']
    else:
        nodes = ['worker.example.com']
        racks = []

    requested = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='sleep infinity',
        nodes=nodes,
        racks=racks,
        relax_locality=relax_locality
    )
    spec = skein.ApplicationSpec(name="test_node_locality",
                                 queue="default",
                                 services={"service": requested})

    with run_application(client, spec=spec) as app:
        wait_for_containers(app, 1, states=['RUNNING'])
        reported_spec = app.get_specification()
        app.shutdown()

    reported = reported_spec.services['service']
    assert reported.nodes == nodes
    assert reported.racks == racks
    assert reported.relax_locality == relax_locality
Exemple #4
0
def test_security_specified(client):
    """Check that an application can run with its own security credentials.

    The application is started with freshly generated credentials. A
    connection made with those credentials must work, while a connection made
    with the client's default credentials must raise ``ConnectionError``. The
    specification read back must reference the credentials as ``hdfs`` files
    rather than carrying the raw bytes.
    """
    custom_security = skein.Security.new_credentials()
    master = skein.Master(security=custom_security, script='sleep infinity')
    spec = skein.ApplicationSpec(name="test_security_specified",
                                 master=master)

    with run_application(client, spec=spec) as app:
        assert app.security is custom_security
        assert app.security != client.security

        remote_spec = app.get_specification()

        # Smoketest, can communicate
        trusted = client.connect(app.id, security=custom_security)
        trusted.get_specification()

        untrusted = client.connect(app.id)
        with pytest.raises(skein.ConnectionError):
            # Improper security credentials
            untrusted.get_specification()

        app.shutdown()

    remote_security = remote_spec.master.security
    assert remote_security.cert_bytes is None
    assert remote_security.key_bytes is None
    assert remote_security.cert_file.source.startswith('hdfs')
    assert remote_security.key_file.source.startswith('hdfs')
Exemple #5
0
def test_fail_on_container_failure(client, with_restarts):
    """Check that a failing container fails the whole application.

    Two instances of the ``test`` service are started. The script exits with
    status 1 in every container whose ``SKEIN_CONTAINER_ID`` is not
    ``test_0``. ``with_restarts`` selects ``max_restarts=2`` instead of 0;
    only in that case are ``test_2`` and ``test_3`` expected in the logs, and
    ``test_4`` must never appear.
    """
    script = ('if [[ "$SKEIN_CONTAINER_ID" != "test_0" ]]; then\n'
              '  exit 1\n'
              'else\n'
              '  sleep infinity\n'
              'fi')

    spec = skein.ApplicationSpec(
        name="test_fail_on_container_failure",
        services={
            'test':
            skein.Service(instances=2,
                          max_restarts=2 if with_restarts else 0,
                          resources=skein.Resources(memory=32, vcores=1),
                          script=script)
        })
    with run_application(client, spec=spec) as app:
        # The comparison has to be asserted; as a bare expression its result
        # was discarded and the final state was never actually checked.
        assert wait_for_completion(client, app.id) == "FAILED"

    logs = get_logs(app.id)
    assert "test_0" in logs
    assert "test_1" in logs
    assert ("test_2" in logs) == with_restarts
    assert ("test_3" in logs) == with_restarts
    assert "test_4" not in logs
Exemple #6
0
def test_allow_failures_max_restarts(client, allow_failures):
    """Check how ``allow_failures`` interacts with an exhausted ``max_restarts``.

    The single-instance service always exits with status 1 and may be
    restarted twice. With ``allow_failures`` the application is expected to
    keep running after three failed containers and to succeed once shut down
    manually; without it the application is expected to end ``FAILED``.
    """
    app_name = ("test_max_restarts_allow_failures_%s"
                % str(allow_failures).lower())
    master = skein.Master(script="sleep infinity")
    failing_service = skein.Service(
        instances=1,
        max_restarts=2,
        allow_failures=allow_failures,
        resources=skein.Resources(memory=128, vcores=1),
        script="exit 1"
    )
    spec = skein.ApplicationSpec(name=app_name,
                                 master=master,
                                 services={'myservice': failing_service})

    with run_application(client, spec=spec) as app:
        if not allow_failures:
            # Service failed 3 times and then terminates
            assert wait_for_completion(client, app.id) == 'FAILED'
        else:
            # Service failed 3 times, doesn't keep trying to run more
            wait_for_containers(app, 3, states=['FAILED'])
            # Check still running fine after 3 failures
            time.sleep(0.5)
            app.get_specification()
            # Shutdown manually
            app.shutdown()
            assert wait_for_completion(client, app.id) == 'SUCCEEDED'
Exemple #7
0
def test_simple_app(client):
    """Smoke-test the lifecycle of the default test application.

    While running, the application must have a repr, return an
    ``ApplicationSpec`` containing the ``sleeper`` service and be reported as
    ``RUNNING``. After shutdown, connecting to it or querying it must raise
    ``ConnectionError`` and it must only be listed among finished
    applications.
    """
    with run_application(client) as app:
        # smoketest repr
        repr(app)

        # Test get_specification
        spec = app.get_specification()
        assert isinstance(spec, skein.ApplicationSpec)
        assert 'sleeper' in spec.services

        report = client.application_report(app.id)
        assert report.state == 'RUNNING'

        app.shutdown()

    # Every way of reaching the application must now fail
    calls_expected_to_fail = [
        lambda: client.connect(app.id),
        lambda: client.connect(app.id, wait=False),
        app.get_specification,
    ]
    for call in calls_expected_to_fail:
        with pytest.raises(skein.ConnectionError):
            call()

    running_ids = {running.id for running in client.get_applications()}
    assert app.id not in running_ids

    finished_ids = {
        finished.id
        for finished in client.get_applications(states=['finished'])
    }
    assert app.id in finished_ids
Exemple #8
0
def test_container_environment(client, has_kerberos_enabled):
    """Check the environment and identity a service container runs with.

    The container prints its environment and login id and touches a file in
    HDFS. The logs must contain the expected ``SKEIN_*`` variables; the login
    id and the presence of ``HADOOP_USER_NAME`` depend on
    ``has_kerberos_enabled``.
    """
    commands = [
        'env', 'echo "LOGIN_ID=[$(whoami)]"',
        'hdfs dfs -touchz /user/testuser/test_container_permissions'
    ]
    # NOTE(review): 124 MB is requested but 128 is asserted below; presumably
    # the scheduler rounds the request up -- confirm against cluster config.
    resources = skein.Resources(memory=124, vcores=1)
    spec = skein.ApplicationSpec(
        name="test_container_permissions",
        queue="default",
        services={
            'service': skein.Service(resources=resources, commands=commands)
        })

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)

    logs = get_logs(app.id)
    expected_entries = [
        "USER=testuser",
        'SKEIN_APPMASTER_ADDRESS=',
        'SKEIN_APPLICATION_ID=%s' % app.id,
        'SKEIN_CONTAINER_ID=service_0',
        'SKEIN_RESOURCE_MEMORY=128',
        'SKEIN_RESOURCE_VCORES=1',
    ]
    for entry in expected_entries:
        assert entry in logs

    if not has_kerberos_enabled:
        assert "LOGIN_ID=[yarn]" in logs
        assert "HADOOP_USER_NAME" in logs
    else:
        assert "LOGIN_ID=[testuser]" in logs
        assert "HADOOP_USER_NAME" not in logs
Exemple #9
0
def test_shutdown_app(client):
    """Check that shutting down with a status sets the final status."""
    with run_application(client) as app:
        app_client = app.connect()
        app_client.shutdown(status='SUCCEEDED')

    final_status = app.status().final_status
    assert final_status == 'SUCCEEDED'
Exemple #10
0
def test_webui_acls(client, has_kerberos_enabled, ui_users, checks):
    """Check that web UI access follows the configured ``ui_users`` ACL.

    ``checks`` is iterated as ``(user, ok)`` pairs: the page fetched for each
    user must have ``resp.ok`` equal to ``ok``. The test is skipped when
    kerberos is enabled.
    """
    if has_kerberos_enabled:
        pytest.skip("Testing only implemented for simple authentication")

    acls = skein.ACLs(enable=True, ui_users=ui_users)
    sleeper = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                            commands=['sleep infinity'])
    spec = skein.ApplicationSpec(name="test_webui_acls",
                                 queue="default",
                                 acls=acls,
                                 services={'sleeper': sleeper})

    with run_application(client, spec=spec) as app:
        # Wait for a single container
        containers = wait_for_containers(app, 1, states=['RUNNING'])
        first = containers[0]
        assert first.state == 'RUNNING'
        assert first.service_name == 'sleeper'

        # Base url of web ui
        base = 'http://master.example.com:8088/proxy/%s' % app.id

        # Check proper subset of users allowed
        for user, ok in checks:
            url = base + "?user.name=%s" % user
            assert get_page(url).ok == ok

        app.shutdown()
0
def test_proxy_user(client):
    """Check that an application can be run, killed and inspected as a proxy user.

    The application is submitted as user ``alice``. The specification read
    back must report that user, and every file of the service must live under
    alice's HDFS home directory. Logs are then fetched as the same user.
    Skipped when ``pyarrow.hdfs`` is unavailable.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    # The user literal had been replaced by a "******" redaction placeholder,
    # which contradicts the 'alice' assertions below; restore the real name.
    proxy_user = "alice"

    spec = skein.ApplicationSpec(name="test_proxy_user",
                                 user=proxy_user,
                                 services={
                                     "service":
                                     skein.Service(resources=skein.Resources(
                                         memory=32, vcores=1),
                                                   script="sleep infinity")
                                 })
    with run_application(client, spec=spec) as app:
        spec2 = app.get_specification()
        client.kill_application(app.id, user=proxy_user)

    # Alice used throughout process
    assert spec2.user == 'alice'
    for fil in spec2.services['service'].files.values():
        assert fil.source.startswith(
            'hdfs://master.example.com:9000/user/alice')

    # Can get logs as user
    logs = get_logs(client, app.id, user=proxy_user)
    assert app.id in logs
    assert "application.master.log" in logs

    # Application directory deleted after kill
    # NOTE(review): the files above are asserted to live under /user/alice,
    # yet this checks /user/testuser -- confirm which directory is intended.
    fs = hdfs.connect()
    assert not fs.exists("/user/testuser/.skein/%s" % app.id)
Exemple #12
0
def test_kill_application_removes_appdir(client):
    """Check that the application directory is gone from HDFS after a kill.

    Skipped when ``pyarrow.hdfs`` is unavailable.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    with run_application(client) as app:
        client.kill_application(app.id)

    app_dir = "/user/testuser/.skein/%s" % app.id
    filesystem = hdfs.connect()
    assert not filesystem.exists(app_dir)
Exemple #13
0
def test_describe(client):
    """Check that ``describe`` returns a service or the whole specification.

    With a service name a ``skein.Service`` is expected; without arguments an
    ``ApplicationSpec`` whose ``sleeper`` entry equals that service.
    """
    with run_application(client) as app:
        app_client = app.connect()

        sleeper = app_client.describe(service='sleeper')
        assert isinstance(sleeper, skein.Service)

        full_spec = app_client.describe()
        assert isinstance(full_spec, skein.ApplicationSpec)
        assert full_spec.services['sleeper'] == sleeper
Exemple #14
0
def kv_test_app_persistent(client):
    """Yield a running application that has one killed ``sleeper`` container.

    The ``sleeper`` service is scaled to 2 and ``sleeper_0`` is killed before
    the application is yielded. The application is shut down once the
    consumer is done, even if the consumer raised.
    """
    with run_application(client) as running_app:
        # ensure one container id exists, but already completed
        running_app.scale('sleeper', 2)
        running_app.kill_container('sleeper_0')
        try:
            yield running_app
        finally:
            running_app.shutdown()
Exemple #15
0
def at_least_3_apps_in_history(client):
    """Make sure at least three matching applications exist in any state.

    Applications owned by ``testuser`` in the ``default`` queue are counted
    across every application state. One application is started and shut down
    for each one missing from a total of three.
    """
    every_state = skein.model.ApplicationState.values()
    matching = sum(
        1 for candidate in client.get_applications(states=every_state)
        if candidate.user == 'testuser' and candidate.queue == 'default'
    )
    missing = 3 - matching
    for _ in range(missing):
        with run_application(client) as app:
            app.shutdown()
Exemple #16
0
def test_file_systems(client):
    """Check that a service can touch a file in HDFS.

    The application lists the HDFS namenode in ``file_systems`` and is
    expected to finish ``SUCCEEDED``.
    """
    touch_service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='hdfs dfs -touchz /user/testuser/test_file_systems'
    )
    spec = skein.ApplicationSpec(
        name="test_file_systems",
        queue="default",
        services={'service': touch_service},
        file_systems=["hdfs://master.example.com:9000"]
    )

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'
Exemple #17
0
def test_kill_application_removes_appdir(use_skein, client):
    """Check that the application directory is gone from HDFS after a kill.

    ``use_skein`` selects whether the application is killed through the skein
    client or through the ``yarn application -kill`` command line. Skipped
    when ``pyarrow.hdfs`` is unavailable.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    with run_application(client) as app:
        if not use_skein:
            subprocess.check_call(["yarn", "application", "-kill", app.id])
        else:
            client.kill_application(app.id)

    app_dir = "/user/testuser/.skein/%s" % app.id
    filesystem = hdfs.connect()
    assert not filesystem.exists(app_dir)
Exemple #18
0
def test_file_systems(client):
    """Check that a service can touch a file in HDFS.

    The application lists the HDFS namenode in ``file_systems`` and the test
    waits for it to succeed.
    """
    touch_service = skein.Service(
        resources=skein.Resources(memory=124, vcores=1),
        commands=['hdfs dfs -touchz /user/testuser/test_file_systems']
    )
    spec = skein.ApplicationSpec(name="test_file_systems",
                                 queue="default",
                                 services={'service': touch_service},
                                 file_systems=["hdfs://master.example.com:9000"])

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)
Exemple #19
0
def test_add_container(client):
    """Check adding containers with and without environment overrides.

    The service starts with zero instances. The script echoes ``MYENV`` and
    ``MYENV2`` and exits 1 only when ``MYENV`` is ``bar``. A container added
    with ``MYENV=bar`` is therefore expected to fail, be restarted once
    (``max_restarts=1``) and then fail the application.
    """
    script = ('echo "$SKEIN_CONTAINER_ID - MYENV=$MYENV"\n'
              'echo "$SKEIN_CONTAINER_ID - MYENV2=$MYENV2"\n'
              'if [[ "$MYENV" == "bar" ]]; then\n'
              '  exit 1\n'
              'else\n'
              '  exit 0\n'
              'fi')

    master = skein.Master(script="sleep infinity")
    service = skein.Service(instances=0,
                            resources=skein.Resources(memory=32, vcores=1),
                            env={'MYENV': 'foo', 'MYENV2': 'baz'},
                            max_restarts=1,
                            script=script)
    spec = skein.ApplicationSpec(name="test_add_container",
                                 master=master,
                                 services={'test': service})

    with run_application(client, spec=spec) as app:
        # Add container using the service's default environment
        first = app.add_container('test')
        assert first.instance == 0
        wait_for_containers(app, 1, states=['RUNNING', 'SUCCEEDED'])

        # Non-existent service
        with pytest.raises(ValueError):
            app.add_container('foobar')

        # Add container with override for MYENV
        second = app.add_container('test', {'MYENV': 'bar'})
        assert second.instance == 1

        # The new env var triggers a failure, should fail twice,
        # then fail the whole application
        assert wait_for_completion(client, app.id) == 'FAILED'

    logs = get_logs(app.id)
    expected_lines = (
        "test_0 - MYENV=foo",
        "test_0 - MYENV2=baz",
        "test_1 - MYENV=bar",
        "test_1 - MYENV2=baz",
        "test_2 - MYENV=bar",
        "test_2 - MYENV2=baz",
    )
    for line in expected_lines:
        assert line in logs

    assert "test_3" not in logs
Exemple #20
0
def test_webui(client, has_kerberos_enabled):
    """Smoke-test the pages served by the application web UI.

    Covers the unauthenticated 401, the service listing under ``/`` and
    ``/services``, the key-value page, a static resource and a 404. Skipped
    when kerberos is enabled or ``requests`` is unavailable.
    """
    if has_kerberos_enabled:
        pytest.skip("Testing only implemented for simple authentication")
    requests = pytest.importorskip('requests')

    with run_application(client) as app:
        # Wait for a single container
        containers = wait_for_containers(app, 1, states=['RUNNING'])
        first = containers[0]
        assert first.state == 'RUNNING'
        assert first.service_name == 'sleeper'

        # Set some key-values
        app.kv['foo'] = b'bar'
        app.kv['bad'] = b'\255\255\255'  # non-unicode

        # Base url of web ui
        base = 'http://master.example.com:8088/proxy/%s' % app.id

        # Fails without authentication
        anonymous = requests.get(base)
        assert anonymous.status_code == 401

        # With authentication
        authenticated = requests.get(base + "?user.name=testuser")
        assert authenticated.ok
        cookies = authenticated.cookies

        # / and /services are the same
        for suffix in ['', '/services']:
            listing = requests.get(base + suffix, cookies=cookies)
            assert listing.ok
            assert 'sleeper_0' in listing.text  # list of containers
            assert '/testuser/sleeper.log' in listing.text  # link to logs

        # /kv store has a few items in it
        kv_page = requests.get(base + '/kv', cookies=cookies)
        assert kv_page.ok
        for fragment in ('foo', 'bar', 'bad', '<binary value>'):
            assert fragment in kv_page.text

        # Resources are reachable
        favicon = requests.get(base + '/favicon.ico', cookies=cookies)
        assert favicon.ok

        # 404 for fake pages
        missing = requests.get(base + '/not-a-real-page', cookies=cookies)
        assert missing.status_code == 404

        app.shutdown()
Exemple #21
0
def ui_test_app(client, has_kerberos_enabled):
    """Yield a running application with one ``RUNNING`` container.

    Skips when kerberos is enabled. ``spec`` is not defined in this function
    and is resolved from the enclosing scope. When the consumer is done the
    application is shut down; if that raises ``ConnectionError`` the
    application is killed through the client instead.
    """
    if has_kerberos_enabled:
        pytest.skip("Testing only implemented for simple authentication")

    with run_application(client, spec=spec) as ui_app:
        # Wait for a single container
        wait_for_containers(ui_app, 1, states=['RUNNING'])
        try:
            yield ui_app
        finally:
            try:
                ui_app.shutdown()
            except ConnectionError:
                # Fall back to killing the application through the client
                client.kill_application(ui_app.id)
Exemple #22
0
def test_set_log_level(client):
    """Check that the master's ``log_level`` setting shows up in the logs.

    The master is configured with ``log_level='debug'``; once the application
    has succeeded its logs must contain ``DEBUG``.
    """
    service = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                            script='ls')
    # Name the application after this test. It previously reused
    # "test_custom_log4j_properties", which made the two tests'
    # applications indistinguishable by name.
    spec = skein.ApplicationSpec(name="test_set_log_level",
                                 queue="default",
                                 master=skein.Master(log_level='debug'),
                                 services={'service': service})

    with run_application(client, spec=spec) as app:
        assert wait_for_completion(client, app.id) == 'SUCCEEDED'

    logs = get_logs(app.id)
    assert 'DEBUG' in logs
Exemple #23
0
def test_memory_limit_exceeded(client):
    """Check that a service exceeding its memory limit fails the application.

    The logs of the failed application must mention ``memory used``.
    """
    # Allocate noticeably more memory than the 128 MB limit
    hog_command = (
        'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'
    )
    hungry = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                           commands=[hog_command])
    spec = skein.ApplicationSpec(name="test_memory_limit_exceeded",
                                 queue="default",
                                 services={"service": hungry})

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == "FAILED"

    assert "memory used" in get_logs(app.id)
Exemple #24
0
def test_simple_app(client):
    """Smoke-test the lifecycle of the default test application.

    While running, the application must have a repr, return an
    ``ApplicationSpec`` containing the ``sleeper`` service, be reported as
    ``RUNNING`` and accept a second connection whose client can be closed
    more than once. After shutdown, connecting to it or querying it must
    raise ``ConnectionError`` and it must only be listed among finished
    applications.
    """
    with run_application(client) as app:
        # smoketest repr
        repr(app)

        # Test get_specification
        a = app.get_specification()
        assert isinstance(a, skein.ApplicationSpec)
        assert 'sleeper' in a.services

        assert client.application_report(app.id).state == 'RUNNING'

        # Connect with a new client
        with client.connect(app.id) as app2:
            app2.get_specification()

        # application client is closed
        with pytest.raises(Exception):
            app2.get_specification()

        # Closing an application client is idempotent
        app2.close()

        app.shutdown()

    with pytest.raises(skein.ConnectionError):
        client.connect(app.id)

    with pytest.raises(skein.ConnectionError):
        client.connect(app.id, wait=False)

    # On Travis CI there can be some lag between application being shutdown and
    # application actually shutting down. Retry up to 5 seconds before failing.
    #
    # A wall-clock deadline is used instead of repeatedly subtracting 0.1 from
    # a float counter: that counter never becomes exactly 0, so a
    # truthiness-based loop would spin forever while the app keeps responding.
    deadline = time.time() + 5
    with pytest.raises(skein.ConnectionError):
        while time.time() < deadline:
            # Raises ConnectionError once the application is really gone,
            # which is what the enclosing ``pytest.raises`` is waiting for.
            app.get_specification()
            # Didn't fail, try again later
            time.sleep(0.1)

    running_apps = client.get_applications()
    assert app.id not in {a.id for a in running_apps}

    finished_apps = client.get_applications(states=['finished'])
    assert app.id in {a.id for a in finished_apps}
Exemple #25
0
def test_set_application_progress(client):
    """Check that progress set on the application appears in its report.

    Progress values of -0.5 and 1.5 must be rejected with ``ValueError``.
    """
    with run_application(client) as app:
        app.set_progress(0.5)
        # Give the allocate loop time to update
        time.sleep(2)
        reported = client.application_report(app.id).progress
        assert reported == 0.5

        for invalid in (-0.5, 1.5):
            with pytest.raises(ValueError):
                app.set_progress(invalid)

        app.shutdown()
Exemple #26
0
def test_move_application(client):
    """Check moving an application between queues and the related errors.

    Covers a successful move plus ``ValueError`` for a non-leaf queue, a
    missing queue, an application that has already shut down, an unknown
    application id and a malformed application id. Except for the malformed
    id, each error message is also checked to be free of Java class names.
    """
    def assert_no_traceback(message):
        # Ensure message doesn't contain traceback
        assert 'org.apache.hadoop' not in str(message)

    master = skein.Master(script="sleep infinity")
    spec = skein.ApplicationSpec(name="test_move_application",
                                 queue="default",
                                 master=master)

    with run_application(client, spec=spec) as app:
        assert client.application_report(app.id).queue == "default"

        # Successful move
        client.move_application(app.id, "apples")
        assert client.application_report(app.id).queue == "apples"

        # Not a leaf queue
        with pytest.raises(ValueError) as err:
            client.move_application(app.id, "fruit")
        assert 'Leaf' in str(err.value)
        assert_no_traceback(err.value)

        # Queue doesn't exist
        with pytest.raises(ValueError) as err:
            client.move_application(app.id, "missing")
        assert "doesn't exist" in str(err.value)
        assert_no_traceback(err.value)

        app.shutdown()

    # App already shutdown
    with pytest.raises(ValueError) as err:
        client.move_application(app.id, "default")
    assert "cannot be moved" in str(err.value)
    assert_no_traceback(err.value)

    # App doesn't exist
    missing_appid = 'application_1526134340424_0012'
    with pytest.raises(ValueError) as err:
        client.move_application(missing_appid, "default")
    # This error message is different in Hadoop 3
    message = str(err.value)
    assert "absent" in message or "doesn't exist" in message
    assert_no_traceback(err.value)

    # Invalid application id
    with pytest.raises(ValueError) as err:
        client.move_application("oh no", "default")
    assert "Invalid" in str(err.value)
0
def test_custom_log4j_properties(client, tmpdir):
    """Check that a custom log4j configuration is used by the master.

    The module-level ``custom_log4j_properties`` text is written to a file in
    ``tmpdir`` and passed to the master as ``log_config``. The logs of the
    succeeded application must contain ``CUSTOM-LOG4J-SUCCEEDED``.
    """
    configpath = str(tmpdir.join("log4j.properties"))

    master = skein.Master(log_config=configpath)
    ls_service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='ls'
    )
    spec = skein.ApplicationSpec(name="test_custom_log4j_properties",
                                 queue="default",
                                 master=master,
                                 services={'service': ls_service})

    # The config file is only written after the spec has been built
    with open(configpath, 'w') as config_file:
        config_file.write(custom_log4j_properties)

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'

    assert 'CUSTOM-LOG4J-SUCCEEDED' in get_logs(app.id)
Exemple #28
0
def test_master_driver_foo(client, tmpdir):
    """Check that a master script sees its configured env and files.

    The master runs ``ls`` and ``env`` with ``FOO=BAR`` set and a dummy file
    attached as ``myfile``; both must show up in the logs of the succeeded
    application.
    """
    dummy_path = str(tmpdir.join("dummy-file"))
    with open(dummy_path, 'w') as dummy:
        dummy.write('foobar')

    master = skein.Master(script='ls\nenv',
                          env={'FOO': 'BAR'},
                          files={'myfile': dummy_path})
    spec = skein.ApplicationSpec(name="test_master_driver", master=master)

    with run_application(client, spec=spec, connect=False) as app_id:
        final_state = wait_for_completion(client, app_id)
        assert final_state == 'SUCCEEDED'

    logs = get_logs(app_id)
    for expected in ('FOO=BAR', 'myfile'):
        assert expected in logs
Exemple #29
0
def test_retries_succeeds(client):
    """Check that an application succeeding on its second attempt succeeds.

    The master script comes from ``test_retries_script_template`` with
    ``succeed_on='02'`` and ``max_attempts`` is 2. The logs must show the
    failed first attempt, the retry notice and the success, and the
    application directory must be gone from HDFS afterwards. Skipped when
    ``pyarrow.hdfs`` is unavailable.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    master_script = test_retries_script_template.format(succeed_on='02')
    spec = skein.ApplicationSpec(name="test_application_retries_succeeds",
                                 max_attempts=2,
                                 master=skein.Master(script=master_script))

    with run_application(client, spec=spec, connect=False) as app_id:
        final_state = wait_for_completion(client, app_id)
        assert final_state == 'SUCCEEDED'

    logs = get_logs(app_id)
    expected_messages = (
        'Failing on other attempts',
        'Application attempt 1 out of 2 failed, will retry',
        'Succeeding on attempt 02',
    )
    for message in expected_messages:
        assert message in logs

    app_dir = "/user/testuser/.skein/%s" % app_id
    filesystem = hdfs.connect()
    assert not filesystem.exists(app_dir)
Exemple #30
0
def test_events_application_shutdown(client):
    """Check event-queue behaviour after the application shuts down.

    Three keys with prefix ``a`` are put before shutdown. The queue must
    still deliver those three put events in order, after which further
    ``get`` calls must raise ``ConnectionError``.
    """
    entries = [('a1', b'1'), ('a2', b'2'), ('a3', b'3')]

    with run_application(client) as app:
        queue = app.kv.events(prefix='a')
        for key, value in entries:
            app.kv.put(key, value)
        app.shutdown()

    event_filter = next(iter(queue.filters))
    for key, value in entries:
        assert queue.get() == put_event(key, value, None, event_filter)

    # All further requests error with connection error
    for _ in range(2):
        with pytest.raises(ConnectionError):
            queue.get()