def test_master_driver_shutdown_sequence(kind, master_cmd, service_cmd,
                                         client, tmpdir):
    """Check the final application state for one master/service scenario.

    ``kind`` names the scenario. Kinds ending in ``succeeds`` are expected to
    finish as ``SUCCEEDED``; every other kind is expected to finish as
    ``FAILED``. ``master_cmd`` and ``service_cmd`` are the scripts run by the
    application master and by the single service respectively.
    """
    driver = skein.Master(script=master_cmd)
    worker = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script=service_cmd
    )
    spec = skein.ApplicationSpec(
        name="test_master_driver_shutdown_sequence_%s" % kind,
        master=driver,
        services={'service': worker}
    )

    expected = 'SUCCEEDED' if kind.endswith('succeeds') else 'FAILED'

    if kind != 'service_succeeds':
        # service_fails results in immediate failure
        # driver_succeeds results in immediate success
        # driver_fails results in immediate failure
        with run_application(client, spec=spec, connect=False) as app_id:
            assert wait_for_completion(client, app_id) == expected
        return

    with run_application(client, spec=spec) as app:
        wait_for_containers(app, 1, states=['SUCCEEDED'])
        assert len(app.get_containers()) == 0
        # App hangs around until driver completes
        app.shutdown()
        assert wait_for_completion(client, app.id) == expected
def test_memory_limit_exceeded(kind, client):
    """Run a script that over-allocates memory and expect the app to fail.

    When ``kind`` is ``'master'`` the script runs as the application master,
    otherwise as a service. The text searched for in the logs differs between
    the two cases, and the master case additionally checks the application
    report diagnostics.
    """
    limits = skein.Resources(memory=128, vcores=1)
    # Allocate noticeably more memory than the 128 MB limit
    hog = 'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'

    is_master = kind == 'master'
    if is_master:
        master = skein.Master(resources=limits, script=hog)
        services = None
        search_txt = "memory limit"
    else:
        master = None
        services = {'service': skein.Service(resources=limits, script=hog)}
        search_txt = "memory used"

    spec = skein.ApplicationSpec(
        name="test_memory_limit_exceeded_%s" % kind,
        queue="default",
        master=master,
        services=services
    )
    with run_application(client, spec=spec, connect=False) as app_id:
        assert wait_for_completion(client, app_id) == "FAILED"

    assert search_txt in get_logs(app_id)

    if is_master:
        diagnostics = client.application_report(app_id).diagnostics
        assert 'memory limit' in diagnostics
def test_node_locality(client, strict):
    """Check that node/rack locality settings round-trip through the app.

    With ``strict`` set, a concrete node is requested and locality is not
    relaxed; otherwise made-up node and rack names are requested with relaxed
    locality. The specification read back from the running application must
    report the same values that were submitted.
    """
    if strict:
        nodes, racks, relax_locality = ['worker.example.com'], [], False
    else:
        nodes = ['not.a.real.host.name']
        racks = ['not.a.real.rack.name']
        relax_locality = True

    spec = skein.ApplicationSpec(
        name="test_node_locality",
        queue="default",
        services={
            "service": skein.Service(
                resources=skein.Resources(memory=128, vcores=1),
                script='sleep infinity',
                nodes=nodes,
                racks=racks,
                relax_locality=relax_locality
            )
        }
    )
    with run_application(client, spec=spec) as app:
        wait_for_containers(app, 1, states=['RUNNING'])
        returned = app.get_specification()
        app.shutdown()

    remote = returned.services['service']
    assert remote.nodes == nodes
    assert remote.racks == racks
    assert remote.relax_locality == relax_locality
def test_security_specified(client):
    """Submit an application with its own credentials and verify their use.

    The application client must carry the submitted security object, a
    connection with those credentials must work, and a connection with the
    client's default credentials must be rejected. The specification read
    back from the application must reference credential files by an ``hdfs``
    source rather than embedding the bytes.
    """
    creds = skein.Security.new_credentials()
    master = skein.Master(security=creds, script='sleep infinity')
    spec = skein.ApplicationSpec(name="test_security_specified",
                                 master=master)

    with run_application(client, spec=spec) as app:
        assert app.security is creds
        assert app.security != client.security

        returned = app.get_specification()

        # Smoketest, can communicate
        trusted = client.connect(app.id, security=creds)
        trusted.get_specification()

        untrusted = client.connect(app.id)
        with pytest.raises(skein.ConnectionError):
            # Improper security credentials
            untrusted.get_specification()

        app.shutdown()

    remote = returned.master.security
    assert remote.cert_bytes is None
    assert remote.key_bytes is None
    assert remote.cert_file.source.startswith('hdfs')
    assert remote.key_file.source.startswith('hdfs')
def test_fail_on_container_failure(client, with_restarts):
    """Check that a failing service container fails the whole application.

    The service script sleeps forever in container ``test_0`` and exits with
    status 1 in every other container. ``with_restarts`` selects whether the
    service may be restarted (``max_restarts=2``) or not (``max_restarts=0``),
    which determines which container ids are expected in the logs.
    """
    script = ('if [[ "$SKEIN_CONTAINER_ID" != "test_0" ]]; then\n'
              ' exit 1\n'
              'else\n'
              ' sleep infinity\n'
              'fi')

    spec = skein.ApplicationSpec(
        name="test_fail_on_container_failure",
        services={
            'test': skein.Service(
                instances=2,
                max_restarts=2 if with_restarts else 0,
                resources=skein.Resources(memory=32, vcores=1),
                script=script
            )
        }
    )
    with run_application(client, spec=spec) as app:
        # The comparison was previously a bare expression whose result was
        # discarded; assert it so the final state is actually verified.
        assert wait_for_completion(client, app.id) == "FAILED"

    logs = get_logs(app.id)
    assert "test_0" in logs
    assert "test_1" in logs
    # Restarted containers only show up when restarts are allowed
    assert ("test_2" in logs) == with_restarts
    assert ("test_3" in logs) == with_restarts
    assert "test_4" not in logs
def test_allow_failures_max_restarts(client, allow_failures):
    """Check how ``allow_failures`` interacts with ``max_restarts``.

    The single service always exits with status 1 and may be restarted twice.
    With ``allow_failures`` the application is expected to stay reachable
    after three failed containers and to succeed once shut down manually;
    without it the application is expected to end as ``FAILED``.
    """
    failing = skein.Service(
        instances=1,
        max_restarts=2,
        allow_failures=allow_failures,
        resources=skein.Resources(memory=128, vcores=1),
        script="exit 1"
    )
    spec = skein.ApplicationSpec(
        name="test_max_restarts_allow_failures_%s"
             % str(allow_failures).lower(),
        master=skein.Master(script="sleep infinity"),
        services={'myservice': failing}
    )

    with run_application(client, spec=spec) as app:
        if not allow_failures:
            # Service failed 3 times and then terminates
            assert wait_for_completion(client, app.id) == 'FAILED'
        else:
            # Service failed 3 times, doesn't keep trying to run more
            wait_for_containers(app, 3, states=['FAILED'])
            # Check still running fine after 3 failures
            time.sleep(0.5)
            app.get_specification()
            # Shutdown manually
            app.shutdown()
            assert wait_for_completion(client, app.id) == 'SUCCEEDED'
def test_simple_app(client):
    """Smoke-test the lifecycle of the default application.

    While the application runs, its specification and report are inspected.
    After shutdown, connecting to it or using the old application client
    must raise ``skein.ConnectionError``, and the application must be listed
    among finished applications rather than in the default listing.
    """
    with run_application(client) as app:
        # smoketest repr
        repr(app)

        # Test get_specification
        returned = app.get_specification()
        assert isinstance(returned, skein.ApplicationSpec)
        assert 'sleeper' in returned.services

        report = client.application_report(app.id)
        assert report.state == 'RUNNING'

        app.shutdown()

    with pytest.raises(skein.ConnectionError):
        client.connect(app.id)

    with pytest.raises(skein.ConnectionError):
        client.connect(app.id, wait=False)

    with pytest.raises(skein.ConnectionError):
        app.get_specification()

    running_ids = {a.id for a in client.get_applications()}
    assert app.id not in running_ids

    finished_ids = {a.id for a in client.get_applications(states=['finished'])}
    assert app.id in finished_ids
def test_container_environment(client, has_kerberos_enabled):
    """Inspect the environment a service container runs in.

    The container prints its environment and login id and touches a file on
    HDFS. The logs are then checked for the expected user and ``SKEIN_*``
    variables; the expected login id and the presence of ``HADOOP_USER_NAME``
    depend on ``has_kerberos_enabled``.
    """
    service = skein.Service(
        resources=skein.Resources(memory=124, vcores=1),
        commands=[
            'env',
            'echo "LOGIN_ID=[$(whoami)]"',
            'hdfs dfs -touchz /user/testuser/test_container_permissions'
        ]
    )
    spec = skein.ApplicationSpec(
        name="test_container_permissions",
        queue="default",
        services={'service': service}
    )

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)

    logs = get_logs(app.id)
    expected_lines = (
        "USER=testuser",
        'SKEIN_APPMASTER_ADDRESS=',
        'SKEIN_APPLICATION_ID=%s' % app.id,
        'SKEIN_CONTAINER_ID=service_0',
        # 124 was requested above, yet 128 is what the logs must report
        'SKEIN_RESOURCE_MEMORY=128',
        'SKEIN_RESOURCE_VCORES=1',
    )
    for line in expected_lines:
        assert line in logs

    if not has_kerberos_enabled:
        assert "LOGIN_ID=[yarn]" in logs
        assert "HADOOP_USER_NAME" in logs
    else:
        assert "LOGIN_ID=[testuser]" in logs
        assert "HADOOP_USER_NAME" not in logs
def test_shutdown_app(client):
    """Shut the application down with an explicit final status.

    A connection obtained from the application is asked to shut down with
    status ``SUCCEEDED``; the application's reported final status must match.
    """
    with run_application(client) as app:
        app_client = app.connect()
        app_client.shutdown(status='SUCCEEDED')

    final_status = app.status().final_status
    assert final_status == 'SUCCEEDED'
def test_webui_acls(client, has_kerberos_enabled, ui_users, checks):
    """Check that web UI access follows the configured ``ui_users`` ACL.

    ``checks`` is an iterable of ``(user, ok)`` pairs: for each user the page
    is requested with ``user.name`` set, and the response's ``ok`` flag must
    equal the expected value. Skipped when kerberos is enabled.
    """
    if has_kerberos_enabled:
        pytest.skip("Testing only implemented for simple authentication")

    sleeper = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        commands=['sleep infinity']
    )
    acls = skein.ACLs(enable=True, ui_users=ui_users)
    spec = skein.ApplicationSpec(
        name="test_webui_acls",
        queue="default",
        acls=acls,
        services={'sleeper': sleeper}
    )

    with run_application(client, spec=spec) as app:
        # Wait for a single container
        first = wait_for_containers(app, 1, states=['RUNNING'])[0]
        assert first.state == 'RUNNING'
        assert first.service_name == 'sleeper'

        # Base url of web ui
        base = 'http://master.example.com:8088/proxy/%s' % app.id

        # Check proper subset of users allowed
        for user, ok in checks:
            url = base + "?user.name=%s" % user
            assert get_page(url).ok == ok

        app.shutdown()
def test_proxy_user(client):
    """Run an application as the proxy user ``alice`` and verify its traces.

    The specification read back from the application must report ``alice`` as
    the user and place service files under alice's HDFS home directory. Logs
    must be retrievable as that user, and the application directory checked
    at the end must be gone after the application is killed.

    Requires ``pyarrow.hdfs``; skipped when it cannot be imported.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    # The user must be the same 'alice' that the assertions below check for;
    # the masked placeholder previously used here could never satisfy them.
    proxy_user = "alice"

    spec = skein.ApplicationSpec(
        name="test_proxy_user",
        user=proxy_user,
        services={
            "service": skein.Service(
                resources=skein.Resources(memory=32, vcores=1),
                script="sleep infinity"
            )
        }
    )
    with run_application(client, spec=spec) as app:
        spec2 = app.get_specification()
        client.kill_application(app.id, user=proxy_user)

    # Alice used throughout process
    assert spec2.user == 'alice'
    for fil in spec2.services['service'].files.values():
        assert fil.source.startswith(
            'hdfs://master.example.com:9000/user/alice')

    # Can get logs as user
    logs = get_logs(client, app.id, user=proxy_user)
    assert app.id in logs
    assert "application.master.log" in logs

    # Application directory deleted after kill
    fs = hdfs.connect()
    assert not fs.exists("/user/testuser/.skein/%s" % app.id)
def test_kill_application_removes_appdir(client):
    """Killing an application must remove its ``.skein`` directory on HDFS.

    Requires ``pyarrow.hdfs``; skipped when it cannot be imported.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    with run_application(client) as app:
        client.kill_application(app.id)

    appdir = "/user/testuser/.skein/%s" % app.id
    filesystem = hdfs.connect()
    assert not filesystem.exists(appdir)
def test_describe(client):
    """Check ``describe`` for a single service and for the whole application.

    Describing the ``sleeper`` service must return a ``skein.Service``;
    describing without arguments must return a ``skein.ApplicationSpec``
    whose ``sleeper`` entry equals the service description.
    """
    with run_application(client) as app:
        app_client = app.connect()

        service_desc = app_client.describe(service='sleeper')
        assert isinstance(service_desc, skein.Service)

        app_desc = app_client.describe()
        assert isinstance(app_desc, skein.ApplicationSpec)
        assert app_desc.services['sleeper'] == service_desc
def kv_test_app_persistent(client):
    """Yield a running application that has one already-killed container.

    The ``sleeper`` service is scaled to two instances and ``sleeper_0`` is
    killed before the application is handed to the consumer. The application
    is shut down once the consumer is done, even if it raised.
    """
    with run_application(client) as persistent_app:
        # ensure one container id exists, but already completed
        persistent_app.scale('sleeper', 2)
        persistent_app.kill_container('sleeper_0')
        try:
            yield persistent_app
        finally:
            persistent_app.shutdown()
def at_least_3_apps_in_history(client):
    """Make sure at least three matching applications exist in any state.

    Applications in every state are listed and those belonging to user
    ``testuser`` in queue ``default`` are counted. For each one missing to
    reach three, a default application is started and immediately shut down.
    """
    every_state = skein.model.ApplicationState.values()
    history = client.get_applications(states=every_state)

    n_matching = sum(
        1 for a in history
        if a.user == 'testuser' and a.queue == 'default'
    )

    # range() of a non-positive number is empty, so nothing runs when
    # enough applications already exist.
    for _ in range(3 - n_matching):
        with run_application(client) as app:
            app.shutdown()
def test_file_systems(client):
    """Run a service that touches an HDFS file with ``file_systems`` set.

    The application lists ``hdfs://master.example.com:9000`` in
    ``file_systems`` and is expected to finish as ``SUCCEEDED``.
    """
    toucher = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='hdfs dfs -touchz /user/testuser/test_file_systems'
    )
    spec = skein.ApplicationSpec(
        name="test_file_systems",
        queue="default",
        services={'service': toucher},
        file_systems=["hdfs://master.example.com:9000"]
    )

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'
def test_kill_application_removes_appdir(use_skein, client):
    """Killing an application must remove its ``.skein`` directory on HDFS.

    With ``use_skein`` the application is killed through the skein client;
    otherwise it is killed with the ``yarn application -kill`` command.

    Requires ``pyarrow.hdfs``; skipped when it cannot be imported.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    with run_application(client) as app:
        if not use_skein:
            subprocess.check_call(["yarn", "application", "-kill", app.id])
        else:
            client.kill_application(app.id)

    appdir = "/user/testuser/.skein/%s" % app.id
    filesystem = hdfs.connect()
    assert not filesystem.exists(appdir)
def test_file_systems(client):
    """Run a service that touches an HDFS file with ``file_systems`` set.

    The application lists ``hdfs://master.example.com:9000`` in
    ``file_systems``; the test then waits for it via ``wait_for_success``.
    """
    toucher = skein.Service(
        resources=skein.Resources(memory=124, vcores=1),
        commands=['hdfs dfs -touchz /user/testuser/test_file_systems']
    )
    spec = skein.ApplicationSpec(
        name="test_file_systems",
        queue="default",
        services={'service': toucher},
        file_systems=["hdfs://master.example.com:9000"]
    )

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)
def test_add_container(client):
    """Add containers at runtime, with and without environment overrides.

    The service starts with zero instances. A first container is added with
    the default environment. A second is added with ``MYENV`` overridden to
    ``bar``, which makes the script exit with status 1; with one restart
    allowed the application is expected to end as ``FAILED``. Adding a
    container for an unknown service must raise ``ValueError``.
    """
    script = ('echo "$SKEIN_CONTAINER_ID - MYENV=$MYENV"\n'
              'echo "$SKEIN_CONTAINER_ID - MYENV2=$MYENV2"\n'
              'if [[ "$MYENV" == "bar" ]]; then\n'
              ' exit 1\n'
              'else\n'
              ' exit 0\n'
              'fi')

    master = skein.Master(script="sleep infinity")
    service = skein.Service(
        instances=0,
        resources=skein.Resources(memory=32, vcores=1),
        env={'MYENV': 'foo', 'MYENV2': 'baz'},
        max_restarts=1,
        script=script
    )
    spec = skein.ApplicationSpec(
        name="test_add_container",
        master=master,
        services={'test': service}
    )

    with run_application(client, spec=spec) as app:
        # Add container with new overrides
        first = app.add_container('test')
        assert first.instance == 0
        wait_for_containers(app, 1, states=['RUNNING', 'SUCCEEDED'])

        # Non-existent service
        with pytest.raises(ValueError):
            app.add_container('foobar')

        # Add container with override for MYENV
        second = app.add_container('test', {'MYENV': 'bar'})
        assert second.instance == 1

        # The new env var triggers a failure, should fail twice,
        # then fail the whole application
        assert wait_for_completion(client, app.id) == 'FAILED'

    logs = get_logs(app.id)
    expected_lines = (
        "test_0 - MYENV=foo",
        "test_0 - MYENV2=baz",
        "test_1 - MYENV=bar",
        "test_1 - MYENV2=baz",
        "test_2 - MYENV=bar",
        "test_2 - MYENV2=baz",
    )
    for line in expected_lines:
        assert line in logs
    assert "test_3" not in logs
def test_webui(client, has_kerberos_enabled):
    """Smoke-test the application web UI through the proxy URL.

    Covers the unauthenticated response, the services pages, the key-value
    page, a static resource and an unknown page. Skipped when kerberos is
    enabled or when ``requests`` cannot be imported.
    """
    if has_kerberos_enabled:
        pytest.skip("Testing only implemented for simple authentication")

    requests = pytest.importorskip('requests')

    with run_application(client) as app:
        # Wait for a single container
        first = wait_for_containers(app, 1, states=['RUNNING'])[0]
        assert first.state == 'RUNNING'
        assert first.service_name == 'sleeper'

        # Set some key-values
        app.kv['foo'] = b'bar'
        app.kv['bad'] = b'\255\255\255'  # non-unicode

        # Base url of web ui
        base = 'http://master.example.com:8088/proxy/%s' % app.id

        # Fails without authentication
        anonymous = requests.get(base)
        assert anonymous.status_code == 401

        # With authentication
        login = requests.get(base + "?user.name=testuser")
        assert login.ok
        cookies = login.cookies

        # / and /services are the same
        for suffix in ('', '/services'):
            page = requests.get(base + suffix, cookies=cookies)
            assert page.ok
            assert 'sleeper_0' in page.text  # list of containers
            assert '/testuser/sleeper.log' in page.text  # link to logs

        # /kv store has a few items in it
        kv_page = requests.get(base + '/kv', cookies=cookies)
        assert kv_page.ok
        for fragment in ('foo', 'bar', 'bad', '<binary value>'):
            assert fragment in kv_page.text

        # Resources are reachable
        favicon = requests.get(base + '/favicon.ico', cookies=cookies)
        assert favicon.ok

        # 404 for fake pages
        missing = requests.get(base + '/not-a-real-page', cookies=cookies)
        assert missing.status_code == 404

        app.shutdown()
def ui_test_app(client, has_kerberos_enabled):
    """Yield a running application with one ``RUNNING`` container.

    Skipped when kerberos is enabled. On teardown the application is shut
    down; if that raises ``ConnectionError`` it is killed through the client
    instead.

    NOTE(review): ``spec`` is not defined inside this function; presumably
    it is a module-level specification — confirm.
    """
    if has_kerberos_enabled:
        pytest.skip("Testing only implemented for simple authentication")

    def _stop(running):
        # Prefer a clean shutdown; fall back to a kill when unreachable
        try:
            running.shutdown()
        except ConnectionError:
            client.kill_application(running.id)

    with run_application(client, spec=spec) as app:
        # Wait for a single container
        wait_for_containers(app, 1, states=['RUNNING'])
        try:
            yield app
        finally:
            _stop(app)
def test_set_log_level(client):
    """Check that the master's ``log_level`` setting shows up in the logs.

    The application master is configured with ``log_level='debug'``; after
    the application succeeds, its logs must contain ``DEBUG``.
    """
    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='ls'
    )
    spec = skein.ApplicationSpec(
        # Named after this test; the name was previously copied from
        # test_custom_log4j_properties, so both tests submitted applications
        # under the same name.
        name="test_set_log_level",
        queue="default",
        master=skein.Master(log_level='debug'),
        services={'service': service}
    )

    with run_application(client, spec=spec) as app:
        assert wait_for_completion(client, app.id) == 'SUCCEEDED'

    logs = get_logs(app.id)
    assert 'DEBUG' in logs
def test_memory_limit_exceeded(client):
    """Run a service that over-allocates memory and expect the app to fail.

    The application must end as ``FAILED`` and its logs must contain the
    text ``memory used``.
    """
    # Allocate noticeably more memory than the 128 MB limit
    hog = 'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'
    limits = skein.Resources(memory=128, vcores=1)
    spec = skein.ApplicationSpec(
        name="test_memory_limit_exceeded",
        queue="default",
        services={"service": skein.Service(resources=limits, commands=[hog])}
    )

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == "FAILED"

    assert "memory used" in get_logs(app.id)
def test_simple_app(client):
    """Smoke-test the lifecycle of the default application.

    While the application runs, its specification and report are inspected
    and a second application client is opened, used, and closed. After
    shutdown, connecting to it or using the old application client must
    raise ``skein.ConnectionError``, and the application must be listed
    among finished applications rather than in the default listing.
    """
    with run_application(client) as app:
        # smoketest repr
        repr(app)

        # Test get_specification
        a = app.get_specification()
        assert isinstance(a, skein.ApplicationSpec)
        assert 'sleeper' in a.services

        assert client.application_report(app.id).state == 'RUNNING'

        # Connect with a new client
        with client.connect(app.id) as app2:
            app2.get_specification()

        # application client is closed
        with pytest.raises(Exception):
            app2.get_specification()

        # Closing an application client is idempotent
        app2.close()

        app.shutdown()

    with pytest.raises(skein.ConnectionError):
        client.connect(app.id)

    with pytest.raises(skein.ConnectionError):
        client.connect(app.id, wait=False)

    # On Travis CI there can be some lag between application being shutdown
    # and application actually shutting down. Retry up to 5 seconds before
    # failing.
    #
    # The retries are counted with an integer: repeatedly subtracting 0.1
    # from a float is not guaranteed to land exactly on 0, so the previous
    # ``while timeout:`` loop could run forever when the error never came.
    # If no attempt raises, ``pytest.raises`` fails the test as intended.
    with pytest.raises(skein.ConnectionError):
        for _ in range(50):
            app.get_specification()
            # Didn't fail, try again later
            time.sleep(0.1)

    running_apps = client.get_applications()
    assert app.id not in {a.id for a in running_apps}

    finished_apps = client.get_applications(states=['finished'])
    assert app.id in {a.id for a in finished_apps}
def test_set_application_progress(client):
    """Set the application progress and read it back from the report.

    A progress of ``0.5`` must appear in the application report after a
    short wait. Values of ``-0.5`` and ``1.5`` must raise ``ValueError``.
    """
    with run_application(client) as app:
        app.set_progress(0.5)

        # Give the allocate loop time to update
        time.sleep(2)

        assert client.application_report(app.id).progress == 0.5

        for invalid in (-0.5, 1.5):
            with pytest.raises(ValueError):
                app.set_progress(invalid)

        app.shutdown()
def test_move_application(client):
    """Move an application between queues and check the error cases.

    Covers a successful move, a move to a non-leaf queue, a move to a
    missing queue, a move after shutdown, a move of an unknown application
    id and a move with a malformed application id. Every failure must raise
    ``ValueError``; all but the last are also checked for not leaking
    ``org.apache.hadoop`` into the message.
    """
    def assert_good_message(msg):
        # Ensure message doesn't contain traceback
        assert 'org.apache.hadoop' not in str(msg)

    spec = skein.ApplicationSpec(
        name="test_move_application",
        queue="default",
        master=skein.Master(script="sleep infinity")
    )

    with run_application(client, spec=spec) as app:
        assert client.application_report(app.id).queue == "default"

        # Successful move
        client.move_application(app.id, "apples")
        assert client.application_report(app.id).queue == "apples"

        bad_moves = [
            ("fruit", 'Leaf'),             # Not a leaf queue
            ("missing", "doesn't exist"),  # Queue doesn't exist
        ]
        for queue, fragment in bad_moves:
            with pytest.raises(ValueError) as exc:
                client.move_application(app.id, queue)
            assert fragment in str(exc.value)
            assert_good_message(exc.value)

        app.shutdown()

    # App already shutdown
    with pytest.raises(ValueError) as exc:
        client.move_application(app.id, "default")
    assert "cannot be moved" in str(exc.value)
    assert_good_message(exc.value)

    # App doesn't exist
    missing_appid = 'application_1526134340424_0012'
    with pytest.raises(ValueError) as exc:
        client.move_application(missing_appid, "default")
    # This error message is different in Hadoop 3
    message = str(exc.value)
    assert "absent" in message or "doesn't exist" in message
    assert_good_message(exc.value)

    # Invalid application id
    with pytest.raises(ValueError) as exc:
        client.move_application("oh no", "default")
    assert "Invalid" in str(exc.value)
def test_custom_log4j_properties(client, tmpdir):
    """Run an application whose master uses a custom log4j configuration.

    The contents of ``custom_log4j_properties`` are written to a file in
    ``tmpdir`` and passed as the master's ``log_config``. After the
    application succeeds, its logs must contain ``CUSTOM-LOG4J-SUCCEEDED``.
    """
    configpath = str(tmpdir.join("log4j.properties"))

    spec = skein.ApplicationSpec(
        name="test_custom_log4j_properties",
        queue="default",
        master=skein.Master(log_config=configpath),
        services={
            'service': skein.Service(
                resources=skein.Resources(memory=128, vcores=1),
                script='ls'
            )
        }
    )

    # The file is written after the spec is built, before submission
    with open(configpath, 'w') as config_file:
        config_file.write(custom_log4j_properties)

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'

    assert 'CUSTOM-LOG4J-SUCCEEDED' in get_logs(app.id)
def test_master_driver_foo(client, tmpdir):
    """Run a master-only application with a custom env var and a file.

    The master script runs ``ls`` and ``env``; after the application
    succeeds, the logs must show both the ``FOO=BAR`` variable and the
    ``myfile`` entry.
    """
    dummy_path = str(tmpdir.join("dummy-file"))
    with open(dummy_path, 'w') as dummy:
        dummy.write('foobar')

    driver = skein.Master(
        script='ls\nenv',
        env={'FOO': 'BAR'},
        files={'myfile': dummy_path}
    )
    spec = skein.ApplicationSpec(name="test_master_driver", master=driver)

    with run_application(client, spec=spec, connect=False) as app_id:
        final_state = wait_for_completion(client, app_id)
        assert final_state == 'SUCCEEDED'

    logs = get_logs(app_id)
    for fragment in ('FOO=BAR', 'myfile'):
        assert fragment in logs
def test_retries_succeeds(client):
    """Run an application that is allowed two attempts and must succeed.

    The master script is built from ``test_retries_script_template`` with
    ``succeed_on='02'``. The logs must show the retry messages, and the
    application directory on HDFS must be gone afterwards.

    Requires ``pyarrow.hdfs``; skipped when it cannot be imported.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    script = test_retries_script_template.format(succeed_on='02')
    spec = skein.ApplicationSpec(
        name="test_application_retries_succeeds",
        max_attempts=2,
        master=skein.Master(script=script)
    )

    with run_application(client, spec=spec, connect=False) as app_id:
        assert wait_for_completion(client, app_id) == 'SUCCEEDED'

    logs = get_logs(app_id)
    expected_lines = (
        'Failing on other attempts',
        'Application attempt 1 out of 2 failed, will retry',
        'Succeeding on attempt 02',
    )
    for line in expected_lines:
        assert line in logs

    appdir = "/user/testuser/.skein/%s" % app_id
    filesystem = hdfs.connect()
    assert not filesystem.exists(appdir)
def test_events_application_shutdown(client):
    """Check an event queue's behaviour once its application has shut down.

    Three keys with prefix ``a`` are put after subscribing. After shutdown,
    the three put events must be retrievable in order, and every further
    ``get`` must raise ``ConnectionError``.
    """
    puts = [('a1', b'1'), ('a2', b'2'), ('a3', b'3')]

    with run_application(client) as app:
        queue = app.kv.events(prefix='a')
        for key, value in puts:
            app.kv.put(key, value)
        app.shutdown()

    event_filter = next(iter(queue.filters))
    for key, value in puts:
        assert queue.get() == put_event(key, value, None, event_filter)

    # All further requests error with connection error
    for _ in range(2):
        with pytest.raises(ConnectionError):
            queue.get()