Beispiel #1
0
    def _build_specification(self, cluster, cert_path, key_path):
        """Build the skein application specification for ``cluster``.

        The scheduler is described as the application master and the workers
        as the ``dask.worker`` service. Both receive the same localized
        files, including the credentials under the fixed names ``dask.crt``
        and ``dask.pem``.

        Parameters
        ----------
        cluster
            Object providing ``config`` and ``username``.
        cert_path
            Source localized as ``dask.crt``.
        key_path
            Source localized as ``dask.pem``.

        Returns
        -------
        skein.ApplicationSpec
        """
        # Entries given as dicts are promoted to ``skein.File``; everything
        # else is passed through unchanged.
        localized = {}
        for name, source in cluster.config.localize_files.items():
            if isinstance(source, dict):
                source = skein.File.from_dict(source)
            localized[name] = source
        localized["dask.crt"] = cert_path
        localized["dask.pem"] = key_path

        scheduler_cmd = " ".join(self.get_scheduler_command(cluster))
        # Worker name and scheduler address are emitted as shell variable
        # references rather than concrete values.
        worker_argv = self.get_worker_command(
            cluster,
            worker_name="$DASK_GATEWAY_WORKER_NAME",
            scheduler_address="$DASK_GATEWAY_SCHEDULER_ADDRESS",
        )
        worker_cmd = " ".join(worker_argv)

        # Each script is the configured setup snippet followed by the command
        scheduler_script = f"{cluster.config.scheduler_setup}\n{scheduler_cmd}"
        worker_script = f"{cluster.config.worker_setup}\n{worker_cmd}"

        security = self._get_security(cluster)
        scheduler_resources = skein.Resources(
            memory="%d b" % cluster.config.scheduler_memory,
            vcores=cluster.config.scheduler_cores,
        )
        master = skein.Master(
            security=security,
            resources=scheduler_resources,
            files=localized,
            env=self.get_scheduler_env(cluster),
            script=scheduler_script,
        )

        worker_resources = skein.Resources(
            memory="%d b" % cluster.config.worker_memory,
            vcores=cluster.config.worker_cores,
        )
        # Declared with zero initial instances and no restarts
        worker_service = skein.Service(
            resources=worker_resources,
            instances=0,
            max_restarts=0,
            allow_failures=True,
            files=localized,
            env=self.get_worker_env(cluster),
            script=worker_script,
        )

        return skein.ApplicationSpec(
            name="dask-gateway",
            queue=cluster.config.queue,
            user=cluster.username,
            master=master,
            services={"dask.worker": worker_service},
        )
Beispiel #2
0
def test_allow_failures_max_restarts(client, allow_failures):
    """Check how ``allow_failures`` interacts with an exhausted ``max_restarts``.

    A service whose script always exits 1 is run with ``max_restarts=2``.
    With ``allow_failures`` the application is expected to remain usable
    after three failed containers; without it the application is expected
    to finish as ``FAILED``.
    """
    app_name = "test_max_restarts_allow_failures_%s" % str(allow_failures).lower()
    master = skein.Master(script="sleep infinity")
    always_fails = skein.Service(
        instances=1,
        max_restarts=2,
        allow_failures=allow_failures,
        resources=skein.Resources(memory=128, vcores=1),
        script="exit 1",
    )
    spec = skein.ApplicationSpec(
        name=app_name,
        master=master,
        services={'myservice': always_fails},
    )

    with run_application(client, spec=spec) as app:
        if not allow_failures:
            # The service fails three times and takes the application down
            assert wait_for_completion(client, app.id) == 'FAILED'
            return

        # Three failed containers, after which no more are started
        wait_for_containers(app, 3, states=['FAILED'])
        # The application must still answer requests afterwards
        time.sleep(0.5)
        app.get_specification()
        # Nothing stops the application on its own, so shut it down here
        app.shutdown()
        assert wait_for_completion(client, app.id) == 'SUCCEEDED'
Beispiel #3
0
def test_fail_on_container_failure(client, with_restarts):
    """Check that a failing container fails the whole application.

    Two instances of the ``test`` service are started. The script keeps the
    container whose ``SKEIN_CONTAINER_ID`` is ``test_0`` alive and makes
    every other container exit with status 1. The application is expected
    to finish as ``FAILED``, and the logs are checked for which container
    ids appeared, with and without restarts.
    """
    script = ('if [[ "$SKEIN_CONTAINER_ID" != "test_0" ]]; then\n'
              '  exit 1\n'
              'else\n'
              '  sleep infinity\n'
              'fi')

    spec = skein.ApplicationSpec(
        name="test_fail_on_container_failure",
        services={
            'test':
            skein.Service(instances=2,
                          max_restarts=2 if with_restarts else 0,
                          resources=skein.Resources(memory=32, vcores=1),
                          script=script)
        })
    with run_application(client, spec=spec) as app:
        # The comparison was previously evaluated and discarded, so the
        # final state was never actually checked.
        assert wait_for_completion(client, app.id) == "FAILED"

    logs = get_logs(app.id)
    assert "test_0" in logs
    assert "test_1" in logs
    # test_2 and test_3 are only expected when restarts are enabled
    assert ("test_2" in logs) == with_restarts
    assert ("test_3" in logs) == with_restarts
    assert "test_4" not in logs
Beispiel #4
0
def _make_submit_specification(script, args=(), **kwargs):
    """Build the application spec for submitting ``script``.

    Starts from ``_make_specification(**kwargs)``. When that spec has a
    ``dask.scheduler`` service, a ``dask.client`` service is added that
    localizes ``script`` and runs it with ``args``; otherwise the spec is
    returned as built.
    """
    spec = _make_specification(**kwargs)

    environment = lookup(kwargs, 'environment', 'yarn.environment')
    files, build_script = _files_and_build_script(environment)

    if 'dask.scheduler' not in spec.services:
        return spec

    # deploy_mode == 'remote'
    vcores = lookup(kwargs, 'client_vcores', 'yarn.client.vcores')
    raw_memory = lookup(kwargs, 'client_memory', 'yarn.client.memory')
    env = lookup(kwargs, 'client_env', 'yarn.client.env')
    memory = parse_memory(raw_memory, 'client')

    # The script is localized under its base name
    script_name = os.path.basename(script)
    files[script_name] = script

    resources = skein.Resources(vcores=vcores, memory=memory)
    command = 'services client %s %s' % (script_name, ' '.join(args))
    spec.services['dask.client'] = skein.Service(
        instances=1,
        resources=resources,
        max_restarts=0,
        depends=['dask.scheduler'],
        files=files,
        env=env,
        script=build_script(command),
    )
    return spec
Beispiel #5
0
def _make_submit_specification(script, args=(), **kwargs):
    """Build the application spec for submitting ``script``.

    Extends the spec from ``_make_specification(**kwargs)`` with a
    ``dask.client`` service that depends on ``dask.scheduler``, reuses the
    ``environment`` file of the ``dask.worker`` service, and runs ``script``
    with ``args`` through ``dask-yarn services client``.
    """
    vcores = lookup(kwargs, 'client_vcores', 'yarn.client.vcores')
    raw_memory = lookup(kwargs, 'client_memory', 'yarn.client.memory')
    env = lookup(kwargs, 'client_env', 'yarn.client.env')
    memory = parse_memory(raw_memory, 'client')

    spec = _make_specification(**kwargs)

    # Share the environment file already attached to the workers
    client_files = {
        'environment': spec.services['dask.worker'].files['environment'],
    }
    # The script is localized under its base name
    script_name = os.path.basename(script)
    client_files[script_name] = script

    resources = skein.Resources(vcores=vcores, memory=memory)
    run_client = 'dask-yarn services client %s %s' % (script_name,
                                                      ' '.join(args))
    spec.services['dask.client'] = skein.Service(
        instances=1,
        resources=resources,
        max_restarts=0,
        depends=['dask.scheduler'],
        files=client_files,
        env=env,
        commands=['source environment/bin/activate', run_client],
    )
    return spec
Beispiel #6
0
def _make_submit_specification(script, args=(), **kwargs):
    """Build the application spec for submitting ``script``.

    The base spec comes from ``_make_specification(**kwargs)``. A
    ``dask.client`` service that runs ``script`` with ``args`` is added
    only when the base spec contains a ``dask.scheduler`` service.
    """
    spec = _make_specification(**kwargs)

    env_source = lookup(kwargs, "environment", "yarn.environment")
    files, build_script = _files_and_build_script(env_source)

    has_remote_scheduler = "dask.scheduler" in spec.services
    if has_remote_scheduler:
        # deploy_mode == 'remote'
        client_vcores = lookup(kwargs, "client_vcores", "yarn.client.vcores")
        memory_setting = lookup(kwargs, "client_memory", "yarn.client.memory")
        client_env = lookup(kwargs, "client_env", "yarn.client.env")
        client_memory = parse_memory(memory_setting, "client")

        # The script is localized under its base name
        script_name = os.path.basename(script)
        files[script_name] = script

        client_resources = skein.Resources(vcores=client_vcores,
                                           memory=client_memory)
        joined_args = " ".join(args)
        client_script = build_script(
            "services client %s %s" % (script_name, joined_args))

        spec.services["dask.client"] = skein.Service(
            instances=1,
            resources=client_resources,
            max_restarts=0,
            depends=["dask.scheduler"],
            files=files,
            env=client_env,
            script=client_script,
        )
    return spec
Beispiel #7
0
def test_container_environment(client, has_kerberos_enabled):
    """Check the environment and identity visible inside a container.

    The container prints its environment and login id and touches a file
    in HDFS. The collected logs are then checked for the expected
    variables.
    """
    service = skein.Service(
        resources=skein.Resources(memory=124, vcores=1),
        commands=[
            'env',
            'echo "LOGIN_ID=[$(whoami)]"',
            'hdfs dfs -touchz /user/testuser/test_container_permissions',
        ],
    )
    spec = skein.ApplicationSpec(
        name="test_container_permissions",
        queue="default",
        services={'service': service},
    )

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)

    logs = get_logs(app.id)
    # 124 is requested above while 128 is expected here -- presumably the
    # allocation is rounded up; confirm against the cluster configuration.
    expected_entries = (
        "USER=testuser",
        'SKEIN_APPMASTER_ADDRESS=',
        'SKEIN_APPLICATION_ID=%s' % app.id,
        'SKEIN_CONTAINER_ID=service_0',
        'SKEIN_RESOURCE_MEMORY=128',
        'SKEIN_RESOURCE_VCORES=1',
    )
    for entry in expected_entries:
        assert entry in logs

    if not has_kerberos_enabled:
        assert "LOGIN_ID=[yarn]" in logs
        assert "HADOOP_USER_NAME" in logs
    else:
        assert "LOGIN_ID=[testuser]" in logs
        assert "HADOOP_USER_NAME" not in logs
Beispiel #8
0
def test_webui_acls(client, has_kerberos_enabled, ui_users, checks):
    """Check that web UI access follows the ``ui_users`` ACL.

    ``checks`` is iterated as ``(user, ok)`` pairs; each user requests the
    proxied web UI page and the response's ``ok`` flag must equal ``ok``.
    Skipped when kerberos is enabled.
    """
    if has_kerberos_enabled:
        pytest.skip("Testing only implemented for simple authentication")

    sleeper = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        commands=['sleep infinity'],
    )
    spec = skein.ApplicationSpec(
        name="test_webui_acls",
        queue="default",
        acls=skein.ACLs(enable=True, ui_users=ui_users),
        services={'sleeper': sleeper},
    )

    with run_application(client, spec=spec) as app:
        # One running container is enough before probing the UI
        containers = wait_for_containers(app, 1, states=['RUNNING'])
        first = containers[0]
        assert first.state == 'RUNNING'
        assert first.service_name == 'sleeper'

        # Web UI address as served through the proxy
        proxy_url = 'http://master.example.com:8088/proxy/%s' % app.id

        # Every user must get exactly the access listed in ``checks``
        for user, expected_ok in checks:
            response = get_page(proxy_url + "?user.name=%s" % user)
            assert response.ok == expected_ok

        app.shutdown()
Beispiel #9
0
def test_specification():
    """Check that ``YarnSpawner._build_specification`` reflects its settings.

    Configures queue, prologue/epilogue, limits, localized files and
    environment on a spawner, then inspects the resulting specification.
    """
    spawner = YarnSpawner(hub=Hub(), user=MockUser())

    settings = {
        'queue': 'myqueue',
        'prologue': 'Do this first',
        'epilogue': 'Do this after',
        'mem_limit': '1 G',
        'cpu_limit': 2,
        # One file given as a plain path, one as a dict with visibility
        'localize_files': {
            'environment': 'environment.tar.gz',
            'file2': {
                'source': 'path/to/file',
                'visibility': 'public'
            }
        },
        'environment': {'TEST_ENV_VAR': 'TEST_VALUE'},
    }
    for attribute, value in settings.items():
        setattr(spawner, attribute, value)

    spec = spawner._build_specification()
    master = spec.master

    assert spec.user == 'myname'
    assert spec.queue == 'myqueue'

    # Prologue, single-user command and epilogue all end up in the script
    for fragment in ('Do this first\n',
                     'python -m yarnspawner.singleuser',
                     'Do this after'):
        assert fragment in master.script

    assert master.resources == skein.Resources(memory='1 GiB', vcores=2)

    for file_name in ('environment', 'file2'):
        assert file_name in master.files
    assert master.files['file2'].visibility == 'public'

    for variable in ('TEST_ENV_VAR', 'JUPYTERHUB_API_TOKEN'):
        assert variable in master.env
Beispiel #10
0
def test_proxy_user(client):
    """Check that an application submitted for another user runs as that user.

    Verifies the user recorded in the specification, where the service
    files were uploaded, that logs can be fetched as that user, and that
    the application directory is gone after the application is killed.
    Requires ``pyarrow.hdfs``.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    sleeper = skein.Service(
        resources=skein.Resources(memory=32, vcores=1),
        script="sleep infinity",
    )
    spec = skein.ApplicationSpec(
        name="test_proxy_user",
        user="******",
        services={"service": sleeper},
    )
    with run_application(client, spec=spec) as app:
        remote_spec = app.get_specification()
        client.kill_application(app.id, user="******")

    # The whole submission must have been done as alice
    assert remote_spec.user == 'alice'
    expected_prefix = 'hdfs://master.example.com:9000/user/alice'
    for localized in remote_spec.services['service'].files.values():
        assert localized.source.startswith(expected_prefix)

    # Logs are retrievable when asking as the user
    logs = get_logs(client, app.id, user="******")
    assert app.id in logs
    assert "application.master.log" in logs

    # Killing the application removes its directory
    filesystem = hdfs.connect()
    assert not filesystem.exists("/user/testuser/.skein/%s" % app.id)
Beispiel #11
0
def test_client_errors_nicely_if_not_logged_in(security, not_logged_in):
    """Check that client calls raise a ``DriverError`` mentioning ``kinit``.

    Every listed client method is invoked once; each must raise
    ``skein.DriverError`` with ``kinit`` in its message.
    """
    appid = 'application_1526134340424_0012'

    service = skein.Service(
        resources=skein.Resources(memory=32, vcores=1),
        script='env',
    )
    spec = skein.ApplicationSpec(
        name="should_never_get_to_run",
        queue="default",
        services={'service': service},
    )

    # (method name, positional arguments) for each call under test
    calls = (
        ('get_applications', ()),
        ('get_nodes', ()),
        ('get_queue', ('default', )),
        ('get_child_queues', ('default', )),
        ('get_all_queues', ()),
        ('application_report', (appid, )),
        ('connect', (appid, )),
        ('move_application', (appid, 'default')),
        ('kill_application', (appid, )),
        ('submit', (spec, )),
    )

    with skein.Client(security=security) as client:
        for method_name, call_args in calls:
            with pytest.raises(skein.DriverError) as exc:
                getattr(client, method_name)(*call_args)
            assert 'kinit' in str(exc.value)
Beispiel #12
0
def test_master_driver_shutdown_sequence(kind, master_cmd, service_cmd,
                                         client, tmpdir):
    """Check the final application state for each master/service outcome.

    The expected state is ``SUCCEEDED`` when ``kind`` ends with
    ``succeeds`` and ``FAILED`` otherwise. For ``service_succeeds`` the
    application has to be shut down explicitly before it completes.
    """
    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script=service_cmd,
    )
    spec = skein.ApplicationSpec(
        name="test_master_driver_shutdown_sequence_%s" % kind,
        master=skein.Master(script=master_cmd),
        services={'service': service},
    )

    if kind.endswith('succeeds'):
        expected_state = 'SUCCEEDED'
    else:
        expected_state = 'FAILED'

    if kind != 'service_succeeds':
        with run_application(client, spec=spec, connect=False) as app_id:
            # service_fails results in immediate failure
            # driver_succeeds results in immediate success
            # driver_fails results in immediate failure
            assert wait_for_completion(client, app_id) == expected_state
        return

    with run_application(client, spec=spec) as app:
        wait_for_containers(app, 1, states=['SUCCEEDED'])
        assert len(app.get_containers()) == 0
        # The application stays up until the driver completes
        app.shutdown()
        assert wait_for_completion(client, app.id) == expected_state
Beispiel #13
0
    def _build_specification(self):
        """Build the skein application specification for this spawner.

        The application consists of a master only. Its script is rendered
        from ``script_template`` and it is given freshly generated security
        credentials.

        Returns
        -------
        skein.ApplicationSpec
        """
        template = self.script_template
        script = template.format(prologue=self.prologue,
                                 singleuser_command=self.singleuser_command,
                                 epilogue=self.epilogue)

        resources = skein.Resources(memory='%d b' % self.mem_limit,
                                    vcores=self.cpu_limit)

        security = skein.Security.new_credentials()

        # Files may be configured as dicts or as ready-made values; only
        # the dicts need converting.
        localized = {}
        for name, source in self.localize_files.items():
            if isinstance(source, dict):
                source = skein.File.from_dict(source)
            localized[name] = source

        master = skein.Master(
            resources=resources,
            files=localized,
            env=self.get_env(),
            script=script,
            security=security,
        )

        return skein.ApplicationSpec(
            name='jupyterhub',
            queue=self.queue,
            user=self.user.name,
            master=master,
            delegation_token_providers=self.delegation_token_providers,
        )
Beispiel #14
0
def test_node_locality(client, strict):
    """Check that node/rack locality settings are kept in the remote spec.

    In strict mode a real host is requested without relaxation; otherwise
    non-existent node and rack names are requested with relaxation on.
    The specification read back from the running application must carry
    the same values.
    """
    relax_locality = not strict
    if strict:
        nodes, racks = ['worker.example.com'], []
    else:
        nodes, racks = ['not.a.real.host.name'], ['not.a.real.rack.name']

    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='sleep infinity',
        nodes=nodes,
        racks=racks,
        relax_locality=relax_locality,
    )
    spec = skein.ApplicationSpec(
        name="test_node_locality",
        queue="default",
        services={"service": service},
    )
    with run_application(client, spec=spec) as app:
        wait_for_containers(app, 1, states=['RUNNING'])
        remote_spec = app.get_specification()
        app.shutdown()

    remote_service = remote_spec.services['service']
    assert remote_service.nodes == nodes
    assert remote_service.racks == racks
    assert remote_service.relax_locality == relax_locality
def launch_remote_check(file: str) -> Tuple[bool, str]:
    """Run ``check_hadoop_env.py`` in a skein container and report the outcome.

    The current environment is uploaded as a PEX archive, a single-instance
    service is submitted that runs this file inside the container, and the
    function then blocks until the application publishes the ``result``
    key in its key-value store.

    Args:
        file: Value passed to the remote script as ``--file``.

    Returns:
        A ``(success, app_id)`` tuple; ``success`` is True when the
        published result is the string ``"True"``.
    """
    # Local import keeps this block self-contained; the PEX_ROOT fix below
    # needs ``uuid``.
    import uuid

    logging.info('Launching remote check')
    zip_hdfs, _ = cluster_pack.upload_env(packer=cluster_pack.PEX_PACKER)
    archive_name = os.path.basename(zip_hdfs)
    with skein.Client() as client:
        files = {
            archive_name: zip_hdfs,
            'check_hadoop_env.py': __file__,
        }
        # Ship an editable install of tf_yarn alongside the archive
        editable_packages = cluster_pack.get_editable_requirements()
        if 'tf_yarn' in editable_packages:
            tf_yarn_zip = cluster_pack.zip_path(editable_packages['tf_yarn'],
                                                False)
            logger.info(f"zip path for editable tf_yarn is {tf_yarn_zip}")
            files.update({'tf_yarn': tf_yarn_zip})
        service = skein.Service(
            script=f'./{archive_name} check_hadoop_env.py --file {file}',
            resources=skein.Resources(2 * 1024, 1),
            env={
                # The f prefix was missing, so the literal text
                # "{uuid.uuid4()}" was used instead of a unique directory.
                'PEX_ROOT': f'/tmp/{uuid.uuid4()}/',
                'PYTHONPATH': '.:',
            },
            files=files,
            instances=1)
        spec = skein.ApplicationSpec(
            {'HADOOP_ENV_CHECKER': service},
            acls=skein.model.ACLs(enable=True, view_users=['*']),
        )
        app = client.submit_and_connect(spec)

        logging.info('Remote check started')
        # Blocks until the remote script stores its result
        result = app.kv.wait('result').decode()
        app_id = app.id
        app.shutdown()
        return result == "True", app_id
Beispiel #16
0
def test_memory_limit_exceeded(kind, client):
    """Check that exceeding the memory limit fails the application.

    The over-allocating script runs in the master when ``kind`` is
    ``'master'`` and in a service otherwise. The logs must contain the
    matching message, and for the master case the application report's
    diagnostics must mention it too.
    """
    limit = skein.Resources(memory=128, vcores=1)
    # Allocate noticeably more memory than the 128 MB limit
    hog = 'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'

    runs_in_master = kind == 'master'
    if runs_in_master:
        master = skein.Master(resources=limit, script=hog)
        services = None
        expected_text = "memory limit"
    else:
        master = None
        services = {'service': skein.Service(resources=limit, script=hog)}
        expected_text = "memory used"

    spec = skein.ApplicationSpec(
        name="test_memory_limit_exceeded_%s" % kind,
        queue="default",
        master=master,
        services=services,
    )
    with run_application(client, spec=spec, connect=False) as app_id:
        assert wait_for_completion(client, app_id) == "FAILED"

    assert expected_text in get_logs(app_id)

    if runs_in_master:
        diagnostics = client.application_report(app_id).diagnostics
        assert 'memory limit' in diagnostics
Beispiel #17
0
def test_security_specified(client):
    """Check that credentials given on the master are used for the application.

    Connecting with the same credentials must work, connecting with the
    client's default credentials must fail, and the specification read back
    from the application must reference the credentials as files rather
    than inline bytes.
    """
    credentials = skein.Security.new_credentials()
    sleeper = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        commands=['sleep infinity'],
    )
    spec = skein.ApplicationSpec(
        name="test_security_specified",
        master=skein.Master(security=credentials),
        services={'sleeper': sleeper},
    )
    with run_application(client, spec=spec) as app:
        assert app.security is credentials
        assert app.security != client.security

        remote_spec = app.get_specification()

        # Smoketest: a connection with the right credentials can communicate
        trusted = client.connect(app.id, security=credentials)
        trusted.get_specification()

        # A connection using the client's own credentials is rejected
        untrusted = client.connect(app.id)
        with pytest.raises(skein.ConnectionError):
            untrusted.get_specification()

        app.shutdown()

    remote = remote_spec.master.security
    assert remote.cert_bytes is None
    assert remote.key_bytes is None
    assert remote.cert_file.source.startswith('hdfs')
    assert remote.key_file.source.startswith('hdfs')
Beispiel #18
0
    def _build_specification(self, cluster_info, cert_path, key_path):
        """Build the skein application specification for a cluster.

        The scheduler is described as the application master and the
        workers as the ``dask.worker`` service. Both share the same
        localized files and the same environment.

        Parameters
        ----------
        cluster_info
            Object providing ``username``; also passed to ``get_env`` and
            ``_get_security``.
        cert_path
            Source localized as ``dask.crt``.
        key_path
            Source localized as ``dask.pem``.

        Returns
        -------
        skein.ApplicationSpec
        """
        # Dict entries become ``skein.File``; other values pass through
        localized = {}
        for name, source in self.localize_files.items():
            if isinstance(source, dict):
                source = skein.File.from_dict(source)
            localized[name] = source
        localized["dask.crt"] = cert_path
        localized["dask.pem"] = key_path

        shared_env = self.get_env(cluster_info)

        # Setup snippet first, then the actual command
        scheduler_script = "\n".join(
            (self.scheduler_setup, self.scheduler_command))
        worker_script = "\n".join((self.worker_setup, self.worker_command))

        security = self._get_security(cluster_info)
        scheduler_resources = skein.Resources(
            memory="%d b" % self.scheduler_memory,
            vcores=self.scheduler_cores,
        )
        master = skein.Master(
            security=security,
            resources=scheduler_resources,
            files=localized,
            env=shared_env,
            script=scheduler_script,
        )

        worker_resources = skein.Resources(
            memory="%d b" % self.worker_memory,
            vcores=self.worker_cores,
        )
        # Declared with zero initial instances and no restarts
        worker_service = skein.Service(
            resources=worker_resources,
            instances=0,
            max_restarts=0,
            allow_failures=True,
            files=localized,
            env=shared_env,
            script=worker_script,
        )

        return skein.ApplicationSpec(
            name="dask-gateway",
            queue=self.queue,
            user=cluster_info.username,
            master=master,
            services={"dask.worker": worker_service},
        )
Beispiel #19
0
def test_hadoop3_resource(client):
    """Check that requesting a GPU resource is rejected with a ``ValueError``.

    The expected error message depends on the ``HADOOP3`` flag.
    """
    spec = skein.ApplicationSpec(name="test_hadoop3_resources",
                                 master=skein.Master(resources=skein.Resources(
                                     memory='32 MiB', vcores=1, gpus=1),
                                                     script="sleep infinity"))
    with pytest.raises(ValueError) as exc:
        client.submit(spec)

    message = str(exc.value)
    if HADOOP3:
        assert "Resource 'yarn.io/gpu'" in message
    else:
        # This branch used to assert a bare string literal, which is always
        # true; it now checks the message like the branch above.
        assert "Custom resources not supported" in message
Beispiel #20
0
def test_file_systems(client):
    """Check that a container can write to a file system listed in the spec.

    The service touches a file in HDFS; the application must finish as
    ``SUCCEEDED``.
    """
    touch_file = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='hdfs dfs -touchz /user/testuser/test_file_systems',
    )
    spec = skein.ApplicationSpec(
        name="test_file_systems",
        queue="default",
        services={'service': touch_file},
        file_systems=["hdfs://master.example.com:9000"],
    )

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'
Beispiel #21
0
def create_skein_app():
    """Build the skein spec that runs this file from ``example.pex``.

    Two instances of a single service, named by ``NODE_NAME``, are
    requested in the ``dev`` queue.

    Returns:
        skein.ApplicationSpec
    """
    # Local import keeps this block self-contained; the PEX_ROOT fix below
    # needs ``uuid``.
    import uuid

    service = skein.Service(commands=['./example.pex distributed.py --server'],
                            resources=skein.Resources(2 * 1024, 1),
                            # The f prefix was missing, so the literal text
                            # "{uuid.uuid4()}" was used as the directory.
                            env={'PEX_ROOT': f'/tmp/{uuid.uuid4()}/'},
                            files={
                                'example.pex': 'example.pex',
                                'distributed.py': __file__
                            },
                            instances=2)
    spec = skein.ApplicationSpec({NODE_NAME: service}, queue='dev')
    return spec
Beispiel #22
0
def test_add_container(client):
    """Check adding containers at runtime, with and without env overrides.

    The service starts with no instances. Its script echoes ``MYENV`` and
    ``MYENV2`` and exits with status 1 only when ``MYENV`` is ``bar``.
    """
    script = "\n".join([
        'echo "$SKEIN_CONTAINER_ID - MYENV=$MYENV"',
        'echo "$SKEIN_CONTAINER_ID - MYENV2=$MYENV2"',
        'if [[ "$MYENV" == "bar" ]]; then',
        '  exit 1',
        'else',
        '  exit 0',
        'fi',
    ])

    master = skein.Master(script="sleep infinity")
    service = skein.Service(
        instances=0,
        resources=skein.Resources(memory=32, vcores=1),
        env={'MYENV': 'foo', 'MYENV2': 'baz'},
        max_restarts=1,
        script=script,
    )
    spec = skein.ApplicationSpec(
        name="test_add_container",
        master=master,
        services={'test': service},
    )

    with run_application(client, spec=spec) as app:
        # First container uses the service's own environment
        container = app.add_container('test')
        assert container.instance == 0
        wait_for_containers(app, 1, states=['RUNNING', 'SUCCEEDED'])

        # An unknown service name is rejected
        with pytest.raises(ValueError):
            app.add_container('foobar')

        # Second container overrides MYENV only
        container = app.add_container('test', {'MYENV': 'bar'})
        assert container.instance == 1

        # The override makes the script fail; it should fail twice and
        # then fail the whole application
        assert wait_for_completion(client, app.id) == 'FAILED'

    logs = get_logs(app.id)
    expected_lines = (
        "test_0 - MYENV=foo",
        "test_0 - MYENV2=baz",
        "test_1 - MYENV=bar",
        "test_1 - MYENV2=baz",
        "test_2 - MYENV=bar",
        "test_2 - MYENV2=baz",
    )
    for line in expected_lines:
        assert line in logs

    assert "test_3" not in logs
Beispiel #23
0
def test_file_systems(client):
    """Check that a container can write to a file system listed in the spec.

    The service touches a file in HDFS and the application is awaited with
    ``wait_for_success``.
    """
    touch_file = skein.Service(
        resources=skein.Resources(memory=124, vcores=1),
        commands=['hdfs dfs -touchz /user/testuser/test_file_systems'],
    )
    extra_file_systems = ["hdfs://master.example.com:9000"]
    spec = skein.ApplicationSpec(name="test_file_systems",
                                 queue="default",
                                 services={'service': touch_file},
                                 file_systems=extra_file_systems)

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)
Beispiel #24
0
def test_set_log_level(client):
    """Check that ``log_level='debug'`` on the master yields DEBUG log output."""
    lister = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='ls',
    )
    spec = skein.ApplicationSpec(
        name="test_custom_log4j_properties",
        queue="default",
        master=skein.Master(log_level='debug'),
        services={'service': lister},
    )

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'

    assert 'DEBUG' in get_logs(app.id)
Beispiel #25
0
def test_memory_limit_exceeded(client):
    """Check that a service exceeding its memory limit fails the application.

    The logs of the failed application must contain ``memory used``.
    """
    # Allocate noticeably more memory than the 128 MB limit
    hog = 'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'
    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        commands=[hog],
    )
    spec = skein.ApplicationSpec(
        name="test_memory_limit_exceeded",
        queue="default",
        services={"service": service},
    )
    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == "FAILED"

    assert "memory used" in get_logs(app.id)
Beispiel #26
0
def test_proxy_user_no_permissions(client):
    """Check that submitting as a user without proxy permission is refused.

    The ``DriverError`` message must mention both ``testuser`` and ``bob``.
    """
    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        commands=['env'],
    )
    spec = skein.ApplicationSpec(
        name="test_proxy_user_no_permissions",
        user="******",
        services={'service': service},
    )

    # Submission as that user must be refused
    with pytest.raises(skein.DriverError) as exc:
        client.submit(spec)

    message = str(exc.value)
    for name in ('testuser', 'bob'):
        assert name in message
Beispiel #27
0
def test_custom_log4j_properties(client, tmpdir):
    """Check that a custom log4j configuration file is used by the master.

    ``custom_log4j_properties`` is written to a temporary file that is
    passed as the master's ``log_config``; the logs must then contain
    ``CUSTOM-LOG4J-SUCCEEDED``.
    """
    config_file = str(tmpdir.join("log4j.properties"))
    lister = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='ls',
    )
    spec = skein.ApplicationSpec(
        name="test_custom_log4j_properties",
        queue="default",
        master=skein.Master(log_config=config_file),
        services={'service': lister},
    )
    # The file is only created here, after the spec already refers to it
    with open(config_file, 'w') as handle:
        handle.write(custom_log4j_properties)

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'

    assert 'CUSTOM-LOG4J-SUCCEEDED' in get_logs(app.id)
Beispiel #28
0
def test_proxy_user_no_permissions(client, hadoop3):
    """Check that submitting as a user without proxy permission is refused.

    Skipped when ``hadoop3`` is set. The ``DriverError`` message must
    mention both ``testuser`` and ``bob``.
    """
    if hadoop3:
        pytest.skip("Lack of proxyuser permissions causes "
                    "yarnclient to hang in hadoop3")

    service = skein.Service(
        resources=skein.Resources(memory=32, vcores=1),
        script='env',
    )
    spec = skein.ApplicationSpec(
        name="test_proxy_user_no_permissions",
        user="******",
        services={'service': service},
    )

    # Submission as that user must be refused
    with pytest.raises(skein.DriverError) as exc:
        client.submit(spec)

    message = str(exc.value)
    for name in ('testuser', 'bob'):
        assert name in message
Beispiel #29
0
def test_container_permissions(client, has_kerberos_enabled):
    """Check which user a container runs as and that it can write to HDFS.

    The container echoes ``$USER`` and its login id and touches a file in
    HDFS. The expected login id depends on whether kerberos is enabled.
    """
    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        commands=[
            'echo "USER_ENV=[$USER]"',
            'echo "LOGIN_ID=[$(whoami)]"',
            'hdfs dfs -touchz /user/testuser/test_container_permissions',
        ],
    )
    spec = skein.ApplicationSpec(
        name="test_container_permissions",
        queue="default",
        services={'service': service},
    )

    with run_application(client, spec=spec) as app:
        wait_for_success(app)

    logs = get_logs(app.app_id)
    assert "USER_ENV=[testuser]" in logs
    if not has_kerberos_enabled:
        assert "LOGIN_ID=[yarn]" in logs
    else:
        assert "LOGIN_ID=[testuser]" in logs
Beispiel #30
0
    async def post(self):
        """Validate an uploaded script, submit it as an application, redirect.

        Reads the ``queue``, ``memory`` and ``vcores`` arguments and the
        uploaded ``script`` file, submits a master-only application that
        runs the script with ``python``, and redirects to the application's
        tracking URL.

        Raises:
            web.HTTPError: With status 400 when the script is missing, when
                ``memory`` or ``vcores`` is not numeric, or when either is
                outside its allowed range.
        """
        # Local import keeps this block self-contained
        import shlex

        # Extract request parameters
        queue = self.get_argument('queue') or 'default'
        try:
            memory = float(self.get_argument('memory'))
            vcores = int(self.get_argument('vcores'))
        except ValueError:
            # Malformed numbers are a client error, not a server error
            raise web.HTTPError(400,
                                reason="memory and vcores must be numeric")
        try:
            script = self.request.files['script'][0]
        except (IndexError, KeyError):
            raise web.HTTPError(400, reason="Missing script")

        # Check memory and vcores are in bounds. The chained comparison
        # also rejects NaN, which passes a pair of `<` / `>` tests.
        # HTTPError takes the status code first; the message was previously
        # passed in that position.
        if not 0.5 <= memory <= 8:
            raise web.HTTPError(400, reason="0.5 <= memory <= 8 required")
        if not 1 <= vcores <= 4:
            raise web.HTTPError(400, reason="1 <= vcores <= 4 required")

        filename = script['filename']

        # We need to write the script temporarily to disk so Skein can upload it
        with tempfile.NamedTemporaryFile() as f:
            f.write(script['body'])
            f.file.flush()

            # ** Construct the application specification **
            # Note that we specify the user as user logged in to the web page.
            # If kerberos authentication was used, this would match the user's
            # principal.
            spec = skein.ApplicationSpec(
                name="pyscript",
                queue=queue,
                user=self.current_user,
                master=skein.Master(
                    resources=skein.Resources(memory="%f GiB" % memory,
                                              vcores=vcores),
                    files={filename: f.name},
                    # The filename is supplied by the client, so quote it
                    # before placing it in a shell script.
                    script="python %s" % shlex.quote(filename)))

            # Submit from an executor thread so the blocking call does not
            # stall the event loop; the temporary file must still exist
            # while this runs.
            report = await ioloop.IOLoop.current().run_in_executor(
                None, self.submit_and_report, spec)

        # Redirect the user to the application's tracking url
        self.redirect(report.tracking_url)