Beispiel #1
0
def test_container_environment(client, has_kerberos_enabled):
    """Check the environment variables and identity seen inside a container.

    The service dumps its environment, echoes the login user and touches a
    file on HDFS. After the application succeeds, the collected logs are
    searched for the expected variables.
    """
    container_commands = [
        'env',
        'echo "LOGIN_ID=[$(whoami)]"',
        'hdfs dfs -touchz /user/testuser/test_container_permissions',
    ]
    # NOTE(review): 124 is requested while 128 is asserted below; presumably
    # the request is rounded up by the cluster -- confirm.
    resources = skein.Resources(memory=124, vcores=1)
    spec = skein.ApplicationSpec(
        name="test_container_permissions",
        queue="default",
        services={
            'service': skein.Service(resources=resources,
                                     commands=container_commands),
        },
    )

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)

    logs = get_logs(app.id)
    expected_fragments = (
        "USER=testuser",
        'SKEIN_APPMASTER_ADDRESS=',
        'SKEIN_APPLICATION_ID=%s' % app.id,
        'SKEIN_CONTAINER_ID=service_0',
        'SKEIN_RESOURCE_MEMORY=128',
        'SKEIN_RESOURCE_VCORES=1',
    )
    for fragment in expected_fragments:
        assert fragment in logs

    # With kerberos the container runs as the submitting user and no
    # HADOOP_USER_NAME is exported; otherwise it runs as "yarn" with it set.
    expected_login = "testuser" if has_kerberos_enabled else "yarn"
    assert "LOGIN_ID=[%s]" % expected_login in logs
    assert ("HADOOP_USER_NAME" in logs) == (not has_kerberos_enabled)
Beispiel #2
0
def test_webui_acls(client, has_kerberos_enabled, ui_users, checks):
    """Check that the proxied web UI honours the configured ``ui_users`` ACL.

    ``checks`` is an iterable of ``(user, ok)`` pairs: each user requests the
    proxy page and the response's ``ok`` flag must equal the expected value.
    Skipped when kerberos is enabled.
    """
    if has_kerberos_enabled:
        pytest.skip("Testing only implemented for simple authentication")

    sleeper = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                            commands=['sleep infinity'])
    access = skein.ACLs(enable=True, ui_users=ui_users)
    spec = skein.ApplicationSpec(name="test_webui_acls",
                                 queue="default",
                                 acls=access,
                                 services={'sleeper': sleeper})

    with run_application(client, spec=spec) as app:
        # Block until exactly one container is up
        running = wait_for_containers(app, 1, states=['RUNNING'])
        first = running[0]
        assert first.state == 'RUNNING'
        assert first.service_name == 'sleeper'

        # Root of the proxied web UI for this application
        proxy_url = 'http://master.example.com:8088/proxy/%s' % app.id

        # Every listed user must be allowed/denied as expected
        for user, expected_ok in checks:
            response = get_page(proxy_url + "?user.name=%s" % user)
            assert response.ok == expected_ok

        app.shutdown()
Beispiel #3
0
def _make_submit_specification(script, args=(), **kwargs):
    """Build the application spec, adding a ``dask.client`` service if remote.

    The base specification comes from ``_make_specification(**kwargs)``. When
    it contains a ``dask.scheduler`` service, a single-instance
    ``dask.client`` service is added that ships ``script`` and runs it with
    ``args``.

    Returns the (possibly extended) specification.
    """
    spec = _make_specification(**kwargs)

    environment = lookup(kwargs, "environment", "yarn.environment")
    files, build_script = _files_and_build_script(environment)

    # No scheduler service: nothing to add (not deploy_mode == 'remote')
    if "dask.scheduler" not in spec.services:
        return spec

    vcores = lookup(kwargs, "client_vcores", "yarn.client.vcores")
    raw_memory = lookup(kwargs, "client_memory", "yarn.client.memory")
    env = lookup(kwargs, "client_env", "yarn.client.env")
    memory = parse_memory(raw_memory, "client")

    # Ship the user script under its base name
    script_name = os.path.basename(script)
    files[script_name] = script

    command = "services client %s %s" % (script_name, " ".join(args))
    client_resources = skein.Resources(vcores=vcores, memory=memory)
    spec.services["dask.client"] = skein.Service(
        instances=1,
        resources=client_resources,
        max_restarts=0,
        depends=["dask.scheduler"],
        files=files,
        env=env,
        script=build_script(command),
    )
    return spec
Beispiel #4
0
def test_proxy_user(client):
    """Submit, inspect and kill an application on behalf of another user.

    Skipped when ``pyarrow.hdfs`` cannot be imported.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    sleeper = skein.Service(resources=skein.Resources(memory=32, vcores=1),
                            script="sleep infinity")
    spec = skein.ApplicationSpec(name="test_proxy_user",
                                 user="******",
                                 services={"service": sleeper})
    with run_application(client, spec=spec) as app:
        remote_spec = app.get_specification()
        client.kill_application(app.id, user="******")

    # The remote specification must belong to alice, with every localized
    # file living under alice's HDFS home directory
    assert remote_spec.user == 'alice'
    alice_home = 'hdfs://master.example.com:9000/user/alice'
    for localized in remote_spec.services['service'].files.values():
        assert localized.source.startswith(alice_home)

    # Logs are retrievable when requested as the user
    logs = get_logs(client, app.id, user="******")
    assert app.id in logs
    assert "application.master.log" in logs

    # The application directory must be gone once the application is killed
    filesystem = hdfs.connect()
    assert not filesystem.exists("/user/testuser/.skein/%s" % app.id)
Beispiel #5
0
def test_fail_on_container_failure(client, with_restarts):
    """Check that a failing container fails the whole application.

    Two instances of the ``test`` service are started. Every container whose
    id is not ``test_0`` exits with status 1, while ``test_0`` sleeps forever.
    ``max_restarts`` is 2 when ``with_restarts`` is true and 0 otherwise.

    The application must finish in the ``FAILED`` state. The logs must
    mention ``test_2`` and ``test_3`` only when restarts are enabled, and
    never ``test_4``.
    """
    script = ('if [[ "$SKEIN_CONTAINER_ID" != "test_0" ]]; then\n'
              '  exit 1\n'
              'else\n'
              '  sleep infinity\n'
              'fi')

    spec = skein.ApplicationSpec(
        name="test_fail_on_container_failure",
        services={
            'test':
            skein.Service(instances=2,
                          max_restarts=2 if with_restarts else 0,
                          resources=skein.Resources(memory=32, vcores=1),
                          script=script)
        })
    with run_application(client, spec=spec) as app:
        # The comparison used to be a bare expression whose result was
        # discarded, so the final state was never actually checked.
        assert wait_for_completion(client, app.id) == "FAILED"

    logs = get_logs(app.id)
    assert "test_0" in logs
    assert "test_1" in logs
    assert ("test_2" in logs) == with_restarts
    assert ("test_3" in logs) == with_restarts
    assert "test_4" not in logs
Beispiel #6
0
def test_node_locality(client, strict):
    """Check that node/rack placement settings round-trip through the app.

    With ``strict`` the service is pinned to a real worker host with locality
    relaxation off; otherwise bogus node and rack names are requested with
    relaxation on. In both cases a container must reach RUNNING and the
    specification read back from the application must carry the same values.
    """
    relax_locality = not strict
    if strict:
        nodes, racks = ['worker.example.com'], []
    else:
        nodes, racks = ['not.a.real.host.name'], ['not.a.real.rack.name']

    placed = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='sleep infinity',
        nodes=nodes,
        racks=racks,
        relax_locality=relax_locality,
    )
    spec = skein.ApplicationSpec(name="test_node_locality",
                                 queue="default",
                                 services={"service": placed})
    with run_application(client, spec=spec) as app:
        wait_for_containers(app, 1, states=['RUNNING'])
        remote_spec = app.get_specification()
        app.shutdown()

    remote_service = remote_spec.services['service']
    assert remote_service.nodes == nodes
    assert remote_service.racks == racks
    assert remote_service.relax_locality == relax_locality
Beispiel #7
0
def test_security_specified(client):
    """Check that application-specific security credentials are enforced.

    The application is started with freshly generated credentials. Connecting
    with those credentials works, connecting with the client's defaults
    raises ``skein.ConnectionError``, and the specification read back refers
    to the credentials as files on HDFS rather than inline bytes.
    """
    credentials = skein.Security.new_credentials()
    sleeper = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                            commands=['sleep infinity'])
    spec = skein.ApplicationSpec(name="test_security_specified",
                                 master=skein.Master(security=credentials),
                                 services={'sleeper': sleeper})

    with run_application(client, spec=spec) as app:
        assert app.security is credentials
        assert app.security != client.security

        remote_spec = app.get_specification()

        # Smoketest: a connection using the right credentials can communicate
        trusted = client.connect(app.id, security=credentials)
        trusted.get_specification()

        # A connection using the client's own credentials must be rejected
        untrusted = client.connect(app.id)
        with pytest.raises(skein.ConnectionError):
            untrusted.get_specification()

        app.shutdown()

    remote_security = remote_spec.master.security
    assert remote_security.cert_bytes is None
    assert remote_security.key_bytes is None
    assert remote_security.cert_file.source.startswith('hdfs')
    assert remote_security.key_file.source.startswith('hdfs')
Beispiel #8
0
def test_master_driver_shutdown_sequence(kind, master_cmd, service_cmd,
                                         client, tmpdir):
    """Check the final application state for each driver/service outcome.

    ``kind`` names the scenario; scenarios ending in ``succeeds`` must finish
    as SUCCEEDED and all others as FAILED. ``master_cmd`` and ``service_cmd``
    are the scripts run by the application master and by the single service.
    """
    only_service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script=service_cmd,
    )
    spec = skein.ApplicationSpec(
        name="test_master_driver_shutdown_sequence_%s" % kind,
        master=skein.Master(script=master_cmd),
        services={'service': only_service},
    )

    if kind.endswith('succeeds'):
        expected_state = 'SUCCEEDED'
    else:
        expected_state = 'FAILED'

    if kind != 'service_succeeds':
        with run_application(client, spec=spec, connect=False) as app_id:
            # service_fails   -> immediate failure
            # driver_succeeds -> immediate success
            # driver_fails    -> immediate failure
            assert wait_for_completion(client, app_id) == expected_state
    else:
        with run_application(client, spec=spec) as app:
            wait_for_containers(app, 1, states=['SUCCEEDED'])
            assert len(app.get_containers()) == 0
            # The application stays alive until the driver finishes, so it
            # has to be shut down explicitly
            app.shutdown()
            assert wait_for_completion(client, app.id) == expected_state
Beispiel #9
0
def test_client_errors_nicely_if_not_logged_in(security, not_logged_in):
    """Check that every client call raises a DriverError mentioning ``kinit``.

    Each listed client method is invoked with its arguments; it must raise
    ``skein.DriverError`` whose message contains ``kinit``.
    """
    appid = 'application_1526134340424_0012'

    env_service = skein.Service(resources=skein.Resources(memory=32, vcores=1),
                                script='env')
    spec = skein.ApplicationSpec(name="should_never_get_to_run",
                                 queue="default",
                                 services={'service': env_service})

    # (method name, positional arguments) for every call under test
    calls = [
        ('get_applications', ()),
        ('get_nodes', ()),
        ('get_queue', ('default', )),
        ('get_child_queues', ('default', )),
        ('get_all_queues', ()),
        ('application_report', (appid, )),
        ('connect', (appid, )),
        ('move_application', (appid, 'default')),
        ('kill_application', (appid, )),
        ('submit', (spec, )),
    ]

    with skein.Client(security=security) as client:
        for method_name, method_args in calls:
            method = getattr(client, method_name)
            with pytest.raises(skein.DriverError) as exc:
                method(*method_args)
            assert 'kinit' in str(exc.value)
Beispiel #10
0
def test_allow_failures_max_restarts(client, allow_failures):
    """Check how ``allow_failures`` interacts with an exhausted restart budget.

    The single-instance service always exits with status 1 and may be
    restarted twice. With ``allow_failures`` the application keeps running
    after three failed containers and succeeds once shut down manually;
    without it the application ends up FAILED.
    """
    flag = str(allow_failures).lower()
    failing = skein.Service(
        instances=1,
        max_restarts=2,
        allow_failures=allow_failures,
        resources=skein.Resources(memory=128, vcores=1),
        script="exit 1",
    )
    spec = skein.ApplicationSpec(
        name="test_max_restarts_allow_failures_%s" % flag,
        master=skein.Master(script="sleep infinity"),
        services={'myservice': failing},
    )

    with run_application(client, spec=spec) as app:
        if not allow_failures:
            # Three failures exhaust the budget and terminate the application
            assert wait_for_completion(client, app.id) == 'FAILED'
        else:
            # Three failures, after which no further containers are started
            wait_for_containers(app, 3, states=['FAILED'])
            # The application must still respond shortly afterwards
            time.sleep(0.5)
            app.get_specification()
            # Manual shutdown then ends it successfully
            app.shutdown()
            assert wait_for_completion(client, app.id) == 'SUCCEEDED'
def launch_remote_check(file: str) -> Tuple[bool, str]:
    """Run ``check_hadoop_env.py`` in a skein container and report the result.

    The current environment is uploaded as a PEX archive via ``cluster_pack``
    and shipped together with this module. If ``tf_yarn`` is installed in
    editable mode, a zip of it is shipped as well. A one-instance skein
    service then runs the check, and this function blocks until the
    application publishes a ``result`` key in its key-value store.

    Args:
        file: Value passed to the remote script's ``--file`` option.

    Returns:
        A ``(success, application_id)`` tuple; ``success`` is true when the
        published result is the string ``"True"``.
    """
    # Imported locally: this function is the only consumer in this block.
    import uuid

    logging.info('Launching remote check')
    zip_hdfs, _ = cluster_pack.upload_env(packer=cluster_pack.PEX_PACKER)
    archive_name = os.path.basename(zip_hdfs)
    with skein.Client() as client:
        files = {
            archive_name: zip_hdfs,
            'check_hadoop_env.py': __file__,
        }
        editable_packages = cluster_pack.get_editable_requirements()
        if 'tf_yarn' in editable_packages:
            tf_yarn_zip = cluster_pack.zip_path(editable_packages['tf_yarn'],
                                                False)
            logger.info(f"zip path for editable tf_yarn is {tf_yarn_zip}")
            files['tf_yarn'] = tf_yarn_zip
        service = skein.Service(
            script=f'./{archive_name} check_hadoop_env.py --file {file}',
            resources=skein.Resources(2 * 1024, 1),
            env={
                # The string lacked its f-prefix, so PEX_ROOT was the literal
                # text "/tmp/{uuid.uuid4()}/" rather than a unique directory.
                'PEX_ROOT': f'/tmp/{uuid.uuid4()}/',
                'PYTHONPATH': '.:',
            },
            files=files,
            instances=1)
        spec = skein.ApplicationSpec(
            {'HADOOP_ENV_CHECKER': service},
            acls=skein.model.ACLs(enable=True, view_users=['*']),
        )
        app = client.submit_and_connect(spec)

        logging.info('Remote check started')
        # Blocks until the remote script stores its verdict under 'result'
        result = app.kv.wait('result').decode()
        app_id = app.id
        app.shutdown()
        return result == "True", app_id
Beispiel #12
0
def test_memory_limit_exceeded(kind, client):
    """Check that exceeding the memory limit fails the app and is reported.

    The script runs either as the application master (``kind == 'master'``)
    or as a service. The application must end up FAILED and the logs must
    contain the matching message. For the master case the application
    report's diagnostics must mention the memory limit too.
    """
    limit = skein.Resources(memory=128, vcores=1)
    # Allocates noticeably more than the 128 MB limit, then lingers
    hog = 'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'

    runs_in_master = kind == 'master'
    if runs_in_master:
        master = skein.Master(resources=limit, script=hog)
        services = None
        search_txt = "memory limit"
    else:
        master = None
        services = {'service': skein.Service(resources=limit, script=hog)}
        search_txt = "memory used"

    spec = skein.ApplicationSpec(name="test_memory_limit_exceeded_%s" % kind,
                                 queue="default",
                                 master=master,
                                 services=services)
    with run_application(client, spec=spec, connect=False) as app_id:
        assert wait_for_completion(client, app_id) == "FAILED"

    assert search_txt in get_logs(app_id)

    if runs_in_master:
        diagnostics = client.application_report(app_id).diagnostics
        assert 'memory limit' in diagnostics
Beispiel #13
0
def _make_submit_specification(script, args=(), **kwargs):
    """Return the application spec used to submit ``script`` with ``args``.

    Starts from ``_make_specification(**kwargs)``. If that specification has
    a ``dask.scheduler`` service, a ``dask.client`` service is attached which
    localizes the script and runs it once, without restarts.
    """
    spec = _make_specification(**kwargs)

    environment = lookup(kwargs, 'environment', 'yarn.environment')
    files, build_script = _files_and_build_script(environment)

    has_remote_scheduler = 'dask.scheduler' in spec.services
    if has_remote_scheduler:
        # deploy_mode == 'remote': the client runs in its own container
        vcores = lookup(kwargs, 'client_vcores', 'yarn.client.vcores')
        memory_setting = lookup(kwargs, 'client_memory', 'yarn.client.memory')
        env = lookup(kwargs, 'client_env', 'yarn.client.env')
        memory = parse_memory(memory_setting, 'client')

        # Localize the script under its file name
        script_name = os.path.basename(script)
        files[script_name] = script

        joined_args = ' '.join(args)
        client_service = skein.Service(
            instances=1,
            resources=skein.Resources(vcores=vcores, memory=memory),
            max_restarts=0,
            depends=['dask.scheduler'],
            files=files,
            env=env,
            script=build_script('services client %s %s' %
                                (script_name, joined_args)))
        spec.services['dask.client'] = client_service
    return spec
Beispiel #14
0
def _make_submit_specification(script, args=(), **kwargs):
    """Return a cluster spec extended with a ``dask.client`` service.

    The client service reuses the ``environment`` file of the ``dask.worker``
    service, localizes ``script`` under its base name, activates the
    environment and then runs the script through ``dask-yarn services
    client`` with ``args``.
    """
    vcores = lookup(kwargs, 'client_vcores', 'yarn.client.vcores')
    memory_setting = lookup(kwargs, 'client_memory', 'yarn.client.memory')
    env = lookup(kwargs, 'client_env', 'yarn.client.env')
    memory = parse_memory(memory_setting, 'client')

    spec = _make_specification(**kwargs)
    worker_files = spec.services['dask.worker'].files
    environment = worker_files['environment']

    script_name = os.path.basename(script)
    localized = {'environment': environment, script_name: script}
    run_client = 'dask-yarn services client %s %s' % (script_name,
                                                      ' '.join(args))

    spec.services['dask.client'] = skein.Service(
        instances=1,
        resources=skein.Resources(vcores=vcores, memory=memory),
        max_restarts=0,
        depends=['dask.scheduler'],
        files=localized,
        env=env,
        commands=['source environment/bin/activate', run_client])
    return spec
Beispiel #15
0
def _setup_skein_cluster(pyenvs: Dict[NodeLabel, PythonEnvDescription],
                         task_specs: Dict[str, TaskSpec] = TASK_SPEC_NONE,
                         *,
                         skein_client: skein.Client = None,
                         files: Dict[str, str] = None,
                         env: Dict[str, str] = None,
                         queue: str = "default",
                         acls: ACLs = None,
                         file_systems: List[str] = None,
                         log_conf_file: str = None,
                         standalone_client_mode: bool = False) -> SkeinCluster:
    """Submit a skein application with one service per task type.

    Args:
        pyenvs: Python environment description per node label; looked up
            with each task spec's ``label``.
        task_specs: Task specification per task type.
        skein_client: Client used for submission; a new ``skein.Client`` is
            created when omitted.
        files: Extra files, forwarded to ``_setup_task_env``.
        env: Extra environment variables, forwarded to ``_setup_task_env``.
            A fresh empty dict is used when omitted.
        queue: Queue the application is submitted to.
        acls: ACLs for the application.
        file_systems: File systems passed to the application spec.
        log_conf_file: Forwarded to ``gen_task_cmd``.
        standalone_client_mode: Forwarded to ``_setup_cluster_tasks``.

    Returns:
        A ``SkeinCluster`` bundling the client, the connected application,
        the ``(task_type, instances)`` list, the cluster spec, the event
        listener thread and the events dict it fills.
    """
    # The default used to be a literal ``{}``: one dict shared by every call
    # and handed to ``_setup_task_env``. Create a fresh one per call instead.
    if env is None:
        env = {}

    # Prepend the JVM options to whatever is already configured
    os.environ["JAVA_TOOL_OPTIONS"] = \
        "-XX:ParallelGCThreads=1 -XX:CICompilerCount=2 "\
        f"{os.environ.get('JAVA_TOOL_OPTIONS', '')}"

    with tempfile.TemporaryDirectory() as tempdir:
        task_files, task_env = _setup_task_env(tempdir, files, env)
        services = {}
        for task_type, task_spec in task_specs.items():
            pyenv = pyenvs[task_spec.label]
            # Each service gets its own copy so per-task additions don't leak
            service_env = task_env.copy()
            if task_spec.termination_timeout_seconds >= 0:
                _add_to_env(service_env, "SERVICE_TERMINATION_TIMEOUT_SECONDS",
                            str(task_spec.termination_timeout_seconds))
            services[task_type] = skein.Service(
                script=gen_task_cmd(pyenv, log_conf_file),
                resources=skein.model.Resources(task_spec.memory,
                                                task_spec.vcores),
                max_restarts=0,
                instances=task_spec.instances,
                node_label=task_spec.label.value,
                files={
                    **task_files, pyenv.dest_path: pyenv.path_to_archive
                },
                env=service_env)

        spec = skein.ApplicationSpec(services,
                                     queue=queue,
                                     acls=acls,
                                     file_systems=file_systems)

        if skein_client is None:
            skein_client = skein.Client()

        task_instances = [(task_type, task_spec.instances)
                          for task_type, task_spec in task_specs.items()]
        events: Dict[str, Dict[str, str]] = \
            {task: {} for task in iter_tasks(task_instances)}
        app = skein_client.submit_and_connect(spec)
        # Start a thread which collects all events posted by all tasks in kv store
        event_listener = Thread(target=_aggregate_events,
                                args=(app.kv, events))
        event_listener.start()

        cluster_spec = _setup_cluster_tasks(task_instances, app,
                                            standalone_client_mode)

        return SkeinCluster(skein_client, app, task_instances, cluster_spec,
                            event_listener, events)
Beispiel #16
0
def test_file_systems(client):
    """Check that a container can write to an explicitly listed file system.

    The service touches a file on HDFS while the application spec names the
    HDFS namenode in ``file_systems``; the application must succeed.
    """
    touch_file = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='hdfs dfs -touchz /user/testuser/test_file_systems',
    )
    spec = skein.ApplicationSpec(
        name="test_file_systems",
        queue="default",
        services={'service': touch_file},
        file_systems=["hdfs://master.example.com:9000"],
    )

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'
Beispiel #17
0
def create_skein_app():
    """Build the skein application spec for the distributed example.

    The spec holds a single service, keyed by ``NODE_NAME``, with two
    instances. Each instance runs ``example.pex`` against this module with
    ``--server``. The application is submitted to the ``dev`` queue.

    Returns:
        The ``skein.ApplicationSpec``; nothing is submitted here.
    """
    # Imported locally: this function is the only consumer in this block.
    import uuid

    service = skein.Service(commands=['./example.pex distributed.py --server'],
                            resources=skein.Resources(2 * 1024, 1),
                            # The string lacked its f-prefix, so PEX_ROOT was
                            # the literal text "/tmp/{uuid.uuid4()}/" rather
                            # than a unique directory.
                            env={'PEX_ROOT': f'/tmp/{uuid.uuid4()}/'},
                            files={
                                'example.pex': 'example.pex',
                                'distributed.py': __file__
                            },
                            instances=2)
    spec = skein.ApplicationSpec({NODE_NAME: service}, queue='dev')
    return spec
Beispiel #18
0
def test_add_container(client):
    """Check adding containers at runtime, with and without env overrides.

    The service starts with zero instances. Its script echoes ``MYENV`` and
    ``MYENV2`` prefixed with the container id, then exits 1 when ``MYENV`` is
    ``bar`` and 0 otherwise.
    """
    script = ('echo "$SKEIN_CONTAINER_ID - MYENV=$MYENV"\n'
              'echo "$SKEIN_CONTAINER_ID - MYENV2=$MYENV2"\n'
              'if [[ "$MYENV" == "bar" ]]; then\n'
              '  exit 1\n'
              'else\n'
              '  exit 0\n'
              'fi')

    on_demand = skein.Service(instances=0,
                              resources=skein.Resources(memory=32, vcores=1),
                              env={'MYENV': 'foo', 'MYENV2': 'baz'},
                              max_restarts=1,
                              script=script)
    spec = skein.ApplicationSpec(name="test_add_container",
                                 master=skein.Master(script="sleep infinity"),
                                 services={'test': on_demand})

    with run_application(client, spec=spec) as app:
        # First container: uses the service's own environment
        container = app.add_container('test')
        assert container.instance == 0
        wait_for_containers(app, 1, states=['RUNNING', 'SUCCEEDED'])

        # Unknown service names are rejected
        with pytest.raises(ValueError):
            app.add_container('foobar')

        # Second container: MYENV overridden to the failing value
        container = app.add_container('test', {'MYENV': 'bar'})
        assert container.instance == 1

        # The override makes the container fail; with one restart allowed it
        # fails twice and then takes the whole application down
        assert wait_for_completion(client, app.id) == 'FAILED'

    logs = get_logs(app.id)
    # (container id, expected MYENV); MYENV2 is never overridden
    expected_env = [("test_0", "foo"), ("test_1", "bar"), ("test_2", "bar")]
    for container_id, myenv in expected_env:
        assert "%s - MYENV=%s" % (container_id, myenv) in logs
        assert "%s - MYENV2=baz" % container_id in logs

    assert "test_3" not in logs
Beispiel #19
0
def test_file_systems(client):
    """Check that a container can touch a file on a listed file system.

    The application spec names the HDFS namenode in ``file_systems`` and the
    single service touches a file there; the run must succeed.
    """
    touch_file = skein.Service(
        resources=skein.Resources(memory=124, vcores=1),
        commands=['hdfs dfs -touchz /user/testuser/test_file_systems'],
    )
    spec = skein.ApplicationSpec(
        name="test_file_systems",
        queue="default",
        services={'service': touch_file},
        file_systems=["hdfs://master.example.com:9000"],
    )

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)
Beispiel #20
0
    def _build_specification(self, cluster, cert_path, key_path):
        """Build the skein application spec for a dask-gateway cluster.

        The scheduler runs as the application master. Workers are declared
        as a ``dask.worker`` service with zero initial instances, no restarts
        and failures allowed. Both receive the configured localized files
        plus the TLS certificate and key.
        """
        # Dict entries become skein.File objects; other values pass through
        files = {}
        for name, value in cluster.config.localize_files.items():
            if isinstance(value, dict):
                value = skein.File.from_dict(value)
            files[name] = value

        files["dask.crt"] = cert_path
        files["dask.pem"] = key_path

        scheduler_cmd = " ".join(self.get_scheduler_command(cluster))
        worker_args = self.get_worker_command(
            cluster,
            worker_name="$DASK_GATEWAY_WORKER_NAME",
            scheduler_address="$DASK_GATEWAY_SCHEDULER_ADDRESS",
        )
        worker_cmd = " ".join(worker_args)

        # Each script is the configured setup followed by the command
        scheduler_script = "{}\n{}".format(cluster.config.scheduler_setup,
                                           scheduler_cmd)
        worker_script = "{}\n{}".format(cluster.config.worker_setup,
                                        worker_cmd)

        security = self._get_security(cluster)
        scheduler_resources = skein.Resources(
            memory="%d b" % cluster.config.scheduler_memory,
            vcores=cluster.config.scheduler_cores,
        )
        scheduler_env = self.get_scheduler_env(cluster)
        master = skein.Master(
            security=security,
            resources=scheduler_resources,
            files=files,
            env=scheduler_env,
            script=scheduler_script,
        )

        worker_resources = skein.Resources(
            memory="%d b" % cluster.config.worker_memory,
            vcores=cluster.config.worker_cores,
        )
        worker_env = self.get_worker_env(cluster)
        worker_service = skein.Service(
            resources=worker_resources,
            instances=0,
            max_restarts=0,
            allow_failures=True,
            files=files,
            env=worker_env,
            script=worker_script,
        )

        return skein.ApplicationSpec(
            name="dask-gateway",
            queue=cluster.config.queue,
            user=cluster.username,
            master=master,
            services={"dask.worker": worker_service},
        )
Beispiel #21
0
def test_set_log_level(client):
    """Check that a ``debug`` master log level produces DEBUG log output."""
    lister = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                           script='ls')
    # NOTE(review): the application name does not match this test's name and
    # looks copy-pasted -- confirm whether it should be "test_set_log_level".
    spec = skein.ApplicationSpec(
        name="test_custom_log4j_properties",
        queue="default",
        master=skein.Master(log_level='debug'),
        services={'service': lister},
    )

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'

    assert 'DEBUG' in get_logs(app.id)
Beispiel #22
0
def test_memory_limit_exceeded(client):
    """Check that a service exceeding its memory limit fails the application.

    The application must end up FAILED and the logs must contain
    ``memory used``.
    """
    # Allocates noticeably more than the 128 MB limit, then lingers
    hog = 'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'
    greedy = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                           commands=[hog])
    spec = skein.ApplicationSpec(name="test_memory_limit_exceeded",
                                 queue="default",
                                 services={"service": greedy})

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == "FAILED"

    assert "memory used" in get_logs(app.id)
 def launch_skein():
     """Run a skein job that cats ``filepath_on_hdfs`` and print its logs.

     ``filepath_on_hdfs`` is taken from the enclosing scope. The job is
     submitted, waited on until finished, and every entry of the collected
     application logs is printed.
     """
     with skein.Client() as client:
         cat_service = skein.Service(
             resources=skein.model.Resources("1 GiB", 1),
             script=f'''
                 set -x
                 hdfs dfs -cat {filepath_on_hdfs}
             '''
         )
         app_id = client.submit(
             skein.ApplicationSpec(services={"service": cat_service}))
         skein_launcher.wait_for_finished(client, app_id)
         collected = skein_launcher.get_application_logs(client, app_id, 2)
         for log_name, log_text in collected.items():
             print(f"skein logs:{log_name} {log_text}")
Beispiel #24
0
def test_proxy_user_no_permissions(client):
    """Check that submitting as a user without proxy permission is refused.

    Submission must raise ``skein.DriverError`` whose message names both
    ``testuser`` and ``bob``.
    """
    env_service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        commands=['env'],
    )
    spec = skein.ApplicationSpec(name="test_proxy_user_no_permissions",
                                 user="******",
                                 services={'service': env_service})

    # Submitting on behalf of that user is not permitted
    with pytest.raises(skein.DriverError) as exc:
        client.submit(spec)

    message = str(exc.value)
    for name in ('testuser', 'bob'):
        assert name in message
Beispiel #25
0
def test_custom_log4j_properties(client, tmpdir):
    """Check that a custom log4j configuration file is used by the master.

    ``custom_log4j_properties`` is written to a temporary file that is passed
    as the master's ``log_config``. The logs of the successful run must
    contain the marker ``CUSTOM-LOG4J-SUCCEEDED``.
    """
    configpath = str(tmpdir.join("log4j.properties"))
    lister = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                           script='ls')
    spec = skein.ApplicationSpec(
        name="test_custom_log4j_properties",
        queue="default",
        master=skein.Master(log_config=configpath),
        services={'service': lister},
    )
    # The file only needs to exist by the time the application is submitted
    with open(configpath, 'w') as config_file:
        config_file.write(custom_log4j_properties)

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'

    assert 'CUSTOM-LOG4J-SUCCEEDED' in get_logs(app.id)
Beispiel #26
0
    def _build_specification(self, cluster_info, cert_path, key_path):
        """Build the skein application spec for a dask-gateway cluster.

        The scheduler runs as the application master. Workers are declared
        as a ``dask.worker`` service with zero initial instances, no restarts
        and failures allowed. Both share the same files and environment.
        """
        # Dict entries become skein.File objects; other values pass through
        files = {}
        for name, value in self.localize_files.items():
            if isinstance(value, dict):
                value = skein.File.from_dict(value)
            files[name] = value

        files["dask.crt"] = cert_path
        files["dask.pem"] = key_path

        env = self.get_env(cluster_info)

        # Each script is the configured setup followed by the command
        scheduler_script = "\n".join(
            (self.scheduler_setup, self.scheduler_command))
        worker_script = "\n".join((self.worker_setup, self.worker_command))

        security = self._get_security(cluster_info)
        scheduler_resources = skein.Resources(
            memory="%d b" % self.scheduler_memory,
            vcores=self.scheduler_cores,
        )
        master = skein.Master(
            security=security,
            resources=scheduler_resources,
            files=files,
            env=env,
            script=scheduler_script,
        )

        worker_resources = skein.Resources(
            memory="%d b" % self.worker_memory,
            vcores=self.worker_cores,
        )
        worker_service = skein.Service(
            resources=worker_resources,
            instances=0,
            max_restarts=0,
            allow_failures=True,
            files=files,
            env=env,
            script=worker_script,
        )

        return skein.ApplicationSpec(
            name="dask-gateway",
            queue=self.queue,
            user=cluster_info.username,
            master=master,
            services={"dask.worker": worker_service},
        )
Beispiel #27
0
def test_proxy_user_no_permissions(client, hadoop3):
    """Check that submitting as a user without proxy permission is refused.

    Skipped on hadoop3. Otherwise submission must raise
    ``skein.DriverError`` whose message names both ``testuser`` and ``bob``.
    """
    if hadoop3:
        pytest.skip("Lack of proxyuser permissions causes "
                    "yarnclient to hang in hadoop3")

    env_service = skein.Service(
        resources=skein.Resources(memory=32, vcores=1),
        script='env',
    )
    spec = skein.ApplicationSpec(name="test_proxy_user_no_permissions",
                                 user="******",
                                 services={'service': env_service})

    # Submitting on behalf of that user is not permitted
    with pytest.raises(skein.DriverError) as exc:
        client.submit(spec)

    message = str(exc.value)
    for name in ('testuser', 'bob'):
        assert name in message
Beispiel #28
0
def test_container_permissions(client, has_kerberos_enabled):
    """Check which user a container runs as and that it can write to HDFS.

    ``$USER`` must be ``testuser`` in both modes. The login id reported by
    ``whoami`` is ``testuser`` with kerberos and ``yarn`` without.
    """
    container_commands = [
        'echo "USER_ENV=[$USER]"',
        'echo "LOGIN_ID=[$(whoami)]"',
        'hdfs dfs -touchz /user/testuser/test_container_permissions',
    ]
    spec = skein.ApplicationSpec(
        name="test_container_permissions",
        queue="default",
        services={
            'service': skein.Service(
                resources=skein.Resources(memory=128, vcores=1),
                commands=container_commands,
            ),
        },
    )

    with run_application(client, spec=spec) as app:
        wait_for_success(app)

    logs = get_logs(app.app_id)
    assert "USER_ENV=[testuser]" in logs
    expected_login = "testuser" if has_kerberos_enabled else "yarn"
    assert "LOGIN_ID=[%s]" % expected_login in logs
Beispiel #29
0
def test_container_environment(runon, client, has_kerberos_enabled):
    """Check the environment seen by a script run in a container.

    When ``runon == 'service'`` the script runs as a ``skein.Service``;
    otherwise it runs as the ``skein.Master``. The script dumps ``env``,
    echoes ``whoami``, touches a file in HDFS and lists YARN applications.
    The application must finish as ``SUCCEEDED`` and its logs must contain
    the expected variables and no ``CLASSPATH``.
    """
    script = ('set -e\n'
              'env\n'
              'echo "LOGIN_ID=[$(whoami)]"\n'
              'hdfs dfs -touchz /user/testuser/test_container_permissions\n'
              'yarn application -list')
    on_service = runon == 'service'
    container_opts = dict(resources=skein.Resources(memory=512, vcores=1),
                          script=script)

    # Exactly one of `services` / `master` carries the script.
    if on_service:
        services, master = {'service': skein.Service(**container_opts)}, None
    else:
        services, master = None, skein.Master(**container_opts)

    spec = skein.ApplicationSpec(
        name="test_container_permissions_%s" % runon,
        queue="default",
        services=services,
        master=master,
    )

    with run_application(client, spec=spec, connect=False) as app_id:
        final_status = wait_for_completion(client, app_id)
        assert final_status == 'SUCCEEDED'

    logs = get_logs(app_id)

    # Fragments that must appear in the logs, in the order they are checked.
    required = [
        "USER=testuser",
        'SKEIN_APPMASTER_ADDRESS=',
        'SKEIN_APPLICATION_ID=%s' % app_id,
    ]
    if on_service:
        required.append('SKEIN_CONTAINER_ID=service_0')
    required.extend(['SKEIN_RESOURCE_MEMORY=512', 'SKEIN_RESOURCE_VCORES=1'])
    for fragment in required:
        assert fragment in logs
    assert 'CLASSPATH' not in logs

    if not has_kerberos_enabled:
        assert "LOGIN_ID=[yarn]" in logs
        assert "HADOOP_USER_NAME" in logs
    else:
        assert "LOGIN_ID=[testuser]" in logs
        assert "HADOOP_USER_NAME" not in logs
Beispiel #30
0
def _make_specification(**kwargs):
    """Build the ``skein.ApplicationSpec`` used to run a dask cluster.

    If every keyword argument is ``None`` and the ``yarn.specification``
    configuration value is truthy, that value is loaded as-is (from a dict or
    from a file path). Otherwise each setting is resolved with ``lookup``
    against ``kwargs`` and the matching ``yarn.*`` configuration key.

    A ``dask.worker`` service is always defined. In ``"remote"`` deploy mode
    a ``dask.scheduler`` service is defined as well and the workers depend
    on it.

    Raises
    ------
    ValueError
        If ``deploy_mode`` is neither ``"remote"`` nor ``"local"``, or if no
        environment is provided.
    """
    no_overrides = all(value is None for value in kwargs.values())
    if no_overrides:
        # Only consult the configuration when no overrides were given.
        full_spec = dask.config.get("yarn.specification")
        if full_spec:
            # No overrides and full specification in configuration
            if isinstance(full_spec, dict):
                return skein.ApplicationSpec.from_dict(full_spec)
            return skein.ApplicationSpec.from_file(full_spec)

    deploy_mode = lookup(kwargs, "deploy_mode", "yarn.deploy-mode")
    if deploy_mode not in {"remote", "local"}:
        raise ValueError("`deploy_mode` must be one of {'remote', 'local'}, "
                         "got %r" % deploy_mode)

    name = lookup(kwargs, "name", "yarn.name")
    queue = lookup(kwargs, "queue", "yarn.queue")
    tags = lookup(kwargs, "tags", "yarn.tags")
    user = lookup(kwargs, "user", "yarn.user")

    environment = lookup(kwargs, "environment", "yarn.environment")
    if environment is None:
        raise ValueError(
            "You must provide a path to a Python environment for the workers.\n"
            "This may be one of the following:\n"
            "- A conda environment archived with conda-pack\n"
            "- A virtual environment archived with venv-pack\n"
            "- A path to a conda environment, specified as conda://...\n"
            "- A path to a virtual environment, specified as venv://...\n"
            "- A path to a python binary to use, specified as python://...\n"
            "\n"
            "See http://yarn.dask.org/environments.html for more information.")

    n_workers = lookup(kwargs, "n_workers", "yarn.worker.count")
    worker_restarts = lookup(kwargs, "worker_restarts", "yarn.worker.restarts")
    worker_env = lookup(kwargs, "worker_env", "yarn.worker.env")
    worker_vcores = lookup(kwargs, "worker_vcores", "yarn.worker.vcores")
    worker_memory = parse_memory(
        lookup(kwargs, "worker_memory", "yarn.worker.memory"), "worker")

    files, build_script = _files_and_build_script(environment)

    is_remote = deploy_mode == "remote"
    services = {}

    if is_remote:
        # The scheduler gets its own single-instance, non-restarting service.
        scheduler_vcores = lookup(kwargs, "scheduler_vcores",
                                  "yarn.scheduler.vcores")
        scheduler_memory = parse_memory(
            lookup(kwargs, "scheduler_memory", "yarn.scheduler.memory"),
            "scheduler")
        scheduler_resources = skein.Resources(vcores=scheduler_vcores,
                                              memory=scheduler_memory)
        services["dask.scheduler"] = skein.Service(
            instances=1,
            resources=scheduler_resources,
            max_restarts=0,
            files=files,
            script=build_script("services scheduler"),
        )

    worker_resources = skein.Resources(vcores=worker_vcores,
                                       memory=worker_memory)
    services["dask.worker"] = skein.Service(
        instances=n_workers,
        resources=worker_resources,
        max_restarts=worker_restarts,
        depends=["dask.scheduler"] if is_remote else None,
        files=files,
        env=worker_env,
        script=build_script("services worker"),
    )

    return skein.ApplicationSpec(name=name,
                                 queue=queue,
                                 tags=tags,
                                 user=user,
                                 services=services)