Example #1
import os
import socket

from distributed import LocalCluster
from knit import Knit

try:
    import hdfs3
except ImportError:
    hdfs3 = None


def setup_cluster(config):
    if 'scheduler.ip' not in config:
        scheduler_ip = socket.gethostbyname(socket.gethostname())
    else:
        scheduler_ip = config['scheduler.ip']
    cluster = LocalCluster(n_workers=0,
                           ip=scheduler_ip,
                           port=config['scheduler.port'],
                           diagnostics_port=config['scheduler.bokeh_port'])

    if hdfs3 is not None:
        hdfs = hdfs3.HDFileSystem(host=config.get('hdfs.host'),
                                  port=config.get('hdfs.port'))
    else:
        hdfs = None

    knit = Knit(hdfs=hdfs,
                hdfs_home=config.get('hdfs.home'),
                rm=config.get('yarn.host'),
                rm_port=config.get('yarn.port'))

    command = ('$PYTHON_BIN $CONDA_PREFIX/bin/dask-worker '
               '--nprocs={nprocs:d} '
               '--nthreads={nthreads:d} '
               '--memory-limit={memory_limit:d} '
               '{scheduler_address} '
               '> /tmp/worker-log.out '
               '2> /tmp/worker-log.err').format(
                    nprocs=config['worker.processes'],
                    nthreads=config['worker.threads_per_process'],
                    memory_limit=int(config['worker.memory'] * 1e6),
                    scheduler_address=cluster.scheduler.address)

    app_id = knit.start(command,
                        env=config['cluster.env'],
                        num_containers=config['cluster.count'],
                        virtual_cores=config['worker.cpus'],
                        memory=config['worker.memory'],
                        queue=config['yarn.queue'],
                        app_name='dask',
                        checks=False)

    # Add a few missing fields to config before writing to disk
    config2 = config.copy()
    # The ip is optional, the port may be chosen dynamically
    config2['scheduler.ip'] = cluster.scheduler.ip
    config2['scheduler.port'] = cluster.scheduler.port
    # Fill in optional parameters with auto-detected versions
    config2['yarn.host'] = knit.conf['rm']
    config2['yarn.port'] = knit.conf['rm_port']
    config2['hdfs.home'] = knit.hdfs_home
    # Add in runtime information like app_id and daemon pid
    config2['application.id'] = app_id
    config2['application.pid'] = os.getpid()

    return cluster, knit, config2
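For reference, every key read by setup_cluster can be collected into one flat,
dot-delimited mapping. The sketch below lists those keys with illustrative
values; only the key names come from the function above, and scheduler.ip is
omitted to exercise the auto-detection branch.

# A hypothetical config for setup_cluster; key names are taken from the
# function above, values are placeholders.
config = {
    'scheduler.port': 8786,             # 'scheduler.ip' omitted: auto-detected
    'scheduler.bokeh_port': 8787,
    'hdfs.host': 'localhost',
    'hdfs.port': 8020,
    'hdfs.home': '/user/dask',
    'yarn.host': 'localhost',
    'yarn.port': 8088,
    'yarn.queue': 'default',
    'worker.processes': 1,
    'worker.threads_per_process': 2,
    'worker.memory': 2048,              # in MB; scaled to bytes via * 1e6 above
    'worker.cpus': 1,
    'cluster.env': 'env.zip',
    'cluster.count': 2,
}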
Example #2
def test_password_pass():
    with pytest.raises(KnitException):
        k = Knit(pars={
            'hadoop.http.authentication.type': 'simple',
            'hadoop.http.authentication.simple.anonymous.allowed': 'false'})
    k = Knit(pars={
        'hadoop.http.authentication.type': 'simple',
        'hadoop.http.authentication.simple.anonymous.allowed': 'false'},
        password='hello')
    assert k.yarn_api.auth[1] == 'hello'
Example #3
@pytest.fixture
def k():
    knitter = Knit(nn='localhost', rm='localhost', nn_port=8020, rm_port=8088,
                   replication_factor=1)
    try:
        yield knitter
    finally:
        # always kill, to avoid follow-on resource pressure
        try:
            knitter.kill()
        except Exception:
            pass
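A test consuming this fixture might look like the sketch below; the test name
is hypothetical, while the 'env' command, the memory figure and the 30-second
wait mirror the HDFS test further down the page.

# Hypothetical test using the fixture above; start/wait_for_completion/kill
# are the same Knit calls shown elsewhere on this page.
def test_simple_app(k):
    app_id = k.start('env', memory=128)
    assert app_id
    if not k.wait_for_completion(30):
        k.kill()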
Example #4
class YarnPool(RemotePool):
    """The Yarn Pool mananger."""

    def __init__(self, processes=None, port=0, authkey=None):
        super(YarnPool, self).__init__(processes=processes,
                                       port=port,
                                       authkey=authkey,
                                       workerscript=None)
        self.stopping = False
        self.knit = Knit(autodetect=True)

        cmd = ('python remoteworker.py --host {} --port {} --key {}'
               .format(socket.gethostname(),
                       self.server.address[1],
                       self.authkey))
        self.app_id = self.knit.start(
            cmd, num_containers=self._processes,
            files=['joblibhadoop/yarn/remoteworker.py', ])
        self.thread = Thread(target=self._monitor_appid)
        self.thread.daemon = True
        self.thread.start()

    def _start_remote_worker(self, pid):
        remote_worker = RemoteWorker(pid)
        self._pool.append(remote_worker)

    def _monitor_appid(self):
        while not self.stopping:
            try:
                status = self.knit.status()
                yarn_state = status['app']['state']
                print("YARN application is {}".format(yarn_state))
                # if yarn_state == 'FINISHED':
                #     self.terminate()
            except Exception:
                pass
            sleep(1)

    def terminate(self):
        self.stopping = True
        super(YarnPool, self).terminate()
        self.knit.kill()

    def __reduce__(self):
        pass
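For context, this pool backs a joblib parallel backend. A hedged usage sketch
follows: register_yarn and the 'yarn' backend name are assumptions based on
the joblibhadoop package layout visible in the files= argument above, not
something this snippet confirms.

# Hypothetical joblib usage; register_yarn and the 'yarn' backend name are
# assumptions, not shown in the class above.
from math import sqrt
from joblib import Parallel, delayed, parallel_backend
from joblibhadoop.yarn import register_yarn

register_yarn()
with parallel_backend('yarn', n_jobs=4):
    results = Parallel(verbose=10)(delayed(sqrt)(i ** 2) for i in range(10))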
Example #5
class YarnPool(RemotePool):

    def __init__(self, processes=None, port=0, authkey=None):
        super(YarnPool, self).__init__(processes=processes,
                                       port=port,
                                       authkey=authkey,
                                       workerscript=None)
        self.stopping = False

        from knit import Knit
        self.k = Knit(autodetect=True)

        cmd = "python remoteworker.py --port %d --key %s" % (self.s.address[1], self.authkey)
        self.app_id = self.k.start(cmd,
                                   self._processes,
                                   files=['remoteworker.py', ])
        self.t = Thread(target=self._monitor_appid)
        self.t.daemon = True
        self.t.start()

    def _start_remote_worker(self, pid):
        rw = RemoteWorker(pid)
        self._pool.append(rw)

    def spinning_cursor(self):
        while True:
            for cursor in '|/-\\':
                yield cursor

    def _monitor_appid(self):
        cursor = self.spinning_cursor()
        while not self.stopping:
            try:
                status = self.k.status(self.app_id)
                yarnState = status['app']['state']
                print "YARN application is", yarnState
            except Exception:
                pass
            sleep(1)

    def terminate(self):
        self.stopping = True
        super(YarnPool, self).terminate()

        self.k.kill(self.app_id)
Example #6
def test_hdfs_home():
    hdfs3 = pytest.importorskip('hdfs3')
    hdfs = hdfs3.HDFileSystem()
    d = '/tmp/test'
    try:
        hdfs.mkdir(d)
        k = Knit(nn='localhost', rm='localhost', nn_port=8020, rm_port=8088,
                 replication_factor=1, hdfs_home=d)

        env_zip = k.create_env(env_name='dev', packages=['python=2.7'], remove=True)
        k.start('env', files=[env_zip], memory=128)

        assert d + '/.knitDeps' in hdfs.ls(d, False)
        assert d + "/.knitDeps/knit-1.0-SNAPSHOT.jar" in hdfs.ls(d + '/.knitDeps', False)
        assert d + "/.knitDeps/dev.zip" in hdfs.ls(d + '/.knitDeps', False)
        if not k.wait_for_completion(30):
            k.kill()

    finally:
        hdfs.rm(d, True)
        k.kill()
Example #7
@pytest.fixture
def clear():
    c = CondaCreator()
    try:
        yield
    finally:
        shutil.rmtree(c.conda_envs)
        try:
            k = Knit()
            import hdfs3
            hdfs = hdfs3.HDFileSystem()
            hdfs.rm(k.hdfs_home, recursive=True)
        except Exception:
            pass
Example #8
class DaskYARNCluster(object):
    """
    Implements a dask cluster with YARN containers running the worker processes.
    A dask scheduler is started locally upon instantiation, but you must call
    ``start()`` to initiate the building of containers by YARN.
    
    Parameters
    ----------
    nn, nn_port, rm, rm_port, user, autodetect: see knit.Knit
    env: str or None
        If provided, the path of a zipped conda env to put in containers
    packages: list of str
        Packages to install in the env to provide to containers *if* env is 
        None. Uses conda spec for pinning versions. dask and distributed will
        always be included.
    channels: list of str
        If building an environment, pass these extra channels to conda using
        ``-c`` (i.e., in addition to, and with higher priority than, any
        system default channels).
    conda_pars: dict
        Extra parameters to pass to CondaCreator
    ip: IP-like string or None
        Address for the scheduler to listen on. If not given, uses the system
        IP.
    """
    def __init__(self,
                 autodetect=True,
                 packages=None,
                 ip=None,
                 env=None,
                 channels=None,
                 conda_pars=None,
                 **kwargs):

        ip = ip or socket.gethostbyname(socket.gethostname())

        self.env = env
        self.application_master_container = None
        self.app_id = None
        self.channels = channels
        self.conda_pars = conda_pars

        try:
            self.local_cluster = LocalCluster(n_workers=0, ip=ip)
        except (OSError, IOError):
            self.local_cluster = LocalCluster(n_workers=0,
                                              scheduler_port=0,
                                              ip=ip)

        self.packages = list(
            sorted(unique((packages or []) + global_packages, key=first_word)))

        self.knit = Knit(autodetect=autodetect, **kwargs)

        atexit.register(self.stop)

    @property
    def scheduler_address(self):
        return self.local_cluster.scheduler_address

    def start(self, n_workers=1, cpus=1, memory=2048, checks=True, **kwargs):
        """
        Initiate workers. If required, the environment is first built and
        uploaded to HDFS, and then a YARN application with the required
        number of containers is created.
        
        Parameters
        ----------
        n_workers: int
            How many containers to create
        cpus: int=1
            How many CPU cores are available in each container
        memory: int=2048
            Memory available to each dask worker (in MB)
        checks: bool=True
            Whether to run pre-flight checks before submitting app to YARN
        kwargs: passed to ``Knit.start()``
        
        Returns
        -------
        YARN application ID.
        """
        c = CondaCreator(channels=self.channels, **(self.conda_pars or {}))
        if self.env is None:
            env_name = 'dask-' + sha1('-'.join(
                self.packages).encode()).hexdigest()
            env_path = os.path.join(c.conda_envs, env_name)
            if os.path.exists(env_path + '.zip'):
                # zipfile exists, ready to upload
                self.env = env_path + '.zip'
            elif os.path.exists(env_path):
                # environment exists, can zip and upload
                c.zip_env(env_path)
                self.env = env_path + '.zip'
            else:
                # create env from scratch
                self.env = c.create_env(env_name=env_name,
                                        packages=self.packages)
        elif not self.env.endswith('.zip'):
            # given env directory, so zip it
            c.zip_env(self.env)
            self.env = self.env + '.zip'

        # TODO: memory should not be total available?
        command = '$PYTHON_BIN $CONDA_PREFIX/bin/dask-worker --nprocs=1 ' \
                  '--nthreads=%d --memory-limit=%d %s > ' \
                  '/tmp/worker-log.out 2> /tmp/worker-log.err' % (
                      cpus, memory * 1e6,
                      self.local_cluster.scheduler.address)

        app_id = self.knit.start(command,
                                 env=self.env,
                                 num_containers=n_workers,
                                 virtual_cores=cpus,
                                 memory=memory,
                                 checks=checks,
                                 **kwargs)
        self.app_id = app_id
        return app_id

    def remove_worker(self, container_id):
        """
        Stop worker and remove container

        Parameters
        ----------
        container_id

        Returns
        -------
        None
        """
        self.knit.remove_containers(container_id)

    @property
    def workers(self):
        """
        list of running container ids
        """

        # container ...00001 is the application master's container; it should
        # not be removed or counted as a worker

        containers = list(self.knit.get_container_statuses())
        containers.sort()
        self.application_master_container = containers.pop(0)
        return containers

    @gen.coroutine
    def _start(self):
        pass

    def stop(self):
        """Kill the YARN application and all workers"""
        if self.knit:
            self.knit.kill()

    def add_workers(self, n_workers=1, cpus=1, memory=2048):
        """
        Non-blocking function to ask YARN for more containers/dask-workers

        Parameters
        ----------
        n_workers: int
            number of containers to add (default: 1)
        cpus: int
            number of CPUs per container (default: 1)
        memory: int
            amount of memory to allocate per container (in MB)

        Returns
        -------
        None
        """

        self.knit.add_containers(num_containers=n_workers,
                                 virtual_cores=cpus,
                                 memory=memory)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def close(self):
        """Stop the scheduler and workers"""
        self.stop()
        self.local_cluster.close()
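Putting the class to work: a minimal usage sketch, assuming a reachable YARN
cluster and a pre-zipped conda environment at a placeholder path; the sizes
and the Client import from distributed are illustrative, not taken from this
file.

# Hypothetical usage of DaskYARNCluster; the env path and sizes are
# placeholders.
from distributed import Client

with DaskYARNCluster(env='/path/to/env.zip') as cluster:
    cluster.start(n_workers=2, cpus=1, memory=1024)
    client = Client(cluster.scheduler_address)
    print(client.submit(lambda x: x + 1, 10).result())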
Example #9
def test_connection_error():
    k = Knit(rm_port=8089)
    with pytest.raises(YARNException) as e:
        k.start('ls')
    assert 'proxy' in str(e.value)