예제 #1
0
    def test_join_timeout(self):
        # Leave a long-running task on the worker when cleanup() is called,
        # exercising the pool's join-timeout handling.
        logging.debug('')
        logging.debug('test_join_timeout')

        queue = WorkerPool.get()
        request = (time.sleep, (3,), {}, self.reply_q)
        queue.put(request)
        WorkerPool.cleanup()
예제 #2
0
    def test_never_released(self):
        # Clean up without ever releasing the worker back to the pool.
        logging.debug('')
        logging.debug('test_never_released')

        queue = WorkerPool.get()
        request = (self.add, (1,), {}, self.reply_q)
        queue.put(request)
        WorkerPool.cleanup()
    def test_never_released(self):
        """Cleanup must cope with a worker that was never released."""
        logging.debug('')
        logging.debug('test_never_released')

        busy_q = WorkerPool.get()
        busy_q.put((self.add, (1,), {}, self.reply_q))
        WorkerPool.cleanup()
    def test_join_timeout(self):
        """Cleanup while a slow task is running must hit the join timeout."""
        logging.debug('')
        logging.debug('test_join_timeout')

        busy_q = WorkerPool.get()
        busy_q.put((time.sleep, (3,), {}, self.reply_q))
        WorkerPool.cleanup()
예제 #5
0
    def max_servers(self, resource_desc):
        """
        Returns the total of :meth:`max_servers` across all
        :class:`LocalAllocator` in the cluster.

        resource_desc: dict
            Description of required resources.
        """
        credentials = get_credentials()

        # An explicit 'allocator' entry must name us; otherwise the request
        # is intended for a different allocator and we contribute nothing.
        key = 'allocator'
        value = resource_desc.get(key, '')
        if value:
            if self.name != value:
                return 0
            else:
                # Any host in our cluster is OK.
                resource_desc = resource_desc.copy()
                del resource_desc[key]

        with self._lock:
            # Drain _reply_q.
            # Discard any stale replies left over from a previous call so the
            # counts read below belong only to the requests dispatched here.
            while True:
                try:
                    self._reply_q.get_nowait()
                except Queue.Empty:
                    break

            # Get counts via worker threads.
            # Query at most `max_workers` allocators concurrently; the rest
            # wait in `todo` and are dispatched as replies come back.
            todo = []
            max_workers = 10
            for i, allocator in enumerate(self._allocators.values()):
                if i < max_workers:
                    worker_q = WorkerPool.get()
                    worker_q.put((self._get_count,
                                  (allocator, resource_desc, credentials),
                                  {}, self._reply_q))
                else:
                    todo.append(allocator)

            # Process counts.
            total = 0
            # Exactly one reply arrives per allocator, in completion order.
            for i in range(len(self._allocators)):
                worker_q, retval, exc, trace = self._reply_q.get()
                if exc:
                    self._logger.error(trace)
                    raise exc

                try:
                    next_allocator = todo.pop(0)
                except IndexError:
                    # Nothing left to dispatch; return the worker to the pool.
                    WorkerPool.release(worker_q)
                else:
                    # Reuse the now-idle worker for the next pending query.
                    worker_q.put((self._get_count,
                                  (next_allocator, resource_desc, credentials),
                                  {}, self._reply_q))
                count = retval
                if count:
                    total += count
            return total
예제 #6
0
    def test_basic(self):
        """Verify the normal request/reply cycle through a pooled worker."""
        logging.debug('')
        logging.debug('test_basic')

        queue = WorkerPool.get()
        queue.put((self.add, (1,), {}, self.reply_q))

        done_q, result, error, tb = self.reply_q.get()
        self.assertEqual(done_q, queue)
        self.assertEqual(result, -1)
        self.assertEqual(error, None)
        self.assertEqual(tb, None)
        self.assertEqual(self.total, 1)

        WorkerPool.release(queue)
        WorkerPool.cleanup()
    def test_basic(self):
        # Round-trip one request through a pooled worker and verify the
        # reply tuple: (worker queue, return value, exception, traceback).
        logging.debug('')
        logging.debug('test_basic')

        worker = WorkerPool.get()
        worker.put((self.add, (1,), {}, self.reply_q))

        reply = self.reply_q.get()
        source_q, value, error, traceback_text = reply
        self.assertEqual(source_q, worker)
        self.assertEqual(value, -1)
        self.assertEqual(error, None)
        self.assertEqual(traceback_text, None)
        self.assertEqual(self.total, 1)

        WorkerPool.release(worker)
        WorkerPool.cleanup()
예제 #8
0
    def test_exception(self):
        """A task that raises must deliver the exception and its traceback."""
        logging.debug('')
        logging.debug('test_exception')

        tail = "TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType'\n"
        queue = WorkerPool.get()
        queue.put((self.add, (None,), {}, self.reply_q))

        done_q, result, error, tb = self.reply_q.get()
        self.assertEqual(done_q, queue)
        self.assertEqual(result, None)
        self.assertEqual(type(error), TypeError)
        self.assertTrue(tb.endswith(tail))
        self.assertEqual(self.total, 0)

        WorkerPool.release(queue)
        WorkerPool.cleanup()
    def test_exception(self):
        # Submitting a bad argument must come back as (None, exc, trace)
        # rather than raising in the worker thread.
        logging.debug('')
        logging.debug('test_exception')

        tail = "TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType'\n"
        worker = WorkerPool.get()
        worker.put((self.add, (None,), {}, self.reply_q))

        reply = self.reply_q.get()
        source_q, value, error, traceback_text = reply
        self.assertEqual(source_q, worker)
        self.assertEqual(value, None)
        self.assertEqual(type(error), TypeError)
        self.assertTrue(traceback_text.endswith(tail))
        self.assertEqual(self.total, 0)

        WorkerPool.release(worker)
        WorkerPool.cleanup()
예제 #10
0
    def _start_hosts(self, address, credentials):
        """
        Start host managers. Sequence for each host is:
        1. Check connectivity via simple 'ssh' call.
        2. Send startup files.
        3. Invoke remote Python process. (state 'started')
        4. Receive remote connection information. (state 'up')
        """
        # Start first set of hosts.
        # Only `max_workers` hosts are started concurrently; the rest wait
        # in `todo` and are started as earlier hosts finish coming up.
        todo = []
        max_workers = 5  # Somewhat related to listener backlog.
        for i, host in enumerate(self._hostlist):
            if i < max_workers:
                worker_q = WorkerPool.get()
                _LOGGER.info('Starting host %s...', host.hostname)
                worker_q.put(
                    (self._start_manager, (host, i, address, credentials), {},
                     self._reply_q))
            else:
                todo.append(host)

        # Wait for worker, start next host.
        # Exactly one reply arrives per host; a startup failure aborts the
        # whole bring-up by re-raising the worker's exception.
        for i in range(len(self._hostlist)):
            worker_q, host, exc, trace = self._reply_q.get()
            if exc:
                _LOGGER.error(trace)
                raise exc

            _LOGGER.debug('Host %r state %s', host.hostname, host.state)
            try:
                next_host = todo.pop(0)
            except IndexError:
                # Nothing left to start; return the worker to the pool.
                WorkerPool.release(worker_q)
            else:
                _LOGGER.info('Starting host %s...', next_host.hostname)
                # `i + max_workers` keeps the host index unique across the
                # initial batch and the follow-on dispatches.
                worker_q.put(
                    (self._start_manager, (next_host, i + max_workers, address,
                                           credentials), {}, self._reply_q))
    def _start_hosts(self, address, credentials):
        """
        Start host managers. Sequence for each host is:
        1. Check connectivity via simple 'ssh' call.
        2. Send startup files.
        3. Invoke remote Python process. (state 'started')
        4. Receive remote connection information. (state 'up')
        """
        # Start first set of hosts.
        # At most `max_workers` hosts start concurrently; the remainder are
        # queued in `todo` and dispatched as replies arrive below.
        todo = []
        max_workers = 5  # Somewhat related to listener backlog.
        for i, host in enumerate(self._hostlist):
            if i < max_workers:
                worker_q = WorkerPool.get()
                _LOGGER.info('Starting host %s...', host.hostname)
                worker_q.put((self._start_manager,
                             (host, i, address, credentials), {},
                              self._reply_q))
            else:
                todo.append(host)

        # Wait for worker, start next host.
        # One reply per host; any startup failure is re-raised and aborts
        # the remaining bring-up.
        for i in range(len(self._hostlist)):
            worker_q, host, exc, trace = self._reply_q.get()
            if exc:
                _LOGGER.error(trace)
                raise exc

            _LOGGER.debug('Host %r state %s', host.hostname, host.state)
            try:
                next_host = todo.pop(0)
            except IndexError:
                # No pending hosts; hand the worker back to the pool.
                WorkerPool.release(worker_q)
            else:
                _LOGGER.info('Starting host %s...', next_host.hostname)
                # `i+max_workers` gives each host a unique index across the
                # initial batch and these follow-on dispatches.
                worker_q.put((self._start_manager,
                              (next_host, i+max_workers, address, credentials),
                               {}, self._reply_q))
예제 #12
0
    def time_estimate(self, resource_desc):
        """
        Returns ``(estimate, criteria)`` indicating how well this allocator
        can satisfy the `resource_desc` request.  The estimate will be:

        - >0 for an estimate of walltime (seconds).
        -  0 for no estimate.
        - -1 for no resource at this time.
        - -2 for no support for `resource_desc`.

        The returned criteria is a dictionary containing information related
        to the estimate, such as hostnames, load averages, unsupported
        resources, etc.

        This allocator polls each :class:`LocalAllocator` in the cluster
        to find the best match and returns that.  The best allocator is saved
        in the returned criteria for a subsequent :meth:`deploy`.

        resource_desc: dict
            Description of required resources.
        """
        credentials = get_credentials()

        # An explicit 'allocator' entry must name us; otherwise the request
        # is intended for a different allocator.
        key = "allocator"
        value = resource_desc.get(key, "")
        if value:
            if self.name != value:
                return (-2, {key: value})
            else:
                # Any host in our cluster is OK.
                resource_desc = resource_desc.copy()
                del resource_desc[key]

        n_cpus = resource_desc.get("n_cpus", 0)
        if n_cpus:
            # Spread across LocalAllocators.
            # Ask each local allocator for a single CPU; the per-host data
            # gathered below is combined to cover the full n_cpus request.
            resource_desc = resource_desc.copy()
            resource_desc["n_cpus"] = 1

        with self._lock:
            best_estimate = -2
            best_criteria = None
            best_allocator = None

            # Prefer not to repeat use of just-used allocator.
            prev_estimate = -2
            prev_criteria = None
            prev_allocator = self._last_deployed
            self._last_deployed = None

            # Drain _reply_q.
            # Discard stale replies from any previous call so the loop below
            # only sees replies to the queries dispatched here.
            while True:
                try:
                    self._reply_q.get_nowait()
                except Queue.Empty:
                    break

            # Get estimates via worker threads.
            # Query at most `max_workers` allocators concurrently; the rest
            # wait in `todo` and are dispatched as replies arrive.
            todo = []
            max_workers = 10
            for i, allocator in enumerate(self._allocators.values()):
                if i < max_workers:
                    worker_q = WorkerPool.get()
                    worker_q.put((self._get_estimate, (allocator, resource_desc, credentials), {}, self._reply_q))
                else:
                    todo.append(allocator)

            # Process estimates.
            host_loads = []  # Sorted list of (hostname, load)
            for i in range(len(self._allocators)):
                worker_q, retval, exc, trace = self._reply_q.get()
                if exc:
                    # A failed query is logged but does not abort the poll;
                    # that allocator is simply skipped below.
                    self._logger.error(trace)
                    retval = None

                try:
                    next_allocator = todo.pop(0)
                except IndexError:
                    # Nothing left to dispatch; return the worker to the pool.
                    WorkerPool.release(worker_q)
                else:
                    worker_q.put((self._get_estimate, (next_allocator, resource_desc, credentials), {}, self._reply_q))

                if retval is None:
                    continue
                allocator, estimate, criteria = retval
                if estimate is None:
                    continue

                # Update loads.
                # Insert (hostname, load) so host_loads stays sorted by
                # ascending load average; for/else appends when no existing
                # entry has a higher load.
                if estimate >= 0 and n_cpus:
                    load = criteria["loadavgs"][0]
                    new_info = (criteria["hostnames"][0], load)
                    if host_loads:
                        for i, info in enumerate(host_loads):
                            if load < info[1]:
                                host_loads.insert(i, new_info)
                                break
                        else:
                            host_loads.append(new_info)
                    else:
                        host_loads.append(new_info)

                # Update best estimate.
                if allocator is prev_allocator:
                    # Held back; only used if no other allocator qualifies.
                    prev_estimate = estimate
                    prev_criteria = criteria
                elif (best_estimate <= 0 and estimate > best_estimate) or (
                    best_estimate > 0 and estimate < best_estimate
                ):
                    # Prefer any positive walltime over 0/-1/-2, and among
                    # positive walltimes prefer the smallest.
                    best_estimate = estimate
                    best_criteria = criteria
                    best_allocator = allocator
                elif best_estimate == 0 and estimate == 0:
                    # Tie between "no estimate" entries: pick the least loaded.
                    best_load = best_criteria["loadavgs"][0]
                    load = criteria["loadavgs"][0]
                    if load < best_load:
                        best_estimate = estimate
                        best_criteria = criteria
                        best_allocator = allocator

            # If no alternative, repeat use of previous allocator.
            if best_estimate < 0 and prev_estimate >= 0:
                best_estimate = prev_estimate
                best_criteria = prev_criteria
                best_allocator = prev_allocator

            # Save best allocator in criteria in case we're asked to deploy.
            if best_criteria is not None:
                best_criteria["allocator"] = best_allocator

                # Save n_cpus hostnames in criteria.
                best_criteria["hostnames"] = [host_loads[i][0] for i in range(min(n_cpus, len(host_loads)))]

            return (best_estimate, best_criteria)