def thread_target():
    try:
        logger.debug(f"thread '{name}' starting")
        if _prctl_available:
            prctl.set_name(name)
        if _numa_available and numa.available():
            numa.set_localalloc()
            logger.debug(
                f"Set NUMA local allocation policy on thread {name}")
        worker(index)
        logger.debug(f"thread '{name}' finishing")
    except Exception:
        logger.critical("Exception occurred in thread; exiting")
        logger.critical(traceback.format_exc())
        # Communicate back to the main thread that something bad has
        # happened. This seems to be the only reliable way to do it.
        _thread.interrupt_main()
        # Now we still need to make sure that the main thread doesn't
        # block on the queue.get/join (as it won't be interrupted). This
        # is an attempt to make sure that it unblocks. May not be
        # fool-proof though.
        #
        # TODO This doesn't really work. We can still block on pushing
        # things onto the queue. We'll probably have to do something
        # ourselves using timeouts and stuff to see if an error has
        # occurred.
        if consumer:
            while True:
                try:
                    work_queue.task_done()
                except ValueError:
                    break
        else:
            work_queue.put(None)
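
# A hedged sketch, not part of the original source: `thread_target` above is a
# closure, so `name`, `index`, `worker`, `work_queue`, `consumer`, `logger`,
# `_prctl_available` and `_numa_available` must be supplied by the enclosing
# module or function. The module-level guards it relies on could plausibly be
# set up like this, probing the optional imports once so the flags are cheap
# to check inside every thread:
import _thread
import logging
import threading
import traceback

logger = logging.getLogger(__name__)

try:
    import prctl  # python-prctl: lets the thread show up under `name` in ps/top
    _prctl_available = True
except ImportError:
    _prctl_available = False

try:
    import numa  # libnuma bindings: set_localalloc() keeps allocations node-local
    _numa_available = True
except ImportError:
    _numa_available = False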
def check_numa():
    try:
        import numa
    except ImportError:
        return
    if not numa.available():
        return
    if numa.get_max_node() > 0 and len(numa.get_run_on_node_mask()) > 1:
        print("Warning: NUMA settings may be suboptimal!", file=sys.stderr)
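
# A hedged usage note (assumed call site): check_numa() is a best-effort
# startup check that only prints to stderr, so callers typically invoke it
# once before doing real work and ignore the result. It also assumes a
# module-level `import sys`.
import sys

if __name__ == '__main__':
    check_numa()
    # ... rest of the program ...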
def coremap():
    try:
        import numa
    except ImportError:
        print('This script requires the libnuma python bindings')
        raise RuntimeError("Numa not available")
    if not numa.available():
        raise RuntimeError("Numa not available")
    node_to_core = {int(i): deque([int(k) for k in numa.node_to_cpus(i)])
                    for i in range(numa.get_max_node() + 1)}
    total_core = max(itertools.chain(*node_to_core.values())) + 1
    return node_to_core, total_core
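
# A hedged usage sketch (the helper name and call site are assumptions):
# coremap() hands back per-node deques of core ids, so a scheduler can pop
# cores node by node. This pins the current process to one core of a chosen
# node with os.sched_setaffinity (Linux-only). The imports also cover
# coremap() itself, which needs `deque` and `itertools` at module level.
import itertools
import os
from collections import deque


def pin_to_node_core(node_to_core, node):
    """Hypothetical helper: take the next free core on `node` and pin to it."""
    core = node_to_core[node].popleft()
    os.sched_setaffinity(0, {core})  # pid 0 == the calling process
    return core


if __name__ == '__main__':
    node_to_core, total_core = coremap()
    print('total cores:', total_core)
    print('pinned to core', pin_to_node_core(node_to_core, node=0))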
def load_numa():
    """Load information about core numbers and numa patterns."""
    if not numa.available():
        raise Exception('Numa detection not available')
    max_node = numa.get_max_node()
    nodes = {}
    for i in range(max_node + 1):
        nodes[i] = list(numa.node_to_cpus(i))
    return nodes
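
# A hedged example (an assumed follow-up, not from the original source): the
# dict returned by load_numa() maps node id -> list of cpu ids. Inverting it
# gives a cpu -> node lookup, which is handy when deciding where a task's
# memory should live. load_numa() itself assumes a module-level `import numa`.
import numa


def cpu_to_node(nodes):
    """Invert {node: [cpu, ...]} into {cpu: node}."""
    return {cpu: node for node, cpus in nodes.items() for cpu in cpus}


if __name__ == '__main__':
    print(cpu_to_node(load_numa()))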
def _configure_numa(self):
    self._numa_available = \
        numa.available() and which('numactl') is not None
    if not self._numa_available:
        return
    num_numa_nodes = numa.get_max_node() + 1
    self._numa_cpu_map = {}
    num_gpus = len(self._gpu_ids)

    # Calculate how many CPUs to allocate for each GPU. Ensure this number
    # is a power of 2.
    num_cpus = 0
    for i in range(num_numa_nodes):
        num_cpus += len(numa.node_to_cpus(i))
    num_cpus_per_gpu = min(MAX_CPUS_PER_GPU, max(num_cpus // num_gpus, 1))
    num_cpus_per_gpu = pow(2, round(math.log(num_cpus_per_gpu, 2)))

    # Find blocks of contiguous CPUs.
    contiguous_blocks = []
    for i in range(num_numa_nodes):
        cpus = sorted(numa.node_to_cpus(i))
        contiguous_block = [cpus[0]]
        for j in range(1, len(cpus)):
            if (cpus[j] - cpus[j - 1] == 1
                    and len(contiguous_block) < num_cpus_per_gpu):
                contiguous_block.append(cpus[j])
            else:
                contiguous_blocks.append(
                    (contiguous_block, len(contiguous_block)))
                contiguous_block = [cpus[j]]
        if len(contiguous_block) > 0:
            contiguous_blocks.append(
                (contiguous_block, len(contiguous_block)))
    contiguous_blocks.sort(key=lambda x: x[-1], reverse=True)

    # Assign CPUs to GPUs.
    block_idx = 0
    for i in range(num_gpus):
        self._numa_cpu_map[i] = []
        while len(self._numa_cpu_map[i]) < num_cpus_per_gpu:
            self._numa_cpu_map[i] += contiguous_blocks[block_idx][0]
            block_idx = (block_idx + 1) % len(contiguous_blocks)
        self._logger.info('GPU {gpu} assigned CPUs {cpus}'.format(
            gpu=i, cpus=str(self._numa_cpu_map[i])))
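
# A hedged follow-up sketch: _configure_numa() only builds self._numa_cpu_map
# and verifies that numactl is installed; everything below is an assumption
# about how such a map could be consumed, namely by prefixing a worker command
# with `numactl --physcpubind=...` for the CPUs assigned to one GPU. The
# helper name is hypothetical; `which` in the method above is shutil.which.
from shutil import which


def numactl_prefix(numa_cpu_map, gpu_idx):
    """Return a numactl argv prefix pinning to the CPUs mapped to gpu_idx."""
    if which('numactl') is None or gpu_idx not in numa_cpu_map:
        return []
    cpus = ','.join(str(c) for c in numa_cpu_map[gpu_idx])
    return ['numactl', '--physcpubind=' + cpus]


# Example: subprocess.run(numactl_prefix(numa_cpu_map, 0) + ['python', 'worker.py'])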
def _lazy_cpu_and_mem_set_init(self):
    # Implicitly assume lock is already held
    if len(self._numa_nodes) != 0:
        # Init already happened
        return
    if (self._available_cpu_ids is None or self._cpus_per_job is None
            or self._use_memset_of_nearest_node is None):
        raise Exception('Cannot do init. One or more params were None')

    import numa
    if not numa.available():
        raise Exception('NUMA not available')

    numa_nodes = list(range(0, numa.get_max_node() + 1))
    cpu_count = 0
    for numa_node in numa_nodes:
        cpus = numa.node_to_cpus(numa_node)
        for cpu_id in cpus:
            if cpu_id in self._available_cpu_ids:
                try:
                    self._numa_nodes[numa_node].add(cpu_id)
                except KeyError:
                    self._numa_nodes[numa_node] = set()
                    self._numa_nodes[numa_node].add(cpu_id)
                try:
                    self._numa_node_pool[numa_node].add(cpu_id)
                except KeyError:
                    self._numa_node_pool[numa_node] = set()
                    self._numa_node_pool[numa_node].add(cpu_id)
                _logger.info(
                    'Putting CPU {} in NUMA node {} in resource pool'.format(
                        cpu_id, numa_node))
                cpu_count += 1
            else:
                _logger.info(
                    'CPU {} in NUMA node {} is NOT IN resource pool'.format(
                        cpu_id, numa_node))
    if cpu_count == 0:
        raise Exception('Found no available CPUs')
    if cpu_count != len(self._available_cpu_ids):
        raise Exception('Mismatch between provided available CPU ids '
                        'and what was found on system')
    assert len(self._numa_node_pool) == len(self._numa_nodes)
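
# A hedged sketch (hypothetical, not a method of the original class): once
# _lazy_cpu_and_mem_set_init() has populated the per-node pools, an acquire
# step might carve `cpus_per_job` CPUs out of a single node so a job's cpuset
# and memset stay on the same NUMA node. A standalone version of that idea:
def acquire_cpus(numa_node_pool, cpus_per_job):
    """Take cpus_per_job CPU ids from one NUMA node's free set, else None."""
    for node, free_cpus in numa_node_pool.items():
        if len(free_cpus) >= cpus_per_job:
            return node, {free_cpus.pop() for _ in range(cpus_per_job)}
    return None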
def test_available(self):
    # failUnlessEqual is a deprecated alias of assertEqual
    # (removed in Python 3.12).
    self.failUnlessEqual(True, numa.available())
def test_available(self):
    self.assertEqual(True, numa.available())
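
# A hedged sketch (assumed context): both test_available variants above belong
# in a unittest.TestCase subclass; failUnlessEqual is just the older spelling
# of assertEqual. A minimal runnable module using the modern form:
import unittest

import numa


class NumaAvailableTest(unittest.TestCase):
    def test_available(self):
        # On a machine where libnuma works, available() is expected to be True.
        self.assertEqual(True, numa.available())


if __name__ == '__main__':
    unittest.main()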