def parse_local_resources(config):
    """Parse resources passed in configuration.

    The user can specify "virtual" resources, such as the number of nodes and
    cores, in the configuration.

    Args:
        config (dict): QCG-PilotJob configuration

    Returns:
        Resources: available resources

    Raises:
        ValueError: in case of missing node configuration or wrong number of
            cores configuration
    """
    nodes_conf = Config.EXECUTION_NODES.get(config)

    if nodes_conf:
        nodes = []
        nodes_list = nodes_conf.split(',')
        for nidx, node in enumerate(nodes_list):
            nname, _, ncores = node.rpartition(':')
            nname = nname or 'n{}'.format(nidx)

            if not ncores or int(ncores) < 1:
                raise ValueError('number of cores for a node must not be less than 1')

            nodes.append(Node(nname, int(ncores), 0))
    else:
        nodes = [Node(socket.gethostname(), mp.cpu_count(), 0)]

    if not nodes:
        raise ValueError('no node available')

    return Resources(ResourcesType.LOCAL, nodes)
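# The function above implies a node-specification grammar for Config.EXECUTION_NODES:
# a comma-separated list of "[name]:cores" entries, where the name is optional and
# defaults to 'n<index>'. A minimal, standalone sketch of just that string parsing;
# the helper name below is illustrative and not part of QCG-PilotJob:

def _parse_nodes_spec(spec):
    """Expand e.g. 'big:16,4' into [('big', 16), ('n1', 4)]."""
    nodes = []
    for nidx, entry in enumerate(spec.split(',')):
        nname, _, ncores = entry.rpartition(':')
        nodes.append((nname or 'n{}'.format(nidx), int(ncores)))
    return nodes

assert _parse_nodes_spec('8') == [('n0', 8)]
assert _parse_nodes_spec('big:16,4') == [('big', 16), ('n1', 4)]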
async def handle_register_req(self, iface, request):  # pylint: disable=unused-argument
    """Handle register request.

    Args:
        iface (interface): the interface from which the request came
        request (RegisterReq): request

    Returns:
        Response: response to send back
    """
    if self.stop_processing:
        return Response.error('processing stopped')

    if request.params['id'] in self.managers:
        return Response.error('Manager with id "{}" already registered'.format(
            request.params['id']))

    if not request.params['address']:
        return Response.error('Missing registry entity address')

    try:
        resources = Resources.from_dict(request.params['resources'])
        self.managers[request.params['id']] = ManagerInstance(
            request.params['id'], resources, request.params['address'])
        self.total_resources.append(resources)
    except Exception:
        return Response.error('Failed to register manager: {}'.format(
            sys.exc_info()[0]))

    _logger.info(
        '%dth manager instance %s @ %s with resources (%s) registered successfully',
        len(self.managers), request.params['id'], request.params['address'],
        request.params['resources'])

    return Response.ok(data={'id': request.params['id']})
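# For reference, a sketch of the registration payload handle_register_req expects,
# inferred solely from the request.params accesses above; the concrete values are
# illustrative assumptions, not part of a documented protocol:
example_register_params = {
    'id': 'manager-0',                  # unique manager instance identifier
    'address': 'tcp://127.0.0.1:5555',  # address under which the manager is reachable
    'resources': {},                    # serialized resources, accepted by Resources.from_dict()
}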
def parse_slurm_resources(config):
    """Return resources available in the Slurm allocation.

    Args:
        config (dict): QCG-PilotJob configuration

    Returns:
        Resources: resources available in the Slurm allocation
    """
    if 'SLURM_NODELIST' not in os.environ:
        raise SlurmEnvError("missing SLURM_NODELIST settings")

    if 'SLURM_JOB_CPUS_PER_NODE' not in os.environ:
        raise SlurmEnvError("missing SLURM_JOB_CPUS_PER_NODE settings")

    allocation_data = None
    if config.get('parse_nodes', 'yes') == 'yes':
        allocation_data = get_allocation_data()

    slurm_nodes = os.environ['SLURM_NODELIST']
    if config.get('parse_nodes', 'yes') == 'no':
        node_names = slurm_nodes.split(',')
    else:
        node_names = parse_nodelist(slurm_nodes)

    node_start = Config.SLURM_LIMIT_NODES_RANGE_BEGIN.get(config)
    if not node_start:
        node_start = 0

    node_end = Config.SLURM_LIMIT_NODES_RANGE_END.get(config)
    if not node_end:
        node_end = len(node_names)
    else:
        node_end = min(node_end, len(node_names))

    _logger.debug('node range %d - %d from %d total nodes', node_start, node_end,
                  len(node_names))

    if node_start > node_end:
        raise SlurmEnvError(
            'Invalid node range arguments - node start {} greater than node end {}'.format(
                node_start, node_end))

    slurm_job_cpus_def = os.environ['SLURM_JOB_CPUS_PER_NODE']
    job_cpus = parse_slurm_job_cpus(slurm_job_cpus_def)

    slurm_tasks_def = os.environ['SLURM_TASKS_PER_NODE']
    job_tasks = parse_slurm_job_cpus(slurm_tasks_def)

    if Config.PARENT_MANAGER.get(config) and len(node_names) != len(job_tasks):
        # TODO: dirty hack - to fix problems with ht & governor managers
        job_tasks = job_cpus

    if len(node_names) != len(job_cpus) or len(node_names) != len(job_tasks):
        raise SlurmEnvError(
            "failed to parse slurm env: number of nodes ({}) mismatch number of job cpus ({})"
            "/job tasks ({}), ({})".format(len(node_names), len(job_cpus), len(job_tasks),
                                           slurm_tasks_def))

    core_ids = None
    binding = False

    # slots_num = job_cpus
    slots_num = job_tasks

    if allocation_data is not None and 'CPU_IDs' in allocation_data['map']:
        cpu_ids = parse_slurm_allocation_cpu_ids(allocation_data['list'],
                                                 node_names[node_start:node_end],
                                                 job_cpus[node_start:node_end])
        _logger.debug('got available cpus per node: %s', ','.join(
            ['{}: [{}]'.format(node_name, ','.join([str(core) for core in node_cores]))
             for node_name, node_cores in cpu_ids.items()]))

        core_ids = {node_name: [str(cpu_id) for cpu_id in node_cpus]
                    for node_name, node_cpus in cpu_ids.items()}
        _logger.debug('temporary core ids per node: %s', ','.join(
            ['{}: [{}]'.format(node_name, ','.join([str(core) for core in node_cores]))
             for node_name, node_cores in cpu_ids.items()]))

        if job_cpus != job_tasks and all(job_cpus[i] == job_cpus[i + 1]
                                         for i in range(len(job_cpus) - 1)):
            # the number of available cpus differs from the number of requested tasks -
            # two situations are possible:
            #   a) the user wants to execute fewer tasks than there are available cpus
            #   b) hyper-threading (many cpus per single core)
            # in both situations we should execute only as many jobs as requested tasks;
            # with hyper-threading we should also bind many cpus to a single job.
            # to assign system cpus to physical cores we need information about the cpus,
            # for example from 'lscpu', which would have to be executed on each node -
            # currently we assume that all nodes in the allocation are homogeneous
            _logger.debug(
                'number of tasks (%s) differs from number of cpus (%s) - checking local '
                'cpus vs cores',
                ','.join([str(task) for task in job_tasks]),
                ','.join([str(cpu) for cpu in job_cpus]))

            local_core_info, local_cpu_info = parse_local_cpus()

            # if len(local_core_info) != len(local_cpu_info):
            _logger.info(
                "number of available cpu's {} differs from number of available cores {} - "
                "HyperThreading".format(len(local_core_info), len(local_cpu_info)))

            core_ids = {}

            # group available cpus into cores
            for node_idx in range(node_start, node_end):
                node_name = node_names[node_idx]
                node_cpus = cpu_ids[node_name]
                node_tasks = job_tasks[node_idx]

                if len(local_core_info) == node_tasks:
                    # the number of tasks on a node equals the number of cores
                    core_ids[node_name] = [','.join(local_core_info[core_id])
                                           for core_id in local_core_info]
                else:
                    # partition the available cpus among the given number of tasks
                    if node_tasks < len(local_core_info):
                        # partition the available cores among the given number of tasks
                        avail_cores = len(local_core_info)
                        first_core = 0

                        node_task_cpu_ids = []
                        for task_nr in range(node_tasks):
                            node_cores = floor(avail_cores / (node_tasks - task_nr))
                            # _logger.debug('assigning #{} ({} - {}) cores for task {}'.format(
                            #     node_cores, first_core, first_core + node_cores, task_nr))

                            cores_cpu_list = []
                            for core_id in range(first_core, first_core + node_cores):
                                cores_cpu_list.extend(local_core_info[str(core_id)])
                            node_task_cpu_ids.append(','.join(cores_cpu_list))
                            # _logger.debug('assigned ({}) cpus for task {}'.format(
                            #     node_task_cpu_ids[-1], task_nr))

                            first_core += node_cores
                            avail_cores -= node_cores

                        core_ids[node_name] = node_task_cpu_ids
                    else:
                        # TODO: this needs further work
                        raise Exception('Not supported')

        binding = True
    elif 'SLURM_CPU_BIND_LIST' in os.environ and 'SLURM_CPU_BIND_TYPE' in os.environ and \
            os.environ['SLURM_CPU_BIND_TYPE'].startswith('mask_cpu'):
        core_ids = parse_slurm_env_binding(os.environ['SLURM_CPU_BIND_LIST'],
                                           node_names[node_start:node_end],
                                           job_cpus[node_start:node_end])
        binding = True
    else:
        _logger.warning('failed to get slurm binding information - missing '
                        'SLURM_CPU_BIND_LIST and CPU_IDs')

    if binding:
        _logger.debug('core binding for %d nodes: %s', len(core_ids),
                      '; '.join(['{}: {}'.format(str(node), str(slots))
                                 for node, slots in core_ids.items()]))
    else:
        _logger.warning('no binding information')

    n_crs = None
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        n_crs = {CRType.GPU: CRBind(CRType.GPU, os.environ['CUDA_VISIBLE_DEVICES'].split(','))}

    nodes = [Node(node_names[i], slots_num[i], 0,
                  core_ids=core_ids[node_names[i]] if core_ids is not None else None,
                  crs=n_crs)
             for i in range(node_start, node_end)]
    _logger.debug('generated %d nodes %s binding', len(nodes), 'with' if binding else 'without')

    return Resources(ResourcesType.SLURM, nodes, binding)
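# SLURM publishes per-node cpu counts in a compressed form, e.g. '4(x2),8' for
# [4, 4, 8]. A minimal sketch of such an expansion, mirroring what
# parse_slurm_job_cpus presumably does; the helper below is a hypothetical
# re-implementation for illustration, not the project's own:
import re

def _expand_slurm_cpus(spec):
    """Expand a SLURM_JOB_CPUS_PER_NODE-style spec like '4(x2),8' into [4, 4, 8]."""
    counts = []
    for part in spec.split(','):
        match = re.fullmatch(r'(\d+)(?:\(x(\d+)\))?', part)
        count, repeat = int(match.group(1)), int(match.group(2) or 1)
        counts.extend([count] * repeat)
    return counts

assert _expand_slurm_cpus('4(x2),8') == [4, 4, 8]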
def test_scheduler_allocate_job_gpus():
    nCores = [8, 8, 8, 8]
    nGpus = [2, 4, 4, 4]
    nCrs = [{CRType.GPU: CRBind(CRType.GPU, list(range(nGpu)))} for nGpu in nGpus]

    r = Resources(ResourcesType.LOCAL, [
        Node("n.{}".format(i), total_cores=nCores[i], used=0, crs=nCrs[i])
        for i in range(len(nCores))
    ])

    s = Scheduler(r)
    assert s

    assert all((len(r.nodes) == len(nCores),
                [r.nodes[i] == "n.{}".format(i) for i in range(len(nCores))]))
    assert all([r.nodes[i].total == nCores[i] and r.nodes[i].used == 0 and
                r.nodes[i].free == nCores[i] for i in range(len(nCores))])
    assert all([len(r.nodes[i].crs) == 1 and CRType.GPU in r.nodes[i].crs and
                r.nodes[i].crs[CRType.GPU].total_count == nGpus[i] and
                r.nodes[i].crs[CRType.GPU].used == 0 and
                r.nodes[i].crs[CRType.GPU].available == nGpus[i]
                for i in range(len(nCores))])
    assert all((r.total_cores == sum(nCores), r.free_cores == sum(nCores),
                r.used_cores == 0))

    # allocate half of the first node's cpus and one gpu
    job1_r_c = 4
    job1_r_n = 1
    job1_r_g = 1
    job1_r = JobResources(numCores=job1_r_c, numNodes=job1_r_n, nodeCrs={'gpu': job1_r_g})
    assert job1_r

    a1 = s.allocate_job(job1_r)
    assert a1
    assert a1 and all((a1.cores == job1_r_c * job1_r_n, len(a1.nodes) == job1_r_n,
                       a1.nodes[0].node.name == "n.0",
                       a1.nodes[0].cores == [str(cid) for cid in range(job1_r_c)]))
    assert a1.nodes[0].crs and ((CRType.GPU in a1.nodes[0].crs,
                                 a1.nodes[0].crs[CRType.GPU].count == job1_r_g,
                                 a1.nodes[0].crs[CRType.GPU].instances == list(range(job1_r_g))))
    assert all((r.total_cores == sum(nCores), r.free_cores == sum(nCores) - job1_r_c,
                r.used_cores == job1_r_c))

    # try to allocate half of a node's cpus but with two gpus - the allocation
    # should not fit on the first node
    job2_r_c = 4
    job2_r_n = 1
    job2_r_g = 2
    job2_r = JobResources(numCores=job2_r_c, numNodes=job2_r_n, nodeCrs={'gpu': job2_r_g})
    assert job2_r

    a2 = s.allocate_job(job2_r)
    assert a2
    assert a2 and all((a2.cores == job2_r_c * job2_r_n, len(a2.nodes) == job2_r_n,
                       a2.nodes[0].node.name == "n.1",
                       a2.nodes[0].cores == [str(cid) for cid in range(job2_r_c)]))
    assert a2.nodes[0].crs and ((CRType.GPU in a2.nodes[0].crs,
                                 a2.nodes[0].crs[CRType.GPU].count == job2_r_g,
                                 a2.nodes[0].crs[CRType.GPU].instances == list(range(job2_r_g))))
    assert all((r.total_cores == sum(nCores),
                r.free_cores == sum(nCores) - job1_r_c - job2_r_c,
                r.used_cores == job1_r_c + job2_r_c))

    # try to allocate more gpus than are available on any single node
    job3_r_c = 1
    job3_r_n = 1
    job3_r_g = max(nGpus) + 2
    job3_r = JobResources(numCores=job3_r_c, numNodes=job3_r_n, nodeCrs={'gpu': job3_r_g})
    assert job3_r

    with pytest.raises(NotSufficientResources):
        s.allocate_job(job3_r)

    # try to allocate more gpus in total than are available
    job4_r_c = 1
    job4_r_n = 4
    job4_r_g = max(nGpus)
    job4_r = JobResources(numCores=job4_r_c, numNodes=job4_r_n, nodeCrs={'gpu': job4_r_g})
    assert job4_r

    with pytest.raises(NotSufficientResources):
        s.allocate_job(job4_r)

    # allocate gpus across many nodes
    job5_r_c = 2
    job5_r_n = 3
    job5_r_g = 2
    job5_r = JobResources(numCores=job5_r_c, numNodes=job5_r_n, nodeCrs={'gpu': job5_r_g})
    assert job5_r

    a5 = s.allocate_job(job5_r)
    assert a5 and all((a5.cores == job5_r_c * job5_r_n, len(a5.nodes) == job5_r_n,
                       a5.nodes[0].node.name == "n.1",
                       a5.nodes[0].cores == [str(cid) for cid in
                                             range(job2_r_c, job2_r_c + job5_r_c)],
                       a5.nodes[1].node.name == "n.2",
                       a5.nodes[1].cores == [str(cid) for cid in range(job5_r_c)],
                       a5.nodes[2].node.name == "n.3",
                       a5.nodes[2].cores == [str(cid) for cid in range(job5_r_c)])), str(a5)
    assert a5.nodes[0].crs and ((CRType.GPU in a5.nodes[0].crs,
                                 a5.nodes[0].crs[CRType.GPU].count == job5_r_g,
                                 a5.nodes[0].crs[CRType.GPU].instances == list(range(job5_r_g))))
    assert a5.nodes[1].crs and ((CRType.GPU in a5.nodes[1].crs,
                                 a5.nodes[1].crs[CRType.GPU].count == job5_r_g,
                                 a5.nodes[1].crs[CRType.GPU].instances == list(range(job5_r_g))))
    assert a5.nodes[2].crs and ((CRType.GPU in a5.nodes[2].crs,
                                 a5.nodes[2].crs[CRType.GPU].count == job5_r_g,
                                 a5.nodes[2].crs[CRType.GPU].instances == list(range(job5_r_g))))
    assert all((r.total_cores == sum(nCores),
                r.free_cores == sum(nCores) - job1_r_c - job2_r_c - job5_r_c * job5_r_n,
                r.used_cores == job1_r_c + job2_r_c + job5_r_c * job5_r_n))

    # allocate cpus across many nodes
    job6_r_c = 16
    job6_r = JobResources(numCores=job6_r_c)
    assert job6_r

    a6 = s.allocate_job(job6_r)
    assert a6 and all((a6.cores == job6_r_c, len(a6.nodes) == 4,
                       a6.nodes[0].node.name == "n.0",
                       a6.nodes[0].cores == [str(cid) for cid in range(job1_r_c, nCores[0])],
                       a6.nodes[1].node.name == "n.1",
                       a6.nodes[1].cores == [str(cid) for cid in
                                             range(job2_r_c + job5_r_c, nCores[1])],
                       a6.nodes[2].node.name == "n.2",
                       a6.nodes[2].cores == [str(cid) for cid in range(job5_r_c, nCores[2])],
                       a6.nodes[3].node.name == "n.3",
                       a6.nodes[3].cores == [str(cid) for cid in
                                             range(job5_r_c, job5_r_c + 4)])), str(a6)
    assert all(a6.nodes[i].crs is None for i in range(4))
    assert all((r.total_cores == sum(nCores),
                r.free_cores == sum(nCores) - job1_r_c - job2_r_c - job5_r_c * job5_r_n -
                job6_r_c,
                r.used_cores == job1_r_c + job2_r_c + job5_r_c * job5_r_n + job6_r_c))

    # release all allocations
    a1.release()
    a2.release()
    a5.release()
    a6.release()

    assert all((len(r.nodes) == len(nCores),
                [r.nodes[i] == "n.{}".format(i) for i in range(len(nCores))]))
    assert all([r.nodes[i].total == nCores[i] and r.nodes[i].used == 0 and
                r.nodes[i].free == nCores[i] for i in range(len(nCores))])
    assert all([len(r.nodes[i].crs) == 1 and CRType.GPU in r.nodes[i].crs and
                r.nodes[i].crs[CRType.GPU].total_count == nGpus[i] and
                r.nodes[i].crs[CRType.GPU].used == 0 and
                r.nodes[i].crs[CRType.GPU].available == nGpus[i]
                for i in range(len(nCores))])
    assert all((r.total_cores == sum(nCores), r.free_cores == sum(nCores),
                r.used_cores == 0))
def test_scheduler_allocate_cores():
    nCores = [8, 8, 8, 8]
    r = Resources(ResourcesType.LOCAL, [
        Node("n.{}".format(i), total_cores=nCores[i], used=0)
        for i in range(len(nCores))
    ])

    s = Scheduler(r)
    assert s

    assert all((len(r.nodes) == len(nCores),
                [r.nodes[i] == "n.{}".format(i) for i in range(len(nCores))]))
    assert all([r.nodes[i].total == nCores[i] and r.nodes[i].used == 0 and
                r.nodes[i].free == nCores[i] for i in range(len(nCores))])
    assert all((r.total_cores == sum(nCores), r.free_cores == sum(nCores),
                r.used_cores == 0))

    c1 = 4
    a1 = s.allocate_cores(c1)
    assert a1 and all((a1.cores == c1, len(a1.nodes) == 1,
                       a1.nodes[0].cores == [str(cid) for cid in range(c1)],
                       a1.nodes[0].crs == None))
    assert all((r.total_cores == sum(nCores), r.free_cores == sum(nCores) - c1,
                r.used_cores == c1))

    c2_min = 4
    c2_max = 8
    a2 = s.allocate_cores(c2_min, c2_max)
    assert a2 and all((a2.cores == c2_max, len(a2.nodes) == 2,
                       a2.nodes[0].cores == [str(cid) for cid in range(c1, nCores[0])],
                       a2.nodes[0].crs == None,
                       a2.nodes[1].cores == [str(cid) for cid in
                                             range(c1 + c2_max - nCores[0])],
                       a2.nodes[1].crs == None))
    assert all((r.total_cores == sum(nCores), r.free_cores == sum(nCores) - c1 - c2_max,
                r.used_cores == c1 + c2_max))

    s.release_allocation(a1)
    assert all((r.total_cores == sum(nCores), r.free_cores == sum(nCores) - c2_max,
                r.used_cores == c2_max))

    c3_min = 2
    c3_max = 4
    a3 = s.allocate_cores(c3_min, c3_max)
    assert a3 and all((a3.cores == c3_max, len(a3.nodes) == 1,
                       a3.nodes[0].cores == [str(cid) for cid in range(c3_max)],
                       a3.nodes[0].crs == None))
    assert all((r.total_cores == sum(nCores),
                r.free_cores == sum(nCores) - c2_max - c3_max,
                r.used_cores == c2_max + c3_max))

    c4 = 20
    a4 = s.allocate_cores(c4)
    assert a4 and all((a4.cores == c4, len(a4.nodes) == 3,
                       a4.nodes[0].cores == [str(cid) for cid in
                                             range(c1 + c2_max - nCores[0], nCores[1])],
                       a4.nodes[0].crs == None,
                       a4.nodes[1].cores == [str(cid) for cid in range(nCores[2])],
                       a4.nodes[1].crs == None,
                       a4.nodes[2].cores == [str(cid) for cid in range(nCores[3])],
                       a4.nodes[2].crs == None))
    assert all((r.total_cores == sum(nCores),
                r.free_cores == sum(nCores) - c2_max - c3_max - c4 == 0,
                r.used_cores == c2_max + c3_max + c4 == r.total_cores))

    # no more available resources
    c5 = 1
    a5 = s.allocate_cores(c5)
    assert a5 == None
    assert all((r.total_cores == sum(nCores),
                r.free_cores == sum(nCores) - c2_max - c3_max - c4 == 0,
                r.used_cores == c2_max + c3_max + c4 == r.total_cores))

    c6_min = 1
    c6_max = 2
    a6 = s.allocate_cores(c6_min, c6_max)
    assert a6 == None
    assert all((r.total_cores == sum(nCores),
                r.free_cores == sum(nCores) - c2_max - c3_max - c4 == 0,
                r.used_cores == c2_max + c3_max + c4 == r.total_cores))

    # release all allocations
    s.release_allocation(a2)
    s.release_allocation(a3)
    s.release_allocation(a4)
    assert all((r.total_cores == sum(nCores), r.free_cores == sum(nCores),
                r.used_cores == 0))
def test_resources_export():
    nodes = [Node('n1', 10), Node('n2', 4, 0, ["2", "3", "4", "5"])]
    res = Resources(ResourcesType.LOCAL, nodes, False)

    assert all((res.total_nodes == 2, res.total_cores == sum([n.total for n in nodes]),
                res.used_cores == 0, res.free_cores == res.total_cores,
                res.binding == False, res.rtype == ResourcesType.LOCAL))

    res_copy = Resources.from_dict(res.to_dict())
    assert all((res_copy.total_nodes == res.total_nodes,
                res_copy.total_cores == res.total_cores,
                res_copy.used_cores == res.used_cores,
                res_copy.free_cores == res.free_cores,
                res_copy.binding == res.binding))

    for idx, n in enumerate(nodes):
        assert all((n.ids == res_copy.nodes[idx].ids, n.ids == res.nodes[idx].ids,
                    n.free_ids == res_copy.nodes[idx].free_ids,
                    n.free_ids == res.nodes[idx].free_ids))

    assert all((len(res.to_json()) > 0, res.to_json() == res_copy.to_json()))

    nodes = [Node('n1', 10, 3), Node('n2', 4, 2, ["2", "3", "4", "5"])]
    res = Resources(ResourcesType.SLURM, nodes, True)

    assert all((res.total_nodes == 2, res.total_cores == sum([n.total for n in nodes]),
                res.used_cores == sum([n.used for n in nodes]),
                res.free_cores == sum([n.total for n in nodes]) -
                sum([n.used for n in nodes]),
                nodes[0].free_ids == [str(cid) for cid in range(3, 10)],
                nodes[1].free_ids == ["4", "5"],
                res.binding == True, res.rtype == ResourcesType.SLURM)), res.to_json()

    res_copy = Resources.from_dict(res.to_dict())
    assert all((res_copy.total_nodes == res.total_nodes,
                res_copy.total_cores == res.total_cores,
                res_copy.used_cores == res.used_cores,
                res_copy.free_cores == res.free_cores,
                res_copy.binding == res.binding))

    for idx, n in enumerate(nodes):
        assert all((n.ids == res_copy.nodes[idx].ids, n.ids == res.nodes[idx].ids,
                    n.free_ids == res_copy.nodes[idx].free_ids,
                    n.free_ids == res.nodes[idx].free_ids))

    assert all((len(res.to_json()) > 0, res.to_json() == res_copy.to_json()))
    # print(res.to_json())

    n1Tot = 8
    n2Tot = 8
    n1GpuTot = 4
    n2GpuTot = 2
    r = Resources(ResourcesType.LOCAL, [
        Node("n1", total_cores=n1Tot, used=0, core_ids=None,
             crs={CRType.GPU: CRBind(CRType.GPU, list(range(n1GpuTot)))}),
        Node("n2", total_cores=n2Tot, used=0, core_ids=None,
             crs={CRType.GPU: CRBind(CRType.GPU, list(range(n2GpuTot)))})
    ], binding=False)

    assert all((r != None, r.binding == False, r.rtype == ResourcesType.LOCAL,
                r.total_nodes == 2, r.total_cores == r.free_cores == n1Tot + n2Tot,
                r.used_cores == 0))
    assert all((r.nodes[0].name == 'n1', r.nodes[0].total == r.nodes[0].free == n1Tot,
                r.nodes[0].used == 0))
    assert all((r.nodes[1].name == 'n2', r.nodes[1].total == r.nodes[1].free == n2Tot,
                r.nodes[1].used == 0))

    n1 = r.nodes[0]
    n2 = r.nodes[1]
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot, n1.crs[CRType.GPU].used == 0,
                n1.crs[CRType.GPU].available == n1GpuTot))
    assert all((len(n2.crs) == 1, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot, n2.crs[CRType.GPU].used == 0,
                n2.crs[CRType.GPU].available == n2GpuTot))

    r_copy = Resources.from_dict(r.to_dict())
    assert all((r_copy != None, r_copy.binding == False,
                r_copy.rtype == ResourcesType.LOCAL, r_copy.total_nodes == 2,
                r_copy.total_cores == r_copy.free_cores == n1Tot + n2Tot,
                r_copy.used_cores == 0))
    assert all((r_copy.nodes[0].name == 'n1',
                r_copy.nodes[0].total == r_copy.nodes[0].free == n1Tot,
                r_copy.nodes[0].used == 0))
    assert all((r_copy.nodes[1].name == 'n2',
                r_copy.nodes[1].total == r_copy.nodes[1].free == n2Tot,
                r_copy.nodes[1].used == 0))

    n1 = r_copy.nodes[0]
    n2 = r_copy.nodes[1]
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot, n1.crs[CRType.GPU].used == 0,
                n1.crs[CRType.GPU].available == n1GpuTot))
    assert all((len(n2.crs) == 1, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot, n2.crs[CRType.GPU].used == 0,
                n2.crs[CRType.GPU].available == n2GpuTot))
    assert all((len(r_copy.to_json()) > 0, r_copy.to_json() == r.to_json()))

    n1Tot = 8
    n2Tot = 8
    n1MemTot = 128
    n2MemTot = 256
    n2GpuTot = 4
    r = Resources(ResourcesType.LOCAL, [
        Node("n1", total_cores=n1Tot, used=0, core_ids=None,
             crs={CRType.MEM: CR(CRType.MEM, n1MemTot)}),
        Node("n2", total_cores=n2Tot, used=0, core_ids=None,
             crs={CRType.MEM: CR(CRType.MEM, n2MemTot),
                  CRType.GPU: CRBind(CRType.GPU, list(range(n2GpuTot)))})
    ], binding=False)

    assert all((r != None, r.binding == False, r.rtype == ResourcesType.LOCAL,
                r.total_nodes == 2, r.total_cores == r.free_cores == n1Tot + n2Tot,
                r.used_cores == 0))
    assert all((r.nodes[0].name == 'n1', r.nodes[0].total == r.nodes[0].free == n1Tot,
                r.nodes[0].used == 0))
    assert all((r.nodes[1].name == 'n2', r.nodes[1].total == r.nodes[1].free == n2Tot,
                r.nodes[1].used == 0))

    n1 = r.nodes[0]
    n2 = r.nodes[1]
    assert all((len(n1.crs) == 1, CRType.MEM in n1.crs,
                n1.crs[CRType.MEM].total_count == n1MemTot, n1.crs[CRType.MEM].used == 0,
                n1.crs[CRType.MEM].available == n1MemTot))
    assert all((len(n2.crs) == 2, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot, n2.crs[CRType.MEM].used == 0,
                n2.crs[CRType.MEM].available == n2MemTot, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].available == n2GpuTot))

    r_copy = Resources.from_dict(r.to_dict())
    assert all((r_copy != None, r_copy.binding == False,
                r_copy.rtype == ResourcesType.LOCAL, r_copy.total_nodes == 2,
                r_copy.total_cores == r_copy.free_cores == n1Tot + n2Tot,
                r_copy.used_cores == 0))
    assert all((r_copy.nodes[0].name == 'n1',
                r_copy.nodes[0].total == r_copy.nodes[0].free == n1Tot,
                r_copy.nodes[0].used == 0))
    assert all((r_copy.nodes[1].name == 'n2',
                r_copy.nodes[1].total == r_copy.nodes[1].free == n2Tot,
                r_copy.nodes[1].used == 0))

    n1 = r_copy.nodes[0]
    n2 = r_copy.nodes[1]
    assert all((len(n1.crs) == 1, CRType.MEM in n1.crs,
                n1.crs[CRType.MEM].total_count == n1MemTot, n1.crs[CRType.MEM].used == 0,
                n1.crs[CRType.MEM].available == n1MemTot))
    assert all((len(n2.crs) == 2, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot, n2.crs[CRType.MEM].used == 0,
                n2.crs[CRType.MEM].available == n2MemTot, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].available == n2GpuTot))
    assert all((len(r_copy.to_json()) > 0, r_copy.to_json() == r.to_json()))
def test_resources_allocate_crs_mem():
    n1Tot = 8
    n2Tot = 8
    n1MemTot = 128
    n2MemTot = 256
    n2GpuTot = 4
    r = Resources(ResourcesType.LOCAL, [
        Node("n1", total_cores=n1Tot, used=0, core_ids=None,
             crs={CRType.MEM: CR(CRType.MEM, n1MemTot)}),
        Node("n2", total_cores=n2Tot, used=0, core_ids=None,
             crs={CRType.MEM: CR(CRType.MEM, n2MemTot),
                  CRType.GPU: CRBind(CRType.GPU, list(range(n2GpuTot)))})
    ], binding=False)

    assert all((r != None, r.binding == False, r.rtype == ResourcesType.LOCAL,
                r.total_nodes == 2, r.total_cores == r.free_cores == n1Tot + n2Tot,
                r.used_cores == 0))
    assert all((r.nodes[0].name == 'n1', r.nodes[0].total == r.nodes[0].free == n1Tot,
                r.nodes[0].used == 0))
    assert all((r.nodes[1].name == 'n2', r.nodes[1].total == r.nodes[1].free == n2Tot,
                r.nodes[1].used == 0))

    n1 = r.nodes[0]
    n2 = r.nodes[1]
    assert all((len(n1.crs) == 1, CRType.MEM in n1.crs,
                n1.crs[CRType.MEM].total_count == n1MemTot, n1.crs[CRType.MEM].used == 0,
                n1.crs[CRType.MEM].available == n1MemTot))
    assert all((len(n2.crs) == 2, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot, n2.crs[CRType.MEM].used == 0,
                n2.crs[CRType.MEM].available == n2MemTot, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].available == n2GpuTot))

    # create an allocation with both crs
    c1_c = n2.free - 2
    c1_g = n2.crs[CRType.GPU].available - 1
    c1_m = n2.crs[CRType.MEM].available - 20
    a1 = n2.allocate_max(c1_c, {CRType.GPU: c1_g, CRType.MEM: c1_m})
    assert a1
    assert all((a1.ncores == c1_c, a1.cores == [str(cid) for cid in range(c1_c)])), \
        "cores: {} vs expected {}".format(str(a1.cores), str(list(range(c1_c))))
    assert a1.crs != None and all(
        (len(a1.crs) == 2, CRType.GPU in a1.crs, a1.crs[CRType.GPU].count == c1_g,
         a1.crs[CRType.GPU].instances == list(range(c1_g)), CRType.MEM in a1.crs,
         a1.crs[CRType.MEM].count == c1_m)), "crs: {}".format(str(a1.crs))
    assert all((n2.total == n2Tot, n2.free == n2Tot - c1_c, n2.used == c1_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c1_g,
                n2.crs[CRType.GPU].available == n2GpuTot - c1_g, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot,
                n2.crs[CRType.MEM].used == c1_m,
                n2.crs[CRType.MEM].available == n2MemTot - c1_m))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c1_c,
                r.used_cores == c1_c))

    # allocate the rest of the gpus
    c2_c = n2.free
    c2_g = n2.crs[CRType.GPU].available
    a2 = n2.allocate_max(c2_c, {CRType.GPU: c2_g})
    assert a2
    assert all((a2.ncores == c2_c,
                a2.cores == [str(cid) for cid in range(c1_c, c1_c + c2_c)])), \
        "cores: {} vs expected {}".format(str(a2.cores),
                                          str(list(range(c1_c, c1_c + c2_c))))
    assert a2.crs != None and all(
        (len(a2.crs) == 1, CRType.GPU in a2.crs, a2.crs[CRType.GPU].count == c2_g,
         a2.crs[CRType.GPU].instances == list(range(c1_g, c1_g + c2_g))))
    assert all((n2.total == n2Tot, n2.free == n2Tot - c1_c - c2_c,
                n2.used == c1_c + c2_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c1_g + c2_g == n2GpuTot,
                n2.crs[CRType.GPU].available == n2GpuTot - c1_g - c2_g == 0,
                CRType.MEM in n2.crs, n2.crs[CRType.MEM].total_count == n2MemTot,
                n2.crs[CRType.MEM].used == c1_m,
                n2.crs[CRType.MEM].available == n2MemTot - c1_m))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c1_c - c2_c,
                r.used_cores == c1_c + c2_c))

    # try to allocate mem - with no available cpus
    c3_c = 1
    c3_m = n2.crs[CRType.MEM].available
    a3 = n2.allocate_max(c3_c, {CRType.MEM: c3_m})
    assert a3 == None
    assert all((n2.total == n2Tot, n2.free == n2Tot - c1_c - c2_c,
                n2.used == c1_c + c2_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c1_g + c2_g == n2GpuTot,
                n2.crs[CRType.GPU].available == n2GpuTot - c1_g - c2_g == 0,
                CRType.MEM in n2.crs, n2.crs[CRType.MEM].total_count == n2MemTot,
                n2.crs[CRType.MEM].used == c1_m,
                n2.crs[CRType.MEM].available == n2MemTot - c1_m))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c1_c - c2_c,
                r.used_cores == c1_c + c2_c))

    # try to allocate mem - with no available gpus
    c3_c = 1
    c3_m = n2.crs[CRType.MEM].available
    a3 = n2.allocate_max(c3_c, {CRType.MEM: c3_m})
    assert a3 == None
    assert all((n2.total == n2Tot, n2.free == n2Tot - c1_c - c2_c,
                n2.used == c1_c + c2_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c1_g + c2_g == n2GpuTot,
                n2.crs[CRType.GPU].available == n2GpuTot - c1_g - c2_g == 0,
                CRType.MEM in n2.crs, n2.crs[CRType.MEM].total_count == n2MemTot,
                n2.crs[CRType.MEM].used == c1_m,
                n2.crs[CRType.MEM].available == n2MemTot - c1_m))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c1_c - c2_c,
                r.used_cores == c1_c + c2_c))

    # release some resources
    a1.release()
    assert all((n2.total == n2Tot, n2.free == n2Tot - c2_c, n2.used == c2_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c2_g,
                n2.crs[CRType.GPU].available == n2GpuTot - c2_g, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot, n2.crs[CRType.MEM].used == 0,
                n2.crs[CRType.MEM].available == n2MemTot))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c2_c,
                r.used_cores == c2_c))

    # once more release the already released resources - nothing should change
    a1.release()
    assert all((n2.total == n2Tot, n2.free == n2Tot - c2_c, n2.used == c2_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c2_g,
                n2.crs[CRType.GPU].available == n2GpuTot - c2_g, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot, n2.crs[CRType.MEM].used == 0,
                n2.crs[CRType.MEM].available == n2MemTot))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c2_c,
                r.used_cores == c2_c))

    # allocate the rest of the crs
    c4_c = n2.free
    c4_g = n2.crs[CRType.GPU].available
    c4_m = n2.crs[CRType.MEM].available
    a4 = n2.allocate_max(c4_c, {CRType.MEM: c4_m, CRType.GPU: c4_g})
    assert a4
    assert all((a4.ncores == c4_c, a4.cores == [str(cid) for cid in range(c1_c)])), \
        "cores: {} vs expected {}".format(str(a4.cores), str(list(range(c1_c))))
    assert a4.crs != None and all(
        (len(a4.crs) == 2, CRType.GPU in a4.crs, a4.crs[CRType.GPU].count == c4_g,
         a4.crs[CRType.GPU].instances == list(range(c4_g)), CRType.MEM in a4.crs,
         a4.crs[CRType.MEM].count == c4_m)), "crs: {}".format(str(a4.crs))
    assert all((n2.total == n2Tot, n2.free == n2Tot - c2_c - c4_c == 0,
                n2.used == c2_c + c4_c == n2Tot))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c2_g + c4_g == n2GpuTot,
                n2.crs[CRType.GPU].available == n2GpuTot - c2_g - c4_g == 0,
                CRType.MEM in n2.crs, n2.crs[CRType.MEM].total_count == n2MemTot))
    assert all((n2.crs[CRType.MEM].used == c4_m == n2MemTot,
                n2.crs[CRType.MEM].available == n2MemTot - c4_m == 0))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c2_c - c4_c,
                r.used_cores == c2_c + c4_c))

    # release the last allocation
    a4.release()
    assert all((n2.total == n2Tot, n2.free == n2Tot - c2_c, n2.used == c2_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c2_g,
                n2.crs[CRType.GPU].available == n2GpuTot - c2_g, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot, n2.crs[CRType.MEM].used == 0,
                n2.crs[CRType.MEM].available == n2MemTot))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c2_c,
                r.used_cores == c2_c))

    # release once more the already released resources - nothing should change
    a4.release()
    assert all((n2.total == n2Tot, n2.free == n2Tot - c2_c, n2.used == c2_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c2_g,
                n2.crs[CRType.GPU].available == n2GpuTot - c2_g, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot, n2.crs[CRType.MEM].used == 0,
                n2.crs[CRType.MEM].available == n2MemTot))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c2_c,
                r.used_cores == c2_c))

    # release the remaining allocation - all resources should be free
    a2.release()
    assert all((n2.total == n2Tot, n2.free == n2Tot, n2.used == 0))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot, n2.crs[CRType.GPU].used == 0,
                n2.crs[CRType.GPU].available == n2GpuTot, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot, n2.crs[CRType.MEM].used == 0,
                n2.crs[CRType.MEM].available == n2MemTot))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot,
                r.used_cores == 0))

    # allocate most of the mem cr and the whole gpu set
    c5_c = n2.free - 2
    c5_g = n2.crs[CRType.GPU].available
    c5_m = n2.crs[CRType.MEM].available - 20
    a5 = n2.allocate_max(c5_c, {CRType.MEM: c5_m, CRType.GPU: c5_g})
    assert a5
    assert all((a5.ncores == c5_c, a5.cores == [str(cid) for cid in range(c5_c)])), \
        "cores: {} vs expected {}".format(str(a5.cores), str(list(range(c5_c))))
    assert a5.crs != None and all(
        (len(a5.crs) == 2, CRType.GPU in a5.crs, a5.crs[CRType.GPU].count == c5_g,
         a5.crs[CRType.GPU].instances == list(range(c5_g)), CRType.MEM in a5.crs,
         a5.crs[CRType.MEM].count == c5_m)), "crs: {}".format(str(a5.crs))
    assert all((n2.total == n2Tot, n2.free == n2Tot - c5_c, n2.used == c5_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c5_g == n2GpuTot,
                n2.crs[CRType.GPU].available == n2GpuTot - c5_g == 0,
                CRType.MEM in n2.crs, n2.crs[CRType.MEM].total_count == n2MemTot))
    assert all((n2.crs[CRType.MEM].used == c5_m,
                n2.crs[CRType.MEM].available == n2MemTot - c5_m))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c5_c,
                r.used_cores == c5_c))

    # try to allocate the rest of the mem cr and one gpu - the allocation should
    # not be created
    c6_c = n2.free
    c6_g = 1
    c6_m = n2.crs[CRType.MEM].available
    a6 = n2.allocate_max(c6_c, {CRType.MEM: c6_m, CRType.GPU: c6_g})
    assert a6 == None
    assert all((n2.total == n2Tot, n2.free == n2Tot - c5_c, n2.used == c5_c))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot,
                n2.crs[CRType.GPU].used == c5_g == n2GpuTot,
                n2.crs[CRType.GPU].available == n2GpuTot - c5_g == 0,
                CRType.MEM in n2.crs, n2.crs[CRType.MEM].total_count == n2MemTot))
    assert all((n2.crs[CRType.MEM].used == c5_m,
                n2.crs[CRType.MEM].available == n2MemTot - c5_m))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c5_c,
                r.used_cores == c5_c))

    # release all allocations
    a5.release()
    assert all((n2.total == n2Tot, n2.free == n2Tot, n2.used == 0))
    assert all((len(n2.crs) == 2, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot, n2.crs[CRType.GPU].used == 0,
                n2.crs[CRType.GPU].available == n2GpuTot, CRType.MEM in n2.crs,
                n2.crs[CRType.MEM].total_count == n2MemTot, n2.crs[CRType.MEM].used == 0,
                n2.crs[CRType.MEM].available == n2MemTot))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot,
                r.used_cores == 0))
def test_resources_allocate_crs_gpu():
    n1Tot = 8
    n2Tot = 8
    n1GpuTot = 4
    n2GpuTot = 2
    r = Resources(ResourcesType.LOCAL, [
        Node("n1", total_cores=n1Tot, used=0, core_ids=None,
             crs={CRType.GPU: CRBind(CRType.GPU, list(range(n1GpuTot)))}),
        Node("n2", total_cores=n2Tot, used=0, core_ids=None,
             crs={CRType.GPU: CRBind(CRType.GPU, list(range(n2GpuTot)))})
    ], binding=False)

    assert all((r != None, r.binding == False, r.rtype == ResourcesType.LOCAL,
                r.total_nodes == 2, r.total_cores == r.free_cores == n1Tot + n2Tot,
                r.used_cores == 0))
    assert all((r.nodes[0].name == 'n1', r.nodes[0].total == r.nodes[0].free == n1Tot,
                r.nodes[0].used == 0))
    assert all((r.nodes[1].name == 'n2', r.nodes[1].total == r.nodes[1].free == n2Tot,
                r.nodes[1].used == 0))

    n1 = r.nodes[0]
    n2 = r.nodes[1]
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot, n1.crs[CRType.GPU].used == 0,
                n1.crs[CRType.GPU].available == n1GpuTot))
    assert all((len(n2.crs) == 1, CRType.GPU in n2.crs,
                n2.crs[CRType.GPU].total_count == n2GpuTot, n2.crs[CRType.GPU].used == 0,
                n2.crs[CRType.GPU].available == n2GpuTot))

    # create a partial allocation on the first node with the gpu cr
    c1_c = 2
    c1_g = 2
    a1 = n1.allocate_max(c1_c, {CRType.GPU: c1_g})
    assert a1
    assert all((a1.ncores == c1_c, a1.cores == [str(cid) for cid in range(c1_c)])), \
        "cores: {}".format(str(a1.cores))
    assert a1.crs != None and all(
        (len(a1.crs) == 1, CRType.GPU in a1.crs, a1.crs[CRType.GPU].count == c1_g,
         a1.crs[CRType.GPU].instances == list(range(c1_g)))), "crs: {}".format(str(a1.crs))
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1_c, n1.used == c1_c))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot,
                n1.crs[CRType.GPU].used == c1_g,
                n1.crs[CRType.GPU].available == n1GpuTot - c1_g))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c1_c,
                r.used_cores == c1_c))

    # try to allocate more crs than are available - the allocation should not be
    # created and the state of the resources should not change
    c2_c = 2
    c2_g = n1GpuTot - c1_g + 2
    a2 = n1.allocate_max(c2_c, {CRType.GPU: c2_g})
    assert a2 == None
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1_c, n1.used == c1_c))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot,
                n1.crs[CRType.GPU].used == c1_g,
                n1.crs[CRType.GPU].available == n1GpuTot - c1_g))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c1_c,
                r.used_cores == c1_c))

    # create an allocation for the rest of the cpus on the first node
    c3_c = n1Tot - c1_c
    a3 = n1.allocate_max(c3_c)
    assert a3
    assert all((a3.ncores == c3_c,
                a3.cores == [str(cid) for cid in range(c1_c, c1_c + c3_c)])), \
        "cores: {} vs expected {}".format(str(a3.cores),
                                          str(list(range(c1_c, c1_c + c3_c))))
    assert a3.crs == None
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1_c - c3_c == 0,
                n1.used == c1_c + c3_c == n1Tot))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot,
                n1.crs[CRType.GPU].used == c1_g,
                n1.crs[CRType.GPU].available == n1GpuTot - c1_g))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c1_c - c3_c,
                r.used_cores == c1_c + c3_c))

    # try to allocate available crs but without available cpus - the allocation
    # should not be created and the state of the resources should not change
    c4_c = 1
    c4_g = n1GpuTot - c1_g
    a4 = n1.allocate_max(c4_c, {CRType.GPU: c4_g})
    assert a4 == None
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1_c - c3_c == 0,
                n1.used == c1_c + c3_c == n1Tot))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot,
                n1.crs[CRType.GPU].used == c1_g,
                n1.crs[CRType.GPU].available == n1GpuTot - c1_g))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c1_c - c3_c,
                r.used_cores == c1_c + c3_c))

    # release some cpus
    a3.release()
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1_c, n1.used == c1_c))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot,
                n1.crs[CRType.GPU].used == c1_g,
                n1.crs[CRType.GPU].available == n1GpuTot - c1_g))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c1_c,
                r.used_cores == c1_c))

    # release the already released cpus - nothing should change
    a3.release()
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1_c, n1.used == c1_c))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot,
                n1.crs[CRType.GPU].used == c1_g,
                n1.crs[CRType.GPU].available == n1GpuTot - c1_g))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c1_c,
                r.used_cores == c1_c))

    # allocate the rest of the resources
    c5_c = n1.free
    c5_g = n1.crs[CRType.GPU].available
    a5 = n1.allocate_max(c5_c, {CRType.GPU: c5_g})
    assert a5
    assert all((a5.ncores == c5_c,
                a5.cores == [str(cid) for cid in range(c1_c, c1_c + c5_c)])), \
        "cores: {} vs expected {}".format(str(a5.cores),
                                          str(list(range(c1_c, c1_c + c5_c))))
    assert a5.crs != None and all(
        (len(a5.crs) == 1, CRType.GPU in a5.crs, a5.crs[CRType.GPU].count == c5_g,
         a5.crs[CRType.GPU].instances == list(range(c1_g, c1_g + c5_g)))), \
        "crs: {}".format(str(a5.crs))
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1_c - c5_c == 0,
                n1.used == c1_c + c5_c == n1Tot))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot,
                n1.crs[CRType.GPU].used == c1_g + c5_g == n1.crs[CRType.GPU].total_count,
                n1.crs[CRType.GPU].available == n1GpuTot - c1_g - c5_g == 0))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c1_c - c5_c,
                r.used_cores == c1_c + c5_c))

    # release one gpu allocation
    a1.release()
    assert all((n1.total == n1Tot, n1.free == n1Tot - c5_c, n1.used == c5_c))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot,
                n1.crs[CRType.GPU].used == c5_g,
                n1.crs[CRType.GPU].available == n1GpuTot - c5_g))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c5_c,
                r.used_cores == c5_c))

    # release once more the already released gpu allocation - nothing should change
    a1.release()
    assert all((n1.total == n1Tot, n1.free == n1Tot - c5_c, n1.used == c5_c))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot,
                n1.crs[CRType.GPU].used == c5_g,
                n1.crs[CRType.GPU].available == n1GpuTot - c5_g))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c5_c,
                r.used_cores == c5_c))

    # release the rest of the resources (the double release should be harmless)
    a5.release()
    a5.release()
    assert all((n1.total == n1Tot, n1.free == n1Tot, n1.used == 0))
    assert all((len(n1.crs) == 1, CRType.GPU in n1.crs,
                n1.crs[CRType.GPU].total_count == n1GpuTot, n1.crs[CRType.GPU].used == 0,
                n1.crs[CRType.GPU].available == n1GpuTot))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot,
                r.used_cores == 0))
def test_resources_allocate_general():
    n1Tot = 12
    n2Tot = 10
    r = Resources(ResourcesType.LOCAL, [
        Node("n1", total_cores=n1Tot, used=0, core_ids=None, crs=None),
        Node("n2", total_cores=n2Tot, used=0, core_ids=None, crs=None)
    ], binding=False)

    assert all((r != None, r.binding == False, r.rtype == ResourcesType.LOCAL,
                r.total_nodes == 2, r.total_cores == r.free_cores == n1Tot + n2Tot,
                r.used_cores == 0))
    assert all((r.nodes[0].name == 'n1', r.nodes[0].total == r.nodes[0].free == n1Tot,
                r.nodes[0].used == 0))
    assert all((r.nodes[1].name == 'n2', r.nodes[1].total == r.nodes[1].free == n2Tot,
                r.nodes[1].used == 0))

    # create a partial allocation on the first node
    n1 = r.nodes[0]
    c1 = 4
    a1 = n1.allocate_max(c1)
    assert all((a1, a1.ncores == c1, a1.cores == [str(cid) for cid in range(c1)],
                a1.crs == None))
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1, n1.used == c1))
    assert all((r.total_cores == n1Tot + n2Tot, r.free_cores == n1Tot + n2Tot - c1,
                r.used_cores == c1))

    # create a partial allocation on the second node
    n2 = r.nodes[1]
    c2 = 8
    a2 = n2.allocate_max(c2)
    assert all((a2, a2.ncores == c2, a2.cores == [str(cid) for cid in range(c2)],
                a2.crs == None))
    assert all((n2.total == n2Tot, n2.free == n2Tot - c2, n2.used == c2))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c1 - c2, r.used_cores == c1 + c2))

    # request more resources than are available on the node
    c3 = n1Tot - c1 + 2
    c3Real = n1Tot - c1
    a3 = n1.allocate_max(c3)
    assert all((a3, a3.ncores == c3Real,
                a3.cores == [str(cid) for cid in range(c1, c1 + c3Real)], a3.crs == None))
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1 - c3Real == 0,
                n1.used == c1 + c3Real == n1Tot))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c1 - c2 - c3Real,
                r.used_cores == c1 + c2 + c3Real))

    # request resources when none are left on the node
    c4 = 4
    a4 = n1.allocate_max(c4)
    assert a4 == None
    assert all((n1.total == n1Tot, n1.free == n1Tot - c1 - c3Real == 0,
                n1.used == c1 + c3Real == n1Tot))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c1 - c2 - c3Real,
                r.used_cores == c1 + c2 + c3Real))

    # release the first allocation (now only c3Real cores should be allocated on n1)
    a1.release()
    assert all((n1.total == n1Tot, n1.free == n1Tot - c3Real, n1.used == c3Real))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c2 - c3Real,
                r.used_cores == c2 + c3Real))

    # allocate the rest of the free cores
    c5 = n1.free
    a5 = n1.allocate_max(c5)
    assert all((a5, a5.ncores == c5,
                a5.cores == [str(cid) for cid in
                             list(range(c1)) + list(range(c1 + c3Real, n1Tot))],
                a5.crs == None))
    assert all((n1.total == n1Tot, n1.free == n1Tot - c3Real - c5 == 0,
                n1.used == c5 + c3Real == n1Tot))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c5 - c2 - c3Real,
                r.used_cores == c5 + c2 + c3Real))

    # release once more the first, already released allocation - nothing should change
    a1.release()
    assert all((n1.total == n1Tot, n1.free == n1Tot - c3Real - c5 == 0,
                n1.used == c5 + c3Real == n1Tot))
    assert all((r.total_cores == n1Tot + n2Tot,
                r.free_cores == n1Tot + n2Tot - c5 - c2 - c3Real,
                r.used_cores == c5 + c2 + c3Real))

    # release all allocations
    for a in [a2, a3, a5]:
        a.release()
    assert all((r.nodes[0].name == 'n1', r.nodes[0].total == r.nodes[0].free == n1Tot,
                r.nodes[0].used == 0))
    assert all((r.nodes[1].name == 'n2', r.nodes[1].total == r.nodes[1].free == n2Tot,
                r.nodes[1].used == 0))
    assert all((r.total_nodes == 2, r.total_cores == r.free_cores == n1Tot + n2Tot,
                r.used_cores == 0))
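# The functions above are plain pytest test cases; assuming this module sits in the
# project's test tree, a focused run could look like:
#   pytest -q -k "test_resources or test_scheduler" <path-to-this-file>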