def get_network_load(self, job_dict):
    '''
    Fill in the worker / parameter-server network load of one job.

    Writes 'w_network' and 'ps_network' into job_dict (and 'ps_ave' for
    single-GPU jobs). Returns None in every case; when 'num_gpu' or
    'model' is missing a message is logged and nothing is written.
    '''
    if 'num_gpu' not in job_dict:
        util.print_fn('No gpu information')
        return
    if 'model' not in job_dict:
        util.print_fn('No model information')
        return

    worker_cnt = job_dict['num_gpu']
    if worker_cnt == 1:
        # a single worker exchanges nothing with a parameter server
        job_dict['ps_network'] = []
        job_dict['w_network'] = [0]
        job_dict['ps_ave'] = 0
        return

    # one parameter server per worker
    ps_cnt = worker_cnt
    model = job_dict['model']
    job_dict['w_network'] = [model['total_size']] * worker_cnt
    ps_load = [0] * ps_cnt
    job_dict['ps_network'] = ps_load

    # tensors are dealt out to the parameter servers round-robin
    for idx, tensor_size in enumerate(model['tensors']):
        ps_load[idx % ps_cnt] += tensor_size

    # keep one decimal per parameter server, updating the stored list in place
    ps_load[:] = [round(load, 1) for load in ps_load]
    # NOTE(review): the line this was restored from ended with a dangling
    # triple quote, so statements may have followed here - confirm against
    # the upstream file.
def get_model(model_name):
    '''
    Look up the tensor information of a model by name.

    Returns a dict with the keys 'name', 'ind', 'tensors' and 'mem_util',
    read from the module-level tables m_names / m_tensors / m_mem.
    An unknown name falls back to index 8 and the pick is logged.
    '''
    # position in this tuple is the index into the module-level tables
    known_models = ('vgg19', 'vgg16', 'vgg11', 'alexnet', 'resnet152',
                    'resnet101', 'resnet50', 'inception4', 'inception3')
    try:
        m_idx = known_models.index(model_name)
    except ValueError:
        m_idx = 8
        util.print_fn('No model match, pick %s' % m_names[m_idx])

    return {
        'name': m_names[m_idx],
        'ind': m_idx,
        'tensors': m_tensors[m_idx],
        'mem_util': m_mem[m_idx],
    }
def get_job_model(self, job_dict):
    '''
    Store the scaled model description of a job under job_dict['model'].

    Needs both 'model_name' and 'model_scale' in job_dict; otherwise only a
    message is logged and job_dict is left untouched.
    '''
    if 'model_name' not in job_dict or 'model_scale' not in job_dict:
        util.print_fn('Not enough model information to get the details')
        return

    job_dict['model'] = models.get_model_with_scale(job_dict['model_name'],
                                                    job_dict['model_scale'])
def add_task(self, task, pack=False):
    """Try to start ``task`` on this device.

    Args:
        task: task object exposing ``task_id``, ``duration`` and
            ``original_duration``.
        pack: when False the task is refused if anything already runs here.

    Returns:
        True when the task was recorded in ``self.running_tasks``,
        False otherwise.
    """
    if not self.can_fit(task):
        return False

    co_located = self.running_tasks
    if not pack and len(co_located) > 0:
        # not placing
        return False

    if len(co_located) >= 2:
        # add interference latency
        original_duration = task.original_duration
        sum_utilisation = 0
        for neighbour in co_located.values():
            spread = (neighbour.gpu_utilization_max -
                      neighbour.gpu_utilization_avg) / 4
            sampled = np.random.normal(loc=neighbour.gpu_utilization_avg,
                                       scale=spread,
                                       size=1)
            # each neighbour contributes at most 100
            sum_utilisation += min(100, sampled)
        utilslowdown = np.polyval(NV_2080_COEF, sum_utilisation)
        # NOTE(review): new_duration is only logged, never written back to
        # task.duration - confirm whether that is intended.
        new_duration = task.duration * utilslowdown
        task.interfered = True
        util.print_fn(
            "original duration: %.3f , new duration: %.3f, %d tasks on device %s, Node %s, sum utilisation %.3f and factor at %.3f"
            % (original_duration, new_duration, len(co_located),
               str(self.device_id), str(self.node_id), sum_utilisation,
               utilslowdown))
    else:
        task.interfered = False
        task.duration = task.original_duration

    co_located[task.task_id] = task
    return True
def prepare_job_start_events(self):
    '''
    Add one start event per distinct submit time to self.job_events.

    Every job in self.job_list is appended to the 'start_jobs' list of the
    event whose 'time' equals the job's 'submit_time'; the event is created
    when util.search_dict_list finds none. Each job's 'status' is set to
    'EVENT'. Events are then sorted by time and printed.
    End events are not created here.
    '''
    for job in self.job_list:
        start_t = job['submit_time']
        tmp_dict = util.search_dict_list(self.job_events, 'time', start_t)
        if tmp_dict is None:
            # no event at this time yet: create it with this job as its
            # first starter
            tmp_dict = {'time': start_t, 'start_jobs': [job], 'end_jobs': []}
            self.job_events.append(tmp_dict)
        else:
            tmp_dict['start_jobs'].append(job)
        # job has been in EVENT status
        job['status'] = 'EVENT'

    # sort events based on their time
    self.job_events.sort(key=lambda e: e['time'])
    util.print_fn('Init, add job start events')
    self.print_job_events()
def checkpoint_multi_dlas_gpu(self, job_queue, event_time):
    '''
    Append one cluster/job snapshot to self.log_list and dump the logs.

    The recorded row is, in order:
    time, idle_node, busy_node, full_node, idle_gpu, busy_gpu,
    pending_job, running_job, completed_job.
    full_node is recorded with the same value as busy_node.
    Exits the process when FLAGS.schedule is not 'multi-dlas-gpu'.
    '''
    if FLAGS.schedule != 'multi-dlas-gpu':
        util.print_fn("Error, not multi-dlas-gpu in checkpoint")
        exit()

    # GPU / node occupancy derived from the free GPUs of every job group
    idle_gpu = sum(group.free_gpu for group in job_queue.gpu_job.values())
    busy_gpu = CLUSTER.num_gpu - idle_gpu
    busy_node = int(math.ceil(busy_gpu / CLUSTER.num_gpu_p_node))
    full_node = busy_node
    idle_node = int(CLUSTER.num_node - busy_node)

    # job tallies by status; other statuses are not counted
    statuses = [job['status'] for job in job_queue.job_list]
    running_job = statuses.count('RUNNING')
    pending_job = statuses.count('PENDING')
    completed_job = statuses.count('END')

    # add log
    snapshot = [idle_node, busy_node, full_node, idle_gpu, busy_gpu,
                pending_job, running_job, completed_job]
    self.log_list.append([event_time] + [int(value) for value in snapshot])
    if len(self.log_list) >= 1:
        self.dump_all_logs()
def fit_first_sim_jobs(job_queue, cluster, logger):
    '''
    Event-driven simulation that starts every pending job that fits.

    New jobs are appended to the pending queue; at each event the pending
    queue is scanned in order and every job that obtains resources starts.
    The loop ends when no events and no pending jobs remain, or when jobs
    are still pending but no event is left.
    '''
    while len(job_queue.job_events) + len(job_queue.pending_jobs) > 0:
        if len(job_queue.job_events) == 0:
            util.print_fn("This cluster is not large enough to run the job")
            break

        event = job_queue.job_events[0]
        event_time = event['time']

        # finished jobs first, so their GPUs are free for the starters
        for finished in event['end_jobs']:
            # remove from migratable jobs, if it's there
            job_queue.remove_migratable(finished)
            cluster.release_job_res(finished)
            logger.job_complete(finished, event_time)

        # newly submitted jobs join the pending queue
        for arriving in event['start_jobs']:
            job_queue.move_to_pending(arriving)

        # collect starters first: removing from pending_jobs while iterating
        # it would skip the following entry
        startable = []
        for waiting in job_queue.pending_jobs:
            if cluster.check_free_gpu() <= 0:
                break
            if try_get_job_res(cluster, job_queue, waiting) == True:
                startable.append(waiting)

        for starter in startable:
            job_queue.remove_from_pending(starter, event_time)
            job_queue.add_job_end_event(starter)
            util.print_fn('----job[%d] starts from pending' %
                          starter['job_idx'])

        # drop the handled event; end events may have been added, so re-sort
        job_queue.job_events.pop(0)
        job_queue.job_events.sort(key=lambda e: e['time'])
        logger.checkpoint(job_queue, event_time)
def add_job(self, job):
    """Allocate resources for ``job`` on this node if they are available.

    On success the job is appended to ``self.jobs`` and its
    ``migration_count`` is incremented.

    Returns:
        True when the job was added, False (after a warning) otherwise.
    """
    needed = job.resource_requirements
    if not self.check_resources(needed):
        util.print_fn("Job does not fit on node", util.LOG_LEVEL_WARNING)
        return False

    self.alloc_job(needed)
    self.jobs.append(job)
    job.migration_count += 1
    return True
def end_job(self, e_job):
    '''
    Book-keeping for a finished job under the multi-dlas-gpu schedule.

    Releases one job's worth of GPUs in the group keyed by the job's
    'num_gpu', removes the job from that group's runnable list and from the
    queue indexed by its 'q_id', and bumps the group's end_job counter.
    Exits the process when self.flags.schedule is not 'multi-dlas-gpu'.
    '''
    if self.flags.schedule != 'multi-dlas-gpu':
        util.print_fn("Not multi-dlas-gpu")
        exit()

    group = self.gpu_job[e_job['num_gpu']]
    group.release_job_gpu(1)
    group.runnable_jobs.remove(e_job)
    group.queues[e_job['q_id']].remove(e_job)
    group.end_job += 1
def schedule_fifo(scheme, placement_algo, infrastructure, jobs_manager, delta,
                  **kwargs):
    """First in, first out scheduling; never preempts or migrates.

    Asks ``jobs_manager`` for the next job at time ``delta`` and tries to
    place it with ``placement_algo(infrastructure, job, scheme)``. The job
    is popped from the manager only when placement succeeds.

    Returns:
        ``(nodes, job, success)``, or ``(None, None, None)`` when no job is
        ready.
    """
    candidate = jobs_manager.get_next_job(delta)
    if candidate is None:
        util.print_fn("no job ready at time %d" % (delta))
        return None, None, None

    assert candidate.is_waiting()
    nodes, success = placement_algo(infrastructure, candidate, scheme)
    if success:
        # the job leaves the queue only once it has actually been placed
        jobs_manager.pop(delta)
    return nodes, candidate, success
def __init__(self,
             id,
             flags,
             job_queue,
             num_node=0,
             num_gpu_p_node=0,
             num_cpu_p_node=0,
             mem_p_node=0):
    '''
    Record the identity and per-node capacities of a switch.

    The node list starts empty; nodes are not created here.
    '''
    # identity and shared handles
    self.id = id
    self.flags = flags
    self.job_queue = job_queue

    # capacity description
    self.num_node = num_node
    self.num_gpu_p_node = num_gpu_p_node
    self.num_cpu_p_node = num_cpu_p_node
    self.mem_p_node = mem_p_node

    self.node_list = []
    util.print_fn(' Switch[%d] has %d nodes' % (id, num_node))
def sort_all_jobs(self, mode=None):
    '''
    Sort self.job_list in place by each job's 'submit_time'.

    With the 'multi-dlas-gpu' schedule and the 'count' scheme the number of
    jobs in every GPU group is printed afterwards. mode is not used.
    '''
    self.job_list.sort(key=lambda job: job['submit_time'])
    util.print_fn(' Jobs are sorted with their start time')

    report_groups = (self.flags.schedule == 'multi-dlas-gpu'
                     and self.flags.scheme == 'count')
    if not report_groups:
        return
    for num_gpu, gjob in self.gpu_job.items():
        util.print_fn('%d-GPU jobs have %d ' % (num_gpu, gjob.total_job))
def __init__(self, id, num_gpu=0, num_cpu=0, mem=0):
    '''
    Create a node on which every GPU, CPU and all memory is free.
    '''
    self.id = id

    # total capacity and what is currently free
    self.num_cpu, self.free_cpus = num_cpu, num_cpu
    self.num_gpu, self.free_gpus = num_gpu, num_gpu

    # network load: can be bw, or the amount of traffic
    # in and out should be the same
    self.network_in = 0
    self.network_out = 0

    self.mem, self.free_mem = mem, mem

    # node class for gandiva
    self.job_gpu = 0
    self.num_jobs = 0

    util.print_fn(' Node[%d] has %d gpus, %d cpus, %d G memory' %
                  (id, num_gpu, num_cpu, mem))
def print_all_job_size_info(self):
    '''
    Dump per-job tensor size information to CSV files in the working dir.

    Creates ps_max_ave.csv, ps_max99_ave.csv, w.csv, ps.csv and ps_w.csv,
    each starting with a one-column header row. For every job whose
    'ps_ave' is non-zero one row is written to the first three files;
    ps.csv and ps_w.csv only receive their header.

    The files are closed even when a job row raises (e.g. a missing key).
    '''
    from contextlib import ExitStack

    with ExitStack() as stack:
        writers = {}
        for column in ('ps_max_ave', 'ps_max99_ave', 'w', 'ps', 'ps_w'):
            # newline='' is what the csv module expects for files it writes
            fd = stack.enter_context(
                open('%s.csv' % column, 'w+', newline=''))
            writers[column] = csv.writer(fd)
            writers[column].writerow([column])

        util.print_fn("Start to dump job information")
        for job in self.job_list:
            if job['ps_ave'] != 0:
                writers['ps_max_ave'].writerow([job['ps_max_ave']])
                writers['ps_max99_ave'].writerow([job['ps_max99_ave']])
                writers['w'].writerow([job['w_network'][0]])
def _schedule(self, delta): if self.num_free_nodes() < 1: return jobs_all = self.jobs_manager.total_jobs(delta) scheduling_algo = algorithm.scheduling_algorithms[self.schedule] placement_algo = algorithm.placement_algorithms[self.placement] nodes, job, success = scheduling_algo(placement_algo, self.infrastructure, self.jobs_manager, delta) if success: if self.infrastructure.enable_network_costs: extras = network_service.calculate_network_costs( self.infrastructure, job) orginal_duration = job.duration job.add_network_costs(extras) util.print_fn( "Job %s : Original duration %f , New duration %f" % (job.job_id, orginal_duration, job.duration)) self.add_to_running(nodes, job.job_id) else: assert (jobs_all == self.jobs_manager.total_jobs(delta))
def parse_job_file(self):
    """Read the job trace at ``self.file_path`` and queue every row as a job.

    Files ending in ``.txt`` are read space-delimited; everything else
    (including ``.csv``) is read comma-delimited. Each row is converted
    with ``self.parse_job`` and handed to ``self._add_to_job_queue``.

    Raises:
        ValueError: if ``self.file_path`` does not exist.
    """
    if not os.path.exists(self.file_path):
        raise ValueError('job trace file not found: %s' % self.file_path)

    # endswith() looks at the real suffix; a find()-based test breaks when
    # the same text also occurs earlier in the path
    deli = ' ' if self.file_path.endswith('.txt') else ','

    # the context manager closes the file even if parsing a row raises
    with open(self.file_path, 'r') as fd:
        reader = csv.DictReader(fd, delimiter=deli)
        # Add job from job trace file
        keys = reader.fieldnames
        util.print_fn(
            '--------------------------------- Read TF jobs from: %s ---------------------------------'
            % os.path.basename(self.file_path))
        util.print_fn(' we get the following fields:\n %s' % keys)
        for row in reader:
            self._add_to_job_queue(self.parse_job(row))
        util.print_fn(
            '---------------------------------- Get %d TF jobs in total ----------------------------------'
            % self.total_jobs())
def try_alloc_job(self, job, is_single=False):
    """Try to place all tasks of ``job``, and the job itself, on this node.

    NOTE: right now this assumes all tasks can fit; the tasks are reserved
    first and then the job.

    Args:
        job: job exposing ``tasks`` (dict of task objects), ``task_count``,
            ``tasks_running_on`` and ``job_id``.
        is_single: forwarded to ``try_reserve_and_placed_job``.

    Returns:
        True when the job was placed. False when it does not fit or when
        the reservation failed, in which case every task reserved here is
        rolled back.
    """
    result = False
    ps_tasks, worker_tasks = self.can_fit_num_task(job.tasks)
    if ps_tasks + worker_tasks < job.task_count:
        util.print_fn("Job does not fit on node", util.LOG_LEVEL_WARNING)
        return result

    placed = 0
    # iterate over a snapshot so reservation side effects cannot disturb
    # the loop
    for t in list(job.tasks.values()):
        result = self.try_reserve_and_placed_task(t)
        if result:
            job.tasks_running_on[t.task_id] = self.node_id
            placed += 1
    if placed > 0:
        result = self.try_reserve_and_placed_job(job, is_single)

    if not result:
        # Not executed yet: undo every reservation made above.
        # Iterate the task objects (values); iterating items() yields
        # tuples, which have no task_id.
        for t in job.tasks.values():
            job.tasks_running_on.pop(t.task_id, None)
            released = self.placed_tasks.pop(t.task_id, None)
            if released is not None:
                # only give back resources that were actually reserved here
                self.release_allocated_resources(released)
        # the job may never have been recorded on this node
        self.placed_jobs.pop(job.job_id, None)
        util.print_fn("RELEASED: Job does not fit on node",
                      util.LOG_LEVEL_WARNING)
        return result

    util.print_fn("placed SINGLE NODE job %s, num tasks %d on node %s" %
                  (job.job_id, len(job.tasks), self.node_id))
    return result
def calculate_network_costs(infrastructure, job):
    """Estimate the extra seconds a distributed job spends on the network.

    Very basic cost model (2 is the round trip):
        (model_size / bandwidth + crossed_nodes * internode_latency)
            * iterations * 2

    ``crossed_nodes`` is the number of nodes that host parameter-server
    tasks or worker tasks but not both (task keys containing 'ps' count as
    parameter servers).

    Returns:
        0 for non-distributed jobs or when no node is crossed, otherwise
        the extra seconds.
    """
    if not job.is_distributed():
        return 0

    placement = job.tasks_running_on
    ps_nodes = {node for task, node in placement.items() if 'ps' in task}
    wk_nodes = {node for task, node in placement.items() if 'ps' not in task}

    # nodes hosting only one of the two roles
    diff = ps_nodes ^ wk_nodes
    cross_many = len(diff)
    if cross_many == 0:
        # everything shares nodes: assume no extra latency even with
        # PS-worker traffic
        return 0

    # assume PS has sharded parameters, so the more nodes differ the more
    # communication is needed (heuristic)
    per_iteration = (job.model_size / infrastructure.bandwidth +
                     cross_many * infrastructure.internode_latency)
    extra_seconds = per_iteration * (job.iterations * 2.0)
    util.print_fn("Cross %s need to added extra %f for job %s" %
                  (str(diff), extra_seconds, job.job_id))
    return extra_seconds
def print_job_events(self):
    '''
    Print the time and the number of start / end jobs of every event.
    '''
    util.print_fn(' Print all job events ')
    for event in self.job_events:
        starts = len(event['start_jobs'])
        ends = len(event['end_jobs'])
        util.print_fn(
            ' event.time[%d], with %d start_jobs, and %d end_jobs' %
            (event['time'], starts, ends))
    util.print_fn(' ')
def start(self):
    """Run the scheduling loop until no job is queued or running.

    Each iteration generates jobs, schedules (only while jobs are queued),
    releases finished jobs, refreshes ``self.pending_time`` and prints the
    queue and node state. Every iteration sleeps twice for one second, so
    ``delta_time`` (seconds since the loop started) advances in wall-clock
    time. The total wall-clock time is printed at the end.
    """
    start_time = time.time()
    delta_time = 0
    current_remaining = self.jobs_manager.total_jobs(delta_time)
    running_jobs = len(self.jobs_manager.running_jobs)
    # counted per iteration; not read anywhere else in this method
    steps = 0
    while current_remaining + running_jobs > 0:
        # NOTE: Make decision on whether to:
        # 1. Done: schedule new jobs
        # 2. TODO: preempt running jobs
        # 3. TODO: migrate running jobs
        # 4. TODO: stochastic job arrival process
        self._gen_jobs(delta_time)
        time.sleep(1)
        if current_remaining > 0:
            # TODO: this will likely to be changed
            self._schedule(delta_time)
        # re-read the queue size after the scheduling attempt
        new_current_remaining = self.jobs_manager.total_jobs(delta_time)
        time.sleep(1)
        end_time = time.time()
        self.release_finished_jobs(end_time)
        # elapsed wall-clock time since the loop started
        delta_time = end_time - start_time
        current_remaining = new_current_remaining
        running_jobs = len(self.jobs_manager.running_jobs)
        self.pending_time = self.jobs_manager.average_pending_time()
        steps += 1
        util.print_fn(
            "Remaining jobs: %d, Running Jobs: %d Finished Jobs %d" %
            (new_current_remaining, running_jobs,
             len(self.jobs_manager.finished_jobs)))
        util.print_fn(self.jobs_manager.running_jobs.keys())
        # per-node status line
        for k, v in iter(self.infrastructure.nodes.items()):
            util.print_fn(
                "Node %s is %s, GPU used %d, each node has tasks %s, gpu_utilizations %s"
                % (k, 'busy' if len(v.running_tasks) > 0 else 'free',
                   v.gpu_used, str(v.running_tasks.keys()),
                   str(v.gpu_mem_utilizations)))
    finished_time = time.time()
    total_time_taken = finished_time - start_time
    util.print_fn("Total Time Taken in seconds: %d" % total_time_taken)
def try_cross_node_alloc_ms(infrastructure,
                            job,
                            sort_fn=None,
                            filter_fn=None):
    """Place the worker tasks of ``job`` across several free nodes.

    From Tiresias: try to get gpus from multiple nodes
    [need gpus / gpu_p_node] nodes, and one node with
    [need_gpu % gpu_p_node]; if that cannot be found, give up.

    Args:
        infrastructure: provides ``nodes`` (dict) and ``num_gpu_p_node``.
        job: provides ``gpus``, ``tasks`` (dict keyed by task name),
            ``tasks_running_on``, ``job_id`` and ``worker_count``.
        sort_fn: optional callable applied to the node collection to order
            it.
        filter_fn: optional callable applied to the node collection before
            sorting.

    Returns:
        ``(nodes_assigned, True)`` when every task was placed on at least
        ``ceil(job.gpus / num_gpu_p_node)`` nodes, otherwise ``({}, False)``
        after releasing what was reserved on the nodes.

    Raises:
        ArithmeticError: if neither the failure nor the success condition
            holds.
    """
    # if someone decide to have 5 gpus but we have 4 per node,
    # we assigned 2 full node.
    least_num_full_nodes = math.ceil(job.gpus / infrastructure.num_gpu_p_node)
    nodes_assigned = {}
    to_be_assigned = job.tasks.copy()
    num_full_tasks = len(job.tasks)
    assigned_task = {}
    all_nodes = infrastructure.nodes.values()
    if filter_fn:
        all_nodes = filter_fn(all_nodes)
    if sort_fn:
        all_nodes = sort_fn(all_nodes)
    for node in all_nodes:
        if not node.is_free():
            continue
        if len(assigned_task) == num_full_tasks:
            break
        # this is checking how many nodes can fit the job current remaining tasks.
        worker_tasks_can_fit = node.can_fit_num_task(to_be_assigned)
        if worker_tasks_can_fit == 0:
            continue
        worker_count = 0
        pop_t = None
        check_next = False
        for k, v in iter(job.tasks.items()):
            if k in assigned_task:
                continue
            # only tasks whose key contains 'worker' are considered here
            # NOTE(review): `<=` lets one more task be attempted than the
            # reported fit count; the failed reservation below then moves
            # on to the next node - confirm this is intended.
            if 'worker' in k and worker_count <= worker_tasks_can_fit:
                pop_t = to_be_assigned.pop(k, None)
                worker_count += 1
            else:
                continue
            if pop_t is not None:
                result = node.try_reserve_and_placed_task(pop_t)
                if not result:
                    # we didn't actually placed anything if it was false.
                    # put it back.
                    to_be_assigned[k] = pop_t
                    worker_count -= 1
                    logging.info(
                        "unable to reserve job %s task %s on node %s, check next node..."
                        % (job.job_id, k, node.node_id))
                    check_next = True
                    break
                # from a job perspective keep track of where my tasks are
                job.tasks_running_on[k] = node.node_id
                assigned_task[k] = v
        # at least we have some task in the node.
        if worker_count > 0:
            node.try_reserve_and_placed_job(job, False)
            nodes_assigned[node.node_id] = node
            logging.info(
                "Job %s require %d - placed on nodes %s" %
                (job.job_id, least_num_full_nodes, str(nodes_assigned.keys())))
        if check_next:
            continue
        if len(nodes_assigned
               ) >= least_num_full_nodes and num_full_tasks == len(
                   assigned_task):
            break
    # if not enough, clear everything.
    # NOTE: all tasks need to be assigned!!!
    if len(assigned_task) < num_full_tasks or len(
            nodes_assigned) < least_num_full_nodes:
        # NOTE(review): entries written to job.tasks_running_on above are
        # not removed by this rollback - confirm the caller resets them.
        for node in iter(nodes_assigned.values()):
            node.placed_jobs.pop(job.job_id)
            for t in iter(job.tasks.values()):
                pop_t = node.placed_tasks.pop(t.task_id, None)
                if pop_t is not None:
                    node.release_allocated_resources(pop_t, reserved=True)
        nodes_assigned.clear()
        logging.info("not enough ")
        return {}, False
    if len(nodes_assigned) >= least_num_full_nodes and len(
            assigned_task) == num_full_tasks:
        util.print_fn(
            "placed job %s with task %d, on node - %s" %
            (job.job_id, job.worker_count, str(nodes_assigned.keys())))
        return nodes_assigned, True
    raise ArithmeticError()
def _setup_nodes(self, file_path): """read from a csv to init infrastructure""" if not os.path.exists(file_path): assert ValueError() project_dir = os.path.abspath( os.path.dirname(os.path.dirname(__file__))) spec_file = os.path.join(project_dir, file_path) name, ext = os.path.splitext(spec_file) # assume it is csv anyway assert 'csv' in ext f_handler = open(spec_file, 'r') reader = csv.DictReader(f_handler, delimiter=',') keys = reader.fieldnames util.print_fn(keys) for default_k in keys_default: if default_k not in keys: return # 1 line after reading fields assert reader.line_num == 1 for row in reader: self.num_switch = int(row['num_switch']) self.num_nodes_p_switch = int(row['num_node_p_switch']) self.num_gpu_p_node = int(row['num_gpu_p_node']) self.num_cpu_p_node = int(row['num_cpu_p_node']) self.mem_p_node = int(row['mem_p_node']) f_handler.close() nodes = 0 for rack_id in range(0, self.num_switch): rack = r.Rack(str(rack_id), self.bandwidth) for node_id in range(0, self.num_nodes_p_switch): nodes += 1 node = n.Node(rack.rack_id, str(nodes), self.gpu_memory_capacity, self.num_cpu_p_node, self.num_gpu_p_node, self.mem_p_node) self.nodes[str(nodes)] = node rack.add_node(node) self.racks[str(rack_id)] = rack util.print_fn("num_racks in cluster: %d" % len(self.racks)) first_rack = next(iter(self.racks.values())) first_rack_first_node = next(iter(first_rack.nodes.values())) util.print_fn("num_node_p_rack in cluster: %d" % len(first_rack.nodes)) util.print_fn("num_gpu_p_node in cluster: %d" % first_rack_first_node.gpu_count) util.print_fn("num_cpu_p_node in cluster: %d" % first_rack_first_node.cpu_count) util.print_fn("mem_p_node in cluster: %d" % first_rack_first_node.mem_size) util.print_fn("Total nodes in cluster: %d " % len(self.nodes)) util.print_fn("Total racks in cluster: %d " % len(self.racks)) util.print_fn( '--------------------------------- End of cluster spec ---------------------------------' )
def add_node(self, node):
    """Register ``node`` in this rack under its ``node_id``.

    A node whose id is already present is left untouched and a warning is
    logged instead.
    """
    if node.node_id in self.nodes:
        util.print_fn("Node already in rack", util.LOG_LEVEL_WARNING)
        return
    self.nodes[node.node_id] = node
def try_cross_node_alloc_ms(infrastructure, job):
    """Place the ps and worker tasks of ``job`` across several free nodes.

    From Tiresias: try to get gpus from multiple nodes
    [need gpus / gpu_p_node] nodes, and one node with
    [need_gpu % gpu_p_node]; if that cannot be found, give up.

    Args:
        infrastructure: provides ``nodes`` (dict) and ``num_gpu_p_node``.
        job: provides ``gpus``, ``tasks`` (dict keyed by task name),
            ``tasks_running_on`` and ``job_id``.

    Returns:
        ``(nodes_assigned, True)`` when every task was placed on at least
        ``ceil(job.gpus / num_gpu_p_node)`` nodes, otherwise ``({}, False)``
        after releasing what was reserved on the nodes.

    Raises:
        ArithmeticError: if neither the failure nor the success condition
            holds.
    """
    # if someone decide to have 5 gpus but we have 4 per node,
    # we assigned 2 full node.
    least_num_full_nodes = math.ceil(job.gpus / infrastructure.num_gpu_p_node)
    nodes_assigned = {}
    to_be_assigned = job.tasks.copy()
    num_full_tasks = len(job.tasks)
    assigned_task = {}

    for node in infrastructure.nodes.values():
        if not node.is_free():
            continue
        if len(assigned_task) == len(to_be_assigned):
            break
        # this is checking how many nodes can fit the job current remaining tasks.
        ps_tasks_can_fit, worker_tasks_can_fit = node.can_fit_num_task(
            to_be_assigned)
        ps_count = 0
        worker_count = 0
        pop_t = None
        for k, v in job.tasks.items():
            if k in assigned_task:
                continue
            # NOTE(review): the counters grow even when the reservation
            # below fails - confirm that is intended.
            if 'ps' in k and ps_count <= ps_tasks_can_fit:
                pop_t = to_be_assigned[k]
                ps_count += 1
            elif 'worker' in k and worker_count <= worker_tasks_can_fit:
                pop_t = to_be_assigned[k]
                worker_count += 1
            else:
                continue
            if pop_t is not None:
                result = node.try_reserve_and_placed_task(pop_t)
                if not result:
                    # we didn't actually placed anything if it was false.
                    continue
                # from a job perspective keep track of where my tasks are
                job.tasks_running_on[k] = node.node_id
                assigned_task[k] = v
        # at least we have some task in the node.
        if ps_count > 0 or worker_count > 0:
            node.try_reserve_and_placed_job(job, False)
            nodes_assigned[node.node_id] = node
        # compare task COUNTS: comparing the int with the dict itself is
        # never equal, so this early exit could not trigger
        if (len(nodes_assigned) >= least_num_full_nodes
                and num_full_tasks == len(assigned_task)):
            util.print_fn("assigned number of nodes %d" %
                          (len(nodes_assigned)))
            break

    # if not enough, clear everything.
    # NOTE: all tasks need to be assigned!!!
    if (len(assigned_task) < num_full_tasks
            or len(nodes_assigned) < least_num_full_nodes):
        for node in nodes_assigned.values():
            node.placed_jobs.pop(job.job_id)
            for t in job.tasks.values():
                pop_t = node.placed_tasks.pop(t.task_id, None)
                if pop_t is not None:
                    node.release_allocated_resources(pop_t)
        nodes_assigned.clear()
        return {}, False

    if (len(nodes_assigned) >= least_num_full_nodes
            and len(assigned_task) == num_full_tasks):
        util.print_fn("placed job %s, on node %s" %
                      (job.job_id, str(nodes_assigned.keys())))
        return nodes_assigned, True
    raise ArithmeticError()
def _init_nodes(self):
    """Build ``self.racks`` / ``self.nodes`` from the configured sizes.

    Creates ``num_switch`` racks holding ``num_nodes_p_switch`` nodes each;
    node ids are numbered continuously across racks starting at 1. A
    summary of the cluster is printed afterwards, with the per-node figures
    taken from the first node of the first rack.
    """
    node_seq = 0
    for rack_idx in range(self.num_switch):
        rack_key = str(rack_idx)
        rack = r.Rack(rack_key, self.bandwidth)
        for _ in range(self.num_nodes_p_switch):
            node_seq += 1
            node_key = str(node_seq)
            node = n.Node(rack.rack_id,
                          node_key,
                          self.gpu_memory_capacity,
                          self.num_cpu_p_node,
                          self.num_gpu_p_node,
                          self.mem_p_node,
                          enable_pack=self.flags.pack)
            self.nodes[node_key] = node
            rack.add_node(node)
        self.racks[rack_key] = rack

    util.print_fn("num_racks in cluster: %d" % len(self.racks))
    sample_rack = next(iter(self.racks.values()))
    sample_node = next(iter(sample_rack.nodes.values()))
    util.print_fn("num_node_p_rack in cluster: %d" % len(sample_rack.nodes))
    util.print_fn("num_gpu_p_node in cluster: %d" % sample_node.gpu_count)
    util.print_fn("num_cpu_p_node in cluster: %d" % sample_node.cpu_count)
    util.print_fn("mem_p_node in cluster: %d" % sample_node.mem_size)
    util.print_fn("Total nodes in cluster: %d " % len(self.nodes))
    util.print_fn("Total racks in cluster: %d " % len(self.racks))
    util.print_fn(
        '--------------------------------- End of cluster spec ---------------------------------'
    )
def add_node(self, node):
    """Append ``node`` to this rack's node list unless it is already there.

    A duplicate is not appended; a warning is logged instead.
    """
    if node in self.nodes:
        util.print_fn("Node already in rack", util.LOG_LEVEL_WARNING)
        return
    self.nodes.append(node)
def release_job_gpu(self, num_job=1):
    '''
    Return the GPUs of num_job jobs of this group to the free pool.

    Each job accounts for self.num_gpu GPUs. A negative num_job is reported
    and exits the process.
    '''
    if num_job < 0:
        util.print_fn("Error: num_job < 0")
        exit()
    freed = int(self.num_gpu * num_job)
    self.free_gpu += freed
def completion_check(self):
    '''
    Report every GPU group whose finished-job count differs from its total.
    '''
    for num_gpu, gjob in self.gpu_job.items():
        if gjob.end_job == gjob.total_job:
            continue
        util.print_fn(
            '!!!! Miss-match %d completed jobs with %d total jobs in %d-GPU jobs'
            % (gjob.end_job, gjob.total_job, num_gpu))
def reserve_gpus(self, total_num):
    '''
    Split total_num GPUs of the cluster between the gpu_job groups.

    Each group keeps the GPUs it currently uses. Free GPUs are first handed
    out in proportion to each group's demand (in multiples of the group's
    num_gpu, capped by its demand); what is still left is handed out one
    job-size at a time to groups whose demand is not yet met. The resulting
    reservation is passed to every group's get_gpu_reservation and a
    summary is printed.

    Returns None early, without changing any reservation, when no group
    has demand. Exits the process if a group ends up with negative free_gpu.
    '''
    num_group = len(self.gpu_job)
    # NOTE(review): ave_gpu is not read again in this function, and the
    # division raises ZeroDivisionError when there are no groups.
    ave_gpu = math.floor(total_num / num_group)
    # one working record per group
    job_list = list()
    for num_gpu, gjob in self.gpu_job.items():
        tmp_dict = dict()
        tmp_dict['num_gpu'] = num_gpu
        tmp_dict['used_gpu'] = gjob.total_gpu - gjob.free_gpu
        tmp_dict['demands'] = gjob.get_gpu_demands()
        tmp_dict['cur_gpu'] = gjob.total_gpu
        tmp_dict['cur_free_gpu'] = gjob.free_gpu
        tmp_dict['reserve'] = 0
        job_list.append(tmp_dict)
    total_free_gpu = total_num - sum(k['used_gpu'] for k in job_list)
    total_demands = sum(k['demands'] for k in job_list)
    # print('total_free %d, total_demands %d' % (total_free_gpu, total_demands))
    if total_demands == 0:
        return
    '''demand-based, keep current used_gpu'''
    remain_free_gpu = total_free_gpu
    # smallest demand first
    job_list.sort(key=lambda e: e.__getitem__('demands'))
    for job_dict in job_list:
        if job_dict['demands'] == 0:
            continue
        # this group's share of the total demand, rounded to 2 decimals
        ratio = round((job_dict['demands'] * 1.0) / total_demands, 2)
        # proportional share of total_num, rounded down to whole jobs
        cal_gpu = int(
            math.floor((ratio * total_num) / job_dict['num_gpu']) *
            job_dict['num_gpu'])
        # never plan for more than the group asks for
        cal_gpu = job_dict[
            'demands'] if job_dict['demands'] <= cal_gpu else cal_gpu
        extra_gpu = cal_gpu - job_dict['used_gpu']
        if extra_gpu <= 0:
            extra_gpu = 0
        elif extra_gpu > remain_free_gpu:
            # not enough left: give as many whole jobs as still fit
            extra_gpu = int(
                math.floor(remain_free_gpu / job_dict['num_gpu']) *
                job_dict['num_gpu'])
        # print('%d-GPU, u%d, cal_gpu %d, extra_g %d' %(job_dict['num_gpu'], job_dict['used_gpu'], cal_gpu, extra_gpu))
        job_dict['reserve'] = job_dict['used_gpu'] + extra_gpu
        remain_free_gpu -= extra_gpu
        # if remain_free_gpu <= 0:
        #     break
    ''' still remaining, give to the right job group'''
    # smallest job size first
    job_list.sort(key=lambda e: e.__getitem__('num_gpu'))
    # num_full counts, per pass, the groups that took nothing (demand met
    # or job size larger than what is left)
    num_full = 0
    while remain_free_gpu > 0:
        # if all are satisfied
        if num_full >= len(job_list):
            break
        else:
            num_full = 0
        for job_dict in job_list:
            if job_dict['demands'] <= job_dict['reserve']:
                num_full += 1
                continue
            if remain_free_gpu >= job_dict['num_gpu']:
                remain_free_gpu -= job_dict['num_gpu']
                job_dict['reserve'] += job_dict['num_gpu']
            else:
                num_full += 1
            if remain_free_gpu <= 0:
                break
    #execute reservation
    for job_dict in job_list:
        num_gpu = job_dict['num_gpu']
        self.gpu_job[num_gpu].get_gpu_reservation(job_dict['reserve'])
        print("%d-j, T%d, F%d, U%d, N%d, R%d; " %
              (job_dict['num_gpu'], job_dict['cur_gpu'],
               job_dict['cur_free_gpu'], job_dict['used_gpu'],
               job_dict['demands'], job_dict['reserve']),
              end=' ')
    # sanity check on the groups after the reservation was applied
    for num_gpu, gjob in self.gpu_job.items():
        if gjob.free_gpu < 0:
            print("Error free gpu, %d" % num_gpu)
            exit()
    # logs this function's own name
    util.print_fn(' %s is done' % sys._getframe().f_code.co_name)