Example #1
0
    def get_network_load(self, job_dict):
        if 'num_gpu' not in job_dict:
            util.print_fn('No gpu information')
            return

        if 'model' not in job_dict:
            util.print_fn('No model information')
            return

        num_w = job_dict['num_gpu']
        num_ps = num_w

        if num_w == 1:
            job_dict['ps_network'] = list()
            job_dict['w_network'] = list([0])
            '''
            check job ps_size
            '''
            job_dict['ps_ave'] = 0
            return

        job_dict['w_network'] = list([job_dict['model']['total_size']] * num_w)
        job_dict['ps_network'] = list([0] * num_ps)
        for i in range(0, len(job_dict['model']['tensors'])):
            ps_idx = int(i % num_ps)
            # job_dict['ps_network'][ps_idx] += (job_dict['model']['tensors'][i] * num_w)
            job_dict['ps_network'][ps_idx] += (job_dict['model']['tensors'][i])

        for i in range(0, len(job_dict['ps_network'])):
            job_dict['ps_network'][i] = round(job_dict['ps_network'][i], 1)
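
A small worked sketch of the round-robin tensor sharding above, with made-up sizes for a 2-worker job (ps_network only):

# Hedged sketch mirroring the sharding loop in get_network_load above;
# tensor sizes and counts are illustrative, not from a real model.
tensors = [10.0, 4.0, 2.5]        # hypothetical per-tensor sizes
num_ps = 2                        # one PS per worker, as above
ps_network = [0.0] * num_ps
for i, t in enumerate(tensors):
    ps_network[i % num_ps] += t
print([round(p, 1) for p in ps_network])   # [12.5, 4.0]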
Example #2
0
def get_model(model_name):
    '''
    get model tensor information by model_name
    return a dict{name, tensors(list)}
    '''
    if model_name == 'vgg19':
        m_idx = 0
    elif model_name == 'vgg16':
        m_idx = 1
    elif model_name == 'vgg11':
        m_idx = 2
    elif model_name == 'alexnet':
        m_idx = 3
    elif model_name == 'resnet152':
        m_idx = 4
    elif model_name == 'resnet101':
        m_idx = 5
    elif model_name == 'resnet50':
        m_idx = 6
    elif model_name == 'inception4':
        m_idx = 7
    elif model_name == 'inception3':
        m_idx = 8
    else:
        # m_idx = random.randint(0,8)
        m_idx = 8
        util.print_fn('No model match, pick %s' % m_names[m_idx])

    ret = {
        'name': m_names[m_idx],
        'ind': m_idx,
        'tensors': m_tensors[m_idx],
        'mem_util': m_mem[m_idx]
    }
    return ret
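
A hedged usage sketch, assuming the module-level m_names, m_tensors and m_mem tables referenced above are populated:

# Illustrative only; 'vgg16' maps to index 1 in the tables referenced above.
model = get_model('vgg16')
print(model['name'], model['ind'], model['mem_util'], len(model['tensors']))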
Example #3
0
 def get_job_model(self, job_dict):
     # if job_dict.has_key('model_name') and job_dict.has_key('model_scale'):
     if ('model_name' in job_dict) and ('model_scale' in job_dict):
         job_dict['model'] = models.get_model_with_scale(
             job_dict['model_name'], job_dict['model_scale'])
     else:
         util.print_fn('Not enough model information to get the details')
Example #4
0
    def add_task(self, task, pack=False):
        result = False
        if self.can_fit(task):
            if not pack and len(self.running_tasks) > 0:
                # not placing
                return False
            # add interference latency
            if len(self.running_tasks) >= 2:
                original_duration = task.original_duration
                sum_utilisation = 0
                for _, t in self.running_tasks.items():
                    # sample a utilisation per co-located task (scalar, capped at 100)
                    sampled = np.random.normal(
                        loc=t.gpu_utilization_avg,
                        scale=(t.gpu_utilization_max - t.gpu_utilization_avg) / 4)
                    sum_utilisation += min(100, sampled)
                utilslowdown = np.polyval(NV_2080_COEF, sum_utilisation)

                new_duration = task.duration * utilslowdown
                task.duration = new_duration
                task.interfered = True
                util.print_fn(
                    "original duration: %.3f , new duration: %.3f, %d tasks on device %s, Node %s, sum utilisation %.3f and factor at %.3f"
                    % (original_duration, new_duration, len(
                        self.running_tasks), str(self.device_id),
                       str(self.node_id), sum_utilisation, utilslowdown))
            else:
                task.interfered = False
                task.duration = task.original_duration

            self.running_tasks[task.task_id] = task
            result = True
        return result
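
A hedged sketch of evaluating the interference slowdown above in isolation; NV_2080_COEF here is a made-up placeholder polynomial, not the real profiled coefficients:

import numpy as np

# Hypothetical coefficients (highest degree first); the real NV_2080_COEF is
# calibrated from profiling and is not reproduced here.
NV_2080_COEF = [0.0001, 0.005, 1.0]
sum_utilisation = 150.0            # e.g. two co-located tasks around 75% each
slowdown = np.polyval(NV_2080_COEF, sum_utilisation)
print(slowdown)                    # 4.0x under these made-up coefficients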
Example #5
0
    def prepare_job_start_events(self):
        '''
        add job start events into job_events list
        end events should be added when they are starting
        '''
        for job in self.job_list:
            start_t = job['submit_time']
            # util.print_fn('%d, %d' % (start_t, end_t))

            #for job start
            tmp_dict = util.search_dict_list(self.job_events, 'time', start_t)
            if tmp_dict is None:
                # not found; add this time into job_events
                tmp_dict = dict()
                tmp_dict['time'] = start_t
                tmp_dict['start_jobs'] = list()
                tmp_dict['end_jobs'] = list()
                tmp_dict['start_jobs'].append(job)
                self.job_events.append(tmp_dict)
            else:
                tmp_dict['start_jobs'].append(job)

            job['status'] = 'EVENT'  #job has been in EVENT status
        ''' sort events based on their time'''
        self.job_events.sort(key=lambda e: e.__getitem__('time'))
        util.print_fn('Init, add job start events')
        self.print_job_events()
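
A minimal sketch of the util.search_dict_list helper relied on above, assuming it returns the first dict in the list whose key equals the value, or None if there is no match:

# Hedged sketch of the assumed helper behaviour; the real util.search_dict_list
# may differ in details.
def search_dict_list(dict_list, key, value):
    for d in dict_list:
        if d.get(key) == value:
            return d
    return None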
Example #6
0
    def checkpoint_multi_dlas_gpu(self, job_queue, event_time):
        '''
        Record cluster, and job information, including:
        time
        idle_node
        busy_node: gpu running
        full_node: all gpus are running
        idle_gpu
        busy_gpu
        pending_job
        running_job
        completed_job
        '''
        idle_node = 0
        busy_node = 0
        full_node = 0
        idle_gpu = 0
        busy_gpu = 0
        pending_job = 0
        running_job = 0
        completed_job = 0

        if FLAGS.schedule != 'multi-dlas-gpu':
            util.print_fn("Error, not multi-dlas-gpu in checkpoint")
            exit()

        for num_gpu, gjob in job_queue.gpu_job.items():
            idle_gpu += gjob.free_gpu

        busy_gpu = CLUSTER.num_gpu - idle_gpu

        busy_node = int(math.ceil(busy_gpu / CLUSTER.num_gpu_p_node))
        full_node = busy_node
        idle_node = int(CLUSTER.num_node - busy_node)

        for job in job_queue.job_list:
            if job['status'] == 'RUNNING':
                running_job += 1
            elif job['status'] == 'PENDING':
                pending_job += 1
            elif job['status'] == 'END':
                completed_job += 1

        #add log
        self.log_list.append([
            event_time,
            int(idle_node),
            int(busy_node),
            int(full_node),
            int(idle_gpu),
            int(busy_gpu),
            int(pending_job),
            int(running_job),
            int(completed_job)
        ])
        if len(self.log_list) >= 1:
            self.dump_all_logs()
Example #7
0
def fit_first_sim_jobs(job_queue, cluster, logger):
    '''
    new jobs are added to the end of the ending queue
    but any fit job should be executed in fifo order
    '''
    while (len(job_queue.job_events) + len(job_queue.pending_jobs)) > 0:
        if len(job_queue.job_events) == 0:
            util.print_fn("This cluster is not large enough to run the job")
            break

        event = job_queue.job_events[0]
        event_time = event['time']
        # util.print_fn('--------------------------------- Handle event[time %d]------------------------------------' % event_time)
        #for ending jobs, release gpu
        for e_job in event['end_jobs']:
            #remove from migratable jobs, if it's there
            job_queue.remove_migratable(e_job)

            #job completes
            cluster.release_job_res(e_job)
            logger.job_complete(e_job, event_time)


        #for new-start jobs, try to start
        for s_job in event['start_jobs']:
            #add into pending list
            job_queue.move_to_pending(s_job)

        new_start_list = list()
        for p_job in job_queue.pending_jobs:
            # ret = CLUSTER.alloc_gpus(p_job)
            if cluster.check_free_gpu() <= 0:
                break
            ret = try_get_job_res(cluster, job_queue, p_job)
            if ret:
                ''' if remove_from_pending, then will miss the next p_job in the list '''
                new_start_list.append(p_job)
                # JOBS.remove_from_pending(p_job, event_time)
                # JOBS.add_job_end_event(p_job)
                # util.print_fn('----job[%d] starts from pending' % p_job['job_idx'])
            else:
                continue

        for ns_job in new_start_list:
            job_queue.remove_from_pending(ns_job, event_time)
            job_queue.add_job_end_event(ns_job)
            util.print_fn('----job[%d] starts from pending' % ns_job['job_idx'])

        #sort pending jobs based on the num_gpu
        #JOBS.pending_jobs.sort(key = lambda e:e.__getitem__('num_gpu'))

        #remove time_event
        job_queue.job_events.pop(0)
        job_queue.job_events.sort(key=lambda e:e.__getitem__('time'))

        logger.checkpoint(job_queue, event_time)
Example #8
0
    def add_job(self, job):
        requirements = job.resource_requirements
        result = False
        if self.check_resources(requirements):
            self.alloc_job(requirements)
            self.jobs.append(job)
            job.migration_count += 1
            result = True
        else:
            util.print_fn("Job does not fit on node", util.LOG_LEVEL_WARNING)

        return result
Example #9
0
    def end_job(self, e_job):
        if self.flags.schedule != 'multi-dlas-gpu':
            util.print_fn("Not multi-dlas-gpu")
            exit()

        num_gpu = e_job['num_gpu']
        gjob = self.gpu_job[num_gpu]
        gjob.release_job_gpu(1)
        gjob.runnable_jobs.remove(e_job)
        # gjob.running_jobs.remove(e_job)
        gjob.queues[e_job['q_id']].remove(e_job)
        gjob.end_job += 1
Example #10
0
def schedule_fifo(scheme, placement_algo, infrastructure, jobs_manager, delta,
                  **kwargs):
    """NOTE: First in first out, does not preempt or migrate"""
    # FIFO: get the first job from the queue
    next_job = jobs_manager.get_next_job(delta)
    if next_job is None:
        util.print_fn("no job ready at time %d" % (delta))
        return None, None, None
    assert next_job.is_waiting()
    nodes, success = placement_algo(infrastructure, next_job, scheme)
    if success:
        _ = jobs_manager.pop(delta)
        return nodes, next_job, success

    return nodes, next_job, success
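
A hedged sketch of a call site for the FIFO policy above; placement_algo, infrastructure, jobs_manager and current_time are assumed objects and values following the interfaces used elsewhere in this section:

# Hypothetical call site; none of the argument objects are defined here.
nodes, job, success = schedule_fifo('fifo', placement_algo, infrastructure,
                                    jobs_manager, current_time)
if success:
    util.print_fn("scheduled job %s on %d node(s)" % (job.job_id, len(nodes)))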
Example #11
0
 def __init__(self,
              id,
              flags,
              job_queue,
              num_node=0,
              num_gpu_p_node=0,
              num_cpu_p_node=0,
              mem_p_node=0):
     self.num_node = num_node
     self.flags = flags
     self.job_queue = job_queue
     self.num_gpu_p_node = num_gpu_p_node
     self.num_cpu_p_node = num_cpu_p_node
     self.mem_p_node = mem_p_node
     self.id = id
     self.node_list = list()
     util.print_fn('  Switch[%d] has %d nodes' % (id, num_node))
Example #12
0
    def sort_all_jobs(self, mode=None):
        '''
        Sort jobs based on their submit_time
        j1, num_gpu, start_t, end_t, duration
        '''
        # tmp_list = sorted(self.job_list, key = lambda e:e.__getitem__('start_time'))
        # tmp_dict = util.search_dict_list(self.job_list, 'start_time', 4)
        # tmp_dict['end_time'] = 15
        # print(tmp_dict)
        # self.job_list = tmp_list

        self.job_list.sort(key=lambda e: e.__getitem__('submit_time'))
        util.print_fn('   Jobs are sorted by their submit time')
        # self.read_all_jobs()
        if self.flags.schedule == 'multi-dlas-gpu' and self.flags.scheme == 'count':
            for num_gpu, gjob in self.gpu_job.items():
                util.print_fn('%d-GPU jobs have %d ' %
                              (num_gpu, gjob.total_job))
Example #13
0
    def __init__(self, id, num_gpu=0, num_cpu=0, mem=0):
        self.id = id
        self.num_cpu = num_cpu
        self.free_cpus = num_cpu
        self.num_gpu = num_gpu
        self.free_gpus = num_gpu
        #network load: can be bw, or the amount of traffic
        # in and out should be the same
        self.network_in = 0
        self.network_out = 0

        self.mem = mem
        self.free_mem = mem

        #node class for gandiva
        self.job_gpu = 0
        self.num_jobs = 0

        util.print_fn('    Node[%d] has %d gpus, %d cpus, %d G memory' % (id, num_gpu, num_cpu, mem))
Example #14
0
    def print_all_job_size_info(self):
        '''
        print job tensor info
        '''

        ps_max_ave_fd = open('ps_max_ave.csv', 'w+')
        ps_max_ave_writer = csv.writer(ps_max_ave_fd)
        ps_max_ave_writer.writerow(['ps_max_ave'])

        ps_max99_ave_fd = open('ps_max99_ave.csv', 'w+')
        ps_max99_ave_writer = csv.writer(ps_max99_ave_fd)
        ps_max99_ave_writer.writerow(['ps_max99_ave'])

        w_fd = open('w.csv', 'w+')
        w_writer = csv.writer(w_fd)
        w_writer.writerow(['w'])

        ps_fd = open('ps.csv', 'w+')
        ps_writer = csv.writer(ps_fd)
        ps_writer.writerow(['ps'])

        ps_w_fd = open('ps_w.csv', 'w+')
        ps_w_writer = csv.writer(ps_w_fd)
        ps_w_writer.writerow(['ps_w'])

        util.print_fn("Start to dump job information")
        for job in self.job_list:
            if job['ps_ave'] != 0:
                ps_max_ave_writer.writerow(list([job['ps_max_ave']]))
                ps_max99_ave_writer.writerow(list([job['ps_max99_ave']]))
                w_writer.writerow(list([job['w_network'][0]]))
                # ps_w_writer.writerow(job['w_network'][0])
                # for ps in job['ps_network']:
                #     ps_writer.writerow(ps)
                #     ps_w_writer.writerow(ps)

        ps_max_ave_fd.close()
        ps_max99_ave_fd.close()
        w_fd.close()
        ps_fd.close()
        ps_w_fd.close()
Example #15
0
 def _schedule(self, delta):
     if self.num_free_nodes() < 1:
         return
     jobs_all = self.jobs_manager.total_jobs(delta)
     scheduling_algo = algorithm.scheduling_algorithms[self.schedule]
     placement_algo = algorithm.placement_algorithms[self.placement]
     nodes, job, success = scheduling_algo(placement_algo,
                                           self.infrastructure,
                                           self.jobs_manager, delta)
     if success:
         if self.infrastructure.enable_network_costs:
             extras = network_service.calculate_network_costs(
                 self.infrastructure, job)
             original_duration = job.duration
             job.add_network_costs(extras)
             util.print_fn(
                 "Job %s : Original duration %f , New duration %f" %
                 (job.job_id, original_duration, job.duration))
         self.add_to_running(nodes, job.job_id)
     else:
         assert (jobs_all == self.jobs_manager.total_jobs(delta))
Example #16
0
    def parse_job_file(self):
        """from a csv convert to jobs"""
        if not os.path.exists(self.file_path):
            raise ValueError()

        fd = open(self.file_path, 'r')
        deli = ','
        if self.file_path.endswith('.csv'):
            deli = ','
        elif self.file_path.endswith('.txt'):
            deli = ' '

        reader = csv.DictReader(fd, delimiter=deli)
        ''' Add job from job trace file'''
        keys = reader.fieldnames
        util.print_fn(
            '--------------------------------- Read TF jobs from: %s ---------------------------------'
            % os.path.basename(self.file_path))
        util.print_fn('    we get the following fields:\n        %s' % keys)
        for row in reader:
            self._add_to_job_queue(self.parse_job(row))

        util.print_fn(
            '---------------------------------- Get %d TF jobs in total ----------------------------------'
            % self.total_jobs())
        fd.close()
Example #17
0
    def try_alloc_job(self, job, is_single=False):
        """
        NOTE: right now this assume all tasks can fit then we placed the tasks and corresponding job.
        """
        result = False
        ps_tasks, worker_tasks = self.can_fit_num_task(job.tasks)

        if ps_tasks + worker_tasks >= job.task_count:
            copy_j = job.tasks.copy()
            placed = 0
            for t in iter(copy_j.values()):
                result = self.try_reserve_and_placed_task(t)
                if result:
                    job.tasks_running_on[t.task_id] = self.node_id
                    placed += 1
            if placed > 0:
                result = self.try_reserve_and_placed_job(job, is_single)
                if not result:
                    # Not executed yet
                    for jt in job.tasks.values():
                        job.tasks_running_on.pop(jt.task_id, None)
                        self.placed_tasks.pop(jt.task_id, None)
                        self.release_allocated_resources(jt)
                    self.placed_jobs.pop(job.job_id, None)
                    util.print_fn("RELEASED: Job does not fit on node",
                                  util.LOG_LEVEL_WARNING)
                    return result
                util.print_fn(
                    "placed SINGLE NODE job %s, num tasks %d on node %s" %
                    (job.job_id, len(job.tasks), self.node_id))
        else:
            util.print_fn("Job does not fit on node", util.LOG_LEVEL_WARNING)
        return result
Example #18
0
def calculate_network_costs(infrastructure, job):
    """
        NOTE:
            calculate the slow down given nodes are assigned,
            very basic network cost model,
            2 is round trip.
            basic = (datasize/bandwidth * job_iteration * 2)
    """
    # let's check where the PS is, if there is a PS.
    if not job.is_distributed():
        return 0

    ps_nodes = set()
    wk_nodes = set()
    for k, v in job.tasks_running_on.items():
        if 'ps' in k:
            ps_nodes.add(v)
        else:
            wk_nodes.add(v)

    diff = ps_nodes.symmetric_difference(wk_nodes)
    cross_many = len(diff)
    if cross_many == 0:
        # crossing nodes induces latency; if everything resides on the same
        # node, assume no extra cost even for a PS-worker job
        return 0

    # assume PS has sharded parameters,
    # so the more difference we have,
    # the more communication we need to do.
    # per second **Some Heuristics**
    model_per_sec = (job.model_size / infrastructure.bandwidth)
    nodes_induced_sec = (cross_many * infrastructure.internode_latency)
    iteration_round_trip = job.iterations * 2.0
    extra_seconds = (model_per_sec + nodes_induced_sec) * iteration_round_trip
    util.print_fn("Crossing %s adds an extra %f s for job %s" % (str(diff), extra_seconds, job.job_id))
    return extra_seconds
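
A small worked example of the cost model above with made-up numbers (model size 1.0, bandwidth 10.0, one crossed node at 0.001 internode latency, 100 iterations):

# Illustrative arithmetic only; units follow whatever the simulator uses.
model_per_sec = 1.0 / 10.0           # model_size / bandwidth
nodes_induced_sec = 1 * 0.001        # cross_many * internode_latency
iteration_round_trip = 100 * 2.0     # iterations * 2 (round trip)
extra_seconds = (model_per_sec + nodes_induced_sec) * iteration_round_trip
print(extra_seconds)                 # 20.2 extra seconds of communication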
Example #19
0
    def print_job_events(self):
        util.print_fn('    Print all job events ')
        for event in self.job_events:
            util.print_fn(
                '      event.time[%d], with %d start_jobs, and %d end_jobs' %
                (event['time'], len(event['start_jobs']), len(
                    event['end_jobs'])))

        util.print_fn(' ')
Example #20
0
    def start(self):
        start_time = time.time()
        delta_time = 0
        current_remaining = self.jobs_manager.total_jobs(delta_time)
        running_jobs = len(self.jobs_manager.running_jobs)
        steps = 0
        while current_remaining + running_jobs > 0:
            # NOTE: Make decision on whether to:
            # 1. Done: schedule new jobs
            # 2. TODO: preempt running jobs
            # 3. TODO: migrate running jobs
            # 4. TODO: stochastic job arrival process
            self._gen_jobs(delta_time)
            time.sleep(1)
            if current_remaining > 0:
                # TODO: this will likely to be changed
                self._schedule(delta_time)
            new_current_remaining = self.jobs_manager.total_jobs(delta_time)
            time.sleep(1)
            end_time = time.time()
            self.release_finished_jobs(end_time)
            delta_time = end_time - start_time
            current_remaining = new_current_remaining
            running_jobs = len(self.jobs_manager.running_jobs)
            self.pending_time = self.jobs_manager.average_pending_time()
            steps += 1
            util.print_fn(
                "Remaining jobs: %d, Running Jobs: %d Finished Jobs %d" %
                (new_current_remaining, running_jobs,
                 len(self.jobs_manager.finished_jobs)))
            util.print_fn(self.jobs_manager.running_jobs.keys())
            for k, v in iter(self.infrastructure.nodes.items()):
                util.print_fn(
                    "Node %s is %s, GPU used %d, each node has tasks %s, gpu_utilizations %s"
                    % (k, 'busy' if len(v.running_tasks) > 0 else 'free',
                       v.gpu_used, str(v.running_tasks.keys()),
                       str(v.gpu_mem_utilizations)))

        finished_time = time.time()
        total_time_taken = finished_time - start_time
        util.print_fn("Total Time Taken in seconds: %d" % total_time_taken)
Example #21
0
def try_cross_node_alloc_ms(infrastructure, job, sort_fn=None, filter_fn=None):
    """
    From Tiresias:
    try get gpus from multiple nodes
        [ need gpus / gpu_p_node ] nodes, and one node with [need_gpu % gpu_p_node]
    if can't find, give up, and return False
    """
    # if someone asks for 5 GPUs but we have 4 per node,
    # we assign 2 full nodes.
    least_num_full_nodes = math.ceil(job.gpus / infrastructure.num_gpu_p_node)

    nodes_assigned = {}
    to_be_assigned = job.tasks.copy()
    num_full_tasks = len(job.tasks)
    assigned_task = {}
    all_nodes = infrastructure.nodes.values()

    if filter_fn:
        all_nodes = filter_fn(all_nodes)

    if sort_fn:
        all_nodes = sort_fn(all_nodes)

    for node in all_nodes:
        if not node.is_free(): continue

        if len(assigned_task) == num_full_tasks: break

        # this is checking how many nodes can fit the job current remaining tasks.
        worker_tasks_can_fit = node.can_fit_num_task(to_be_assigned)
        if worker_tasks_can_fit == 0:
            continue

        worker_count = 0
        pop_t = None
        check_next = False
        for k, v in iter(job.tasks.items()):
            if k in assigned_task:
                continue

            if 'worker' in k and worker_count <= worker_tasks_can_fit:
                pop_t = to_be_assigned.pop(k, None)
                worker_count += 1
            else:
                continue

            if pop_t is not None:
                result = node.try_reserve_and_placed_task(pop_t)
                if not result:
                    # nothing was actually placed if it returned False;
                    # put the task back.
                    to_be_assigned[k] = pop_t
                    worker_count -= 1
                    logging.info(
                        "unable to reserve job %s task %s on node %s, check next node..."
                        % (job.job_id, k, node.node_id))
                    check_next = True
                    break
                # from a job perspective keep track of where my tasks are
                job.tasks_running_on[k] = node.node_id
                # logging.info("Job %s - task %s placed on %s" % (job.job_id, k, node.node_id))
                assigned_task[k] = v

        # at least we have some task in the node.
        if worker_count > 0:
            node.try_reserve_and_placed_job(job, False)
            nodes_assigned[node.node_id] = node
            logging.info(
                "Job %s require %d - placed on nodes %s" %
                (job.job_id, least_num_full_nodes, str(nodes_assigned.keys())))

        if check_next:
            continue

        if (len(nodes_assigned) >= least_num_full_nodes
                and num_full_tasks == len(assigned_task)):
            #util.print_fn("assigned number of nodes %d" %   (len(nodes_assigned)))
            break

    # if not enough, clear everything.
    # NOTE: all tasks need to be assigned!!!
    if len(assigned_task) < num_full_tasks or len(
            nodes_assigned) < least_num_full_nodes:
        for node in iter(nodes_assigned.values()):
            node.placed_jobs.pop(job.job_id)
            for t in iter(job.tasks.values()):
                pop_t = node.placed_tasks.pop(t.task_id, None)
                if pop_t is not None:
                    node.release_allocated_resources(pop_t, reserved=True)
        nodes_assigned.clear()
        logging.info("not enough ")
        return {}, False

    if len(nodes_assigned) >= least_num_full_nodes and len(
            assigned_task) == num_full_tasks:
        util.print_fn(
            "placed job %s with task %d, on node - %s" %
            (job.job_id, job.worker_count, str(nodes_assigned.keys())))
        return nodes_assigned, True

    raise ArithmeticError()
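
A one-line check of the node-count arithmetic in the docstring above: 5 requested GPUs on 4-GPU nodes rounds up to 2 full nodes.

import math
# 5 GPUs requested, 4 GPUs per node -> ceil(5 / 4) == 2 full nodes reserved.
assert math.ceil(5 / 4) == 2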
Example #22
0
    def _setup_nodes(self, file_path):
        """read from a csv to init infrastructure"""
        if not os.path.exists(file_path):
            raise ValueError()

        project_dir = os.path.abspath(
            os.path.dirname(os.path.dirname(__file__)))
        spec_file = os.path.join(project_dir, file_path)

        name, ext = os.path.splitext(spec_file)
        # assume it is csv anyway
        assert 'csv' in ext
        f_handler = open(spec_file, 'r')
        reader = csv.DictReader(f_handler, delimiter=',')
        keys = reader.fieldnames
        util.print_fn(keys)

        for default_k in keys_default:
            if default_k not in keys: return

        # 1 line after reading fields
        assert reader.line_num == 1

        for row in reader:
            self.num_switch = int(row['num_switch'])
            self.num_nodes_p_switch = int(row['num_node_p_switch'])
            self.num_gpu_p_node = int(row['num_gpu_p_node'])
            self.num_cpu_p_node = int(row['num_cpu_p_node'])
            self.mem_p_node = int(row['mem_p_node'])
        f_handler.close()

        nodes = 0
        for rack_id in range(0, self.num_switch):
            rack = r.Rack(str(rack_id), self.bandwidth)
            for node_id in range(0, self.num_nodes_p_switch):
                nodes += 1
                node = n.Node(rack.rack_id, str(nodes),
                              self.gpu_memory_capacity, self.num_cpu_p_node,
                              self.num_gpu_p_node, self.mem_p_node)
                self.nodes[str(nodes)] = node
                rack.add_node(node)
            self.racks[str(rack_id)] = rack

        util.print_fn("num_racks in cluster: %d" % len(self.racks))
        first_rack = next(iter(self.racks.values()))
        first_rack_first_node = next(iter(first_rack.nodes.values()))
        util.print_fn("num_node_p_rack in cluster: %d" % len(first_rack.nodes))
        util.print_fn("num_gpu_p_node in cluster: %d" %
                      first_rack_first_node.gpu_count)
        util.print_fn("num_cpu_p_node in cluster: %d" %
                      first_rack_first_node.cpu_count)
        util.print_fn("mem_p_node in cluster: %d" %
                      first_rack_first_node.mem_size)
        util.print_fn("Total nodes in cluster: %d " % len(self.nodes))
        util.print_fn("Total racks in cluster: %d " % len(self.racks))
        util.print_fn(
            '--------------------------------- End of cluster spec ---------------------------------'
        )
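
A hedged example of the cluster spec CSV that _setup_nodes expects; the column names match the reader above, the values are illustrative only:

import csv, io

# Made-up one-row spec with the columns read above.
sample = io.StringIO(
    "num_switch,num_node_p_switch,num_gpu_p_node,num_cpu_p_node,mem_p_node\n"
    "2,4,8,64,256\n")
for row in csv.DictReader(sample):
    print(row['num_switch'], row['num_gpu_p_node'])   # 2 8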
Example #23
0
 def add_node(self, node):
     if node.node_id not in self.nodes:
         self.nodes[node.node_id] = node
     else:
         util.print_fn("Node already in rack", util.LOG_LEVEL_WARNING)
Example #24
0
def try_cross_node_alloc_ms(infrastructure, job):
    """
    From Tiresias:
    try get gpus from multiple nodes
        [ need gpus / gpu_p_node ] nodes, and one node with [need_gpu % gpu_p_node]
    if can't find, give up, and return False
    """
    # if someone asks for 5 GPUs but we have 4 per node,
    # we assign 2 full nodes.
    least_num_full_nodes = math.ceil(job.gpus / infrastructure.num_gpu_p_node)

    nodes_assigned = {}
    to_be_assigned = job.tasks.copy()
    num_full_tasks = len(job.tasks)
    assigned_task = {}
    for n_id, node in iter(infrastructure.nodes.items()):
        if not node.is_free(): continue

        if len(assigned_task) == len(to_be_assigned): break

        # this is checking how many nodes can fit the job current remaining tasks.
        ps_tasks_can_fit, worker_tasks_can_fit = node.can_fit_num_task(
            to_be_assigned)

        ps_count = 0
        worker_count = 0
        pop_t = None
        for k, v in iter(job.tasks.items()):
            if k in assigned_task:
                continue

            if 'ps' in k and ps_count <= ps_tasks_can_fit:
                pop_t = to_be_assigned[k]
                ps_count += 1
            elif 'worker' in k and worker_count <= worker_tasks_can_fit:
                pop_t = to_be_assigned[k]
                worker_count += 1
            else:
                continue

            if pop_t is not None:
                result = node.try_reserve_and_placed_task(pop_t)
                if not result:
                    # nothing was actually placed if it returned False.
                    continue
                # from a job perspective keep track of where my tasks are
                job.tasks_running_on[k] = node.node_id
                assigned_task[k] = v

        # at least we have some task in the node.
        if ps_count > 0 or worker_count > 0:
            node.try_reserve_and_placed_job(job, False)
            nodes_assigned[node.node_id] = node

        if (len(nodes_assigned) >= least_num_full_nodes
                and num_full_tasks == len(assigned_task)):
            util.print_fn("assigned number of nodes %d" %
                          (len(nodes_assigned)))
            break

    # if not enough, clear everything.
    # NOTE: all tasks need to be assigned!!!
    if len(assigned_task) < num_full_tasks or len(
            nodes_assigned) < least_num_full_nodes:
        for node in iter(nodes_assigned.values()):
            node.placed_jobs.pop(job.job_id)
            for t in iter(job.tasks.values()):
                pop_t = node.placed_tasks.pop(t.task_id, None)
                if pop_t is not None:
                    node.release_allocated_resources(pop_t)
        nodes_assigned.clear()
        return {}, False

    if len(nodes_assigned) >= least_num_full_nodes and len(
            assigned_task) == num_full_tasks:
        util.print_fn("placed job %s, on node %s" %
                      (job.job_id, str(nodes_assigned.keys())))
        return nodes_assigned, True

    raise ArithmeticError()
Example #25
0
    def _init_nodes(self):

        nodes = 0
        for rack_id in range(0, self.num_switch):
            rack = r.Rack(str(rack_id), self.bandwidth)
            for _ in range(0, self.num_nodes_p_switch):
                nodes += 1
                node = n.Node(rack.rack_id,
                              str(nodes),
                              self.gpu_memory_capacity,
                              self.num_cpu_p_node,
                              self.num_gpu_p_node,
                              self.mem_p_node,
                              enable_pack=self.flags.pack)
                self.nodes[str(nodes)] = node
                rack.add_node(node)
            self.racks[str(rack_id)] = rack

        util.print_fn("num_racks in cluster: %d" % len(self.racks))
        first_rack = next(iter(self.racks.values()))
        first_rack_first_node = next(iter(first_rack.nodes.values()))
        util.print_fn("num_node_p_rack in cluster: %d" % len(first_rack.nodes))
        util.print_fn("num_gpu_p_node in cluster: %d" %
                      first_rack_first_node.gpu_count)
        util.print_fn("num_cpu_p_node in cluster: %d" %
                      first_rack_first_node.cpu_count)
        util.print_fn("mem_p_node in cluster: %d" %
                      first_rack_first_node.mem_size)
        util.print_fn("Total nodes in cluster: %d " % len(self.nodes))
        util.print_fn("Total racks in cluster: %d " % len(self.racks))
        util.print_fn(
            '--------------------------------- End of cluster spec ---------------------------------'
        )
Example #26
0
 def add_node(self, node):
     if node not in self.nodes:
         self.nodes.append(node)
     else:
         util.print_fn("Node already in rack", util.LOG_LEVEL_WARNING)
Example #27
0
 def release_job_gpu(self, num_job=1):
     if num_job < 0:
         util.print_fn("Error: num_job < 0")
         exit()
     self.free_gpu += int(self.num_gpu * num_job)
Example #28
0
 def completion_check(self):
     for num_gpu, gjob in self.gpu_job.items():
         if gjob.end_job != gjob.total_job:
             util.print_fn(
                 '!!!! Mismatch: %d completed jobs vs %d total jobs in %d-GPU jobs'
                 % (gjob.end_job, gjob.total_job, num_gpu))
Example #29
0
    def reserve_gpus(self, total_num):
        '''
        GPU cluster reserve gpus for gpu_job groups
        '''
        num_group = len(self.gpu_job)
        ave_gpu = math.floor(total_num / num_group)

        job_list = list()
        for num_gpu, gjob in self.gpu_job.items():
            tmp_dict = dict()
            tmp_dict['num_gpu'] = num_gpu
            tmp_dict['used_gpu'] = gjob.total_gpu - gjob.free_gpu
            tmp_dict['demands'] = gjob.get_gpu_demands()
            tmp_dict['cur_gpu'] = gjob.total_gpu
            tmp_dict['cur_free_gpu'] = gjob.free_gpu
            tmp_dict['reserve'] = 0
            job_list.append(tmp_dict)

        total_free_gpu = total_num - sum(k['used_gpu'] for k in job_list)
        total_demands = sum(k['demands'] for k in job_list)
        # print('total_free %d, total_demands %d' % (total_free_gpu, total_demands))
        if total_demands == 0:
            return
        '''demand-based, keep current used_gpu'''
        remain_free_gpu = total_free_gpu
        job_list.sort(key=lambda e: e.__getitem__('demands'))
        for job_dict in job_list:
            if job_dict['demands'] == 0:
                continue

            ratio = round((job_dict['demands'] * 1.0) / total_demands, 2)
            cal_gpu = int(
                math.floor((ratio * total_num) / job_dict['num_gpu']) *
                job_dict['num_gpu'])
            cal_gpu = min(job_dict['demands'], cal_gpu)
            extra_gpu = cal_gpu - job_dict['used_gpu']
            if extra_gpu <= 0:
                extra_gpu = 0
            elif extra_gpu > remain_free_gpu:
                extra_gpu = int(
                    math.floor(remain_free_gpu / job_dict['num_gpu']) *
                    job_dict['num_gpu'])

            # print('%d-GPU, u%d, cal_gpu %d, extra_g %d' %(job_dict['num_gpu'], job_dict['used_gpu'], cal_gpu, extra_gpu))
            job_dict['reserve'] = job_dict['used_gpu'] + extra_gpu
            remain_free_gpu -= extra_gpu
            # if remain_free_gpu <= 0:
            #     break
        ''' still remaining, give to the right job group'''
        job_list.sort(key=lambda e: e.__getitem__('num_gpu'))
        num_full = 0
        while remain_free_gpu > 0:
            # if all are satisfied
            if num_full >= len(job_list):
                break
            else:
                num_full = 0

            for job_dict in job_list:
                if job_dict['demands'] <= job_dict['reserve']:
                    num_full += 1
                    continue
                if remain_free_gpu >= job_dict['num_gpu']:
                    remain_free_gpu -= job_dict['num_gpu']
                    job_dict['reserve'] += job_dict['num_gpu']
                else:
                    num_full += 1

                if remain_free_gpu <= 0:
                    break

        #execute reservation
        for job_dict in job_list:
            num_gpu = job_dict['num_gpu']
            self.gpu_job[num_gpu].get_gpu_reservation(job_dict['reserve'])
            print("%d-j, T%d, F%d, U%d, N%d, R%d; " %
                  (job_dict['num_gpu'], job_dict['cur_gpu'],
                   job_dict['cur_free_gpu'], job_dict['used_gpu'],
                   job_dict['demands'], job_dict['reserve']),
                  end=' ')

        for num_gpu, gjob in self.gpu_job.items():
            if gjob.free_gpu < 0:
                print("Error free gpu, %d" % num_gpu)
                exit()

        util.print_fn(' %s is done' % sys._getframe().f_code.co_name)
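
A small worked example of the demand-based share above, using made-up numbers: a 2-GPU job group demanding 6 GPUs, out of 24 GPUs of total demand, on a 32-GPU cluster.

import math
# Illustrative only; mirrors the ratio arithmetic in reserve_gpus above.
num_gpu, demands, total_demands, total_num = 2, 6, 24, 32
ratio = round(demands * 1.0 / total_demands, 2)                   # 0.25
cal_gpu = int(math.floor(ratio * total_num / num_gpu) * num_gpu)  # 8
cal_gpu = min(demands, cal_gpu)                                   # capped at 6
print(ratio, cal_gpu)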