Ejemplo n.º 1
0
    def get_ready_tasks(self, ptask, pnode):
        """Build unstarted schedule items for the tasks that may start after ``ptask``.

        Candidates are the children of ``ptask`` plus the job of the item the
        initial schedule returns from ``get_next_item(ptask)``, as long as each
        passes ``self.is_ready`` and ``self.is_next_to_run``.  Candidates that
        are finished, currently executing, or planned on a node whose state is
        ``Node.Down`` are dropped.

        :param ptask: task whose successors are examined.
        :param pnode: not used by the current implementation.
        :return: list of new ``ScheduleItem`` objects with recomputed
                 start/end times (the planned duration is preserved).
        """
        planned_next = self.initial_schedule.get_next_item(ptask)

        def runnable_now(tsk):
            return self.is_ready(tsk) and self.is_next_to_run(tsk)

        candidates = list(filter(runnable_now, ptask.children))

        # TODO: refactor it later
        if planned_next is not None:
            follower = planned_next.job
            if follower not in candidates and runnable_now(follower):
                candidates.append(follower)

        def may_start(tsk):
            # A task must not be finished or executing, and its planned node
            # must not be Down.
            already_done = tsk.id in self.finished_tasks
            if already_done or self.current_schedule.is_executing(tsk):
                return False
            planned_node = self.initial_schedule.place(tsk)[0]
            return self.resource_manager.node(planned_node).state != Node.Down

        result = []
        for child in [tsk for tsk in candidates if may_start(tsk)]:
            target_node, planned = self.initial_schedule.place(child)

            # Collect the finished schedule items of the child's parents,
            # keyed by job id (a later item for the same job replaces an
            # earlier one).
            parent_ids = [parent.id for parent in child.parents]
            finished_parents = {}
            for src_node, node_items in self.current_schedule.mapping.items():
                for entry in node_items:
                    if (entry.job.id in parent_ids) and (
                            entry.state == ScheduleItem.FINISHED):
                        finished_parents[entry.job.id] = (src_node, entry)

            # Moment each parent's output is estimated to reach target_node.
            arrivals = [
                entry.end_time + self.estimator.estimate_transfer_time(
                    src_node, target_node, entry.job, child)
                for (src_node, entry) in finished_parents.values()
            ]
            data_ready = max(arrivals) if arrivals else 0

            duration = planned.end_time - planned.start_time
            begin = max(self.current_time, data_ready)
            result.append(ScheduleItem(planned.job, begin, begin + duration))
        return result
Ejemplo n.º 2
0
        def as_schedule(dct):
            """Turn a decoded JSON dictionary back into a project object.

            The value stored under ``'__cls__'`` selects what is rebuilt:
            ``Node``, ``ScheduleItem``, ``Schedule``, ``Resource`` or
            ``SaveBundle``.  A dictionary without that key, or with any other
            value under it, is returned unchanged.
            """
            kind = dct.get('__cls__')

            if kind == 'Node':
                owner = dct['resource']
                restored = Node(dct['name'], owner, dct['soft'])
                restored.flops = dct['flops']
                return restored

            if kind == 'ScheduleItem':
                # Jobs are stored as keys into the enclosing task_dict.
                job = task_dict[dct['job']]
                entry = ScheduleItem(job, dct['start_time'], dct['end_time'])
                entry.state = dct['state']
                return entry

            if kind == 'Schedule':
                return Schedule({
                    pair['node']: pair['value']
                    for pair in dct['mapping']
                })

            if kind == 'Resource':
                resource = Resource(dct['name'])
                resource.nodes = dct['nodes']
                return resource

            if kind == 'SaveBundle':
                resources = dct['dedicated_resources']

                # Re-attach every node to its resource and gather them all.
                unique_nodes = set()
                for resource in resources:
                    for member in resource.nodes:
                        member.resource = resource
                    unique_nodes.update(resource.nodes)
                node_by_name = {member.name: member for member in unique_nodes}

                # Replace the node-name keys of the schedule mapping with the
                # node objects themselves.
                ga_schedule = dct['ga_schedule']
                ga_schedule.mapping = {
                    node_by_name[node_name]: values
                    for (node_name, values) in ga_schedule.mapping.items()
                }

                return SaveBundle(dct['name'], resources, dct['transfer_mx'],
                                  dct['ideal_flops'], ga_schedule,
                                  dct['wf_name'])

            return dct
Ejemplo n.º 3
0
            def set_proper_state(item):
                """Copy ``item`` and align the copy's state with ``front_event``.

                A copy that is EXECUTING or UNSTARTED becomes FINISHED when it
                ends no later than ``front_event.end_time`` and EXECUTING when
                it ends after it.  Copies in any other state are returned
                as copied.
                """
                clone = ScheduleItem.copy(item)

                # TODO: Urgent!: dangerous place
                if clone.state in (ScheduleItem.EXECUTING,
                                   ScheduleItem.UNSTARTED):
                    cutoff = front_event.end_time
                    if clone.end_time <= cutoff:
                        clone.state = ScheduleItem.FINISHED
                    elif clone.end_time > cutoff:
                        clone.state = ScheduleItem.EXECUTING
                return clone
Ejemplo n.º 4
0
    def mapping(self, sorted_jobs, existing_plan, nodes, commcost, compcost):
        """Place every task on the node that gives it the earliest finish time.

        ``existing_plan`` is not copied: new items are inserted into it
        through ``Schedule.insert_item`` and the same mapping is wrapped in
        the returned ``Schedule``.

        :param sorted_jobs: iterable of ``(workflow, tasks)`` pairs; tasks are
                            allocated in the order given.
        :param existing_plan: dict of node -> list of schedule items.
        :param nodes: not used by the current implementation.
        :param commcost: passed through to ``self.start_time``.
        :param compcost: callable ``(task, node) -> runtime``.
        :return: ``Schedule`` built from the updated plan.
        """
        # TODO: add finished tasks
        # Tasks that are finished or executing stay on the node they have.
        settled_states = (ScheduleItem.FINISHED, ScheduleItem.EXECUTING)
        jobson = {
            item.job: node
            for (node, items) in existing_plan.items()
            for item in items
            if item.state in settled_states
        }

        for wf, tasks in sorted_jobs:
            prec = reverse_dict(HeftHelper.convert_to_parent_children_map(wf))
            for task in tasks:
                start_of = partial(self.start_time, wf, task, existing_plan,
                                   jobson, prec, commcost)

                def finish_time(machine):
                    # Finish time of `task` on `machine`: start plus runtime.
                    duration = compcost(task, machine)
                    return start_of(machine, duration) + duration

                best = min(existing_plan, key=finish_time)
                duration = compcost(task, best)
                begin = start_of(best, duration)
                finish = finish_time(best)

                Schedule.insert_item(existing_plan, best,
                                     ScheduleItem(task, begin, finish))
                jobson[task] = best

        return Schedule(existing_plan)
Ejemplo n.º 5
0
        def as_schedule(dct):
            """Decode hook: rebuild the object a tagged dictionary describes.

            Dictionaries whose ``'__cls__'`` entry is ``'Node'``,
            ``'ScheduleItem'``, ``'Schedule'``, ``'Resource'`` or
            ``'SaveBundle'`` are converted to the matching object; every other
            dictionary is handed back untouched.
            """
            tag = dct['__cls__'] if '__cls__' in dct else None

            if tag == 'Node':
                res = dct['resource']
                rebuilt_node = Node(dct['name'], res, dct['soft'])
                rebuilt_node.flops = dct['flops']
                return rebuilt_node
            elif tag == 'ScheduleItem':
                # The stored job value is a key of the enclosing task_dict.
                rebuilt_item = ScheduleItem(task_dict[dct['job']],
                                            dct['start_time'],
                                            dct['end_time'])
                rebuilt_item.state = dct['state']
                return rebuilt_item
            elif tag == 'Schedule':
                node_to_items = {
                    record['node']: record['value']
                    for record in dct['mapping']
                }
                return Schedule(node_to_items)
            elif tag == 'Resource':
                rebuilt_res = Resource(dct['name'])
                rebuilt_res.nodes = dct['nodes']
                return rebuilt_res
            elif tag == 'SaveBundle':
                # Point each node back at its resource and collect all nodes.
                collected = set()
                for res in dct['dedicated_resources']:
                    for nd in res.nodes:
                        nd.resource = res
                    collected.update(res.nodes)
                by_name = {nd.name: nd for nd in collected}

                # The schedule mapping is keyed by node name; swap the names
                # for the node objects.
                sched = dct['ga_schedule']
                sched.mapping = {
                    by_name[node_name]: values
                    for (node_name, values) in sched.mapping.items()
                }

                return SaveBundle(
                    dct['name'],
                    dct['dedicated_resources'],
                    dct['transfer_mx'],
                    dct['ideal_flops'],
                    sched,
                    dct['wf_name'],
                )
            return dct
Ejemplo n.º 6
0
def place_task_to_schedule(workflow, estimator, schedule_mapping, task_to_node,
                           chrmo_mapping, task, node, current_time):
    """Insert ``task`` into the timeline of ``node`` and return its interval.

    The first suitable idle gap is used: the gap before the node's first item,
    then the gaps between consecutive items.  When none fits, the task is
    appended after the last item, no earlier than ``current_time`` and the
    value returned by ``_comm_ready_func``.

    ``schedule_mapping`` is updated in place: the new ``ScheduleItem`` is
    inserted into the node's list, and the list is stored back under ``node``.

    :return: ``(start_time, end_time)`` of the placed task.
    """
    runtime = estimator.estimate_runtime(task, node)
    # presumably the time at which the task's input data is available on
    # `node` -- confirm against _comm_ready_func.
    comm_ready = _comm_ready_func(workflow, estimator, task_to_node,
                                  chrmo_mapping, task, node)

    def _check(st, end):
        # A gap [st, end] is usable when it starts strictly after
        # current_time, not before comm_ready, and is strictly longer than
        # the runtime (both strict comparisons use a 1e-5 tolerance).
        return (0.00001 < (st - current_time)) \
            and st >= comm_ready and (0.00001 < (end - st) - runtime)

    node_schedule = schedule_mapping.get(node, list())
    size = len(node_schedule)

    insert_at = None
    # The gap before the first item ends at that item's start_time; the
    # item object itself must not be passed as the gap end.
    if size > 0 and _check(0, node_schedule[0].start_time):
        st_time = 0
        insert_at = 0
    else:
        for i in range(size - 1):
            gap_start = node_schedule[i].end_time
            gap_end = node_schedule[i + 1].start_time
            if _check(gap_start, gap_end):
                st_time = gap_start
                insert_at = i + 1
                break

        if insert_at is None:
            # No gap fits: append after everything already on the node.
            # TODO: refactor it later
            free_time = node_schedule[-1].end_time if size > 0 else 0
            st_time = max(free_time, comm_ready, current_time)
            insert_at = size

    end_time = st_time + runtime
    node_schedule.insert(insert_at, ScheduleItem(task, st_time, end_time))

    schedule_mapping[node] = node_schedule
    return (st_time, end_time)
Ejemplo n.º 7
0
        def _get_front_line(schedule, current_time, fixed_interval):
            """Pick the schedule item that marks the next reschedule point.

            For each node, only the first item (in list order) that ends after
            ``current_time + fixed_interval`` and before the best end time
            found so far is considered.  The search starts from
            ``ScheduleItem.MIN_ITEM()``.

            :return: the selected item, or ``None`` when the selected item's
                     ``job`` is ``None``.
            """
            event_time = current_time + fixed_interval
            front = ScheduleItem.MIN_ITEM()

            for (_node, node_items) in schedule.mapping.items():
                # This also covers the case where event_time falls into a
                # transfer gap (a rare situation for all nodes).
                # TODO: compare with some precision
                candidate = next(
                    (it for it in node_items
                     if event_time < it.end_time < front.end_time),
                    None)
                if candidate is not None:
                    front = candidate

            if front.job is None:
                return None

            message = ("Time: " + str(current_time)
                       + " reschedule point have been founded st:"
                       + str(front.start_time)
                       + " end:" + str(front.end_time))
            print(message)
            return front
Ejemplo n.º 8
0
    def _node_up_handler(self, event):
        """Handle a node-up event for ``event.node``.

        Sets the node's state to ``Node.Unknown``, then looks up the first
        item planned for that node in the initial schedule whose job is not in
        ``self.finished_tasks`` and posts it as a new item starting at
        ``self.current_time`` with its planned duration.

        When every planned item on the node is already finished there is
        nothing to restart, so no event is posted.
        """
        self.resource_manager.node(event.node).state = Node.Unknown

        # First planned item on this node that has not finished yet.
        next_sched_item = next(
            (item for item in self.initial_schedule.mapping[event.node]
             if item.job.id not in self.finished_tasks),
            None)
        if next_sched_item is None:
            # Previously this fell through with a list placeholder and
            # failed on the attribute access below.
            return

        runtime = next_sched_item.end_time - next_sched_item.start_time
        start_time = self.current_time
        end_time = start_time + runtime

        actual_sched_item = ScheduleItem(next_sched_item.job, start_time,
                                         end_time)
        self.post_new_events([actual_sched_item])
Ejemplo n.º 9
0
 def init(self):
     """Set up ``self.current_schedule`` and post the initial events.

     With an initial schedule, its items are re-created against the task
     objects of the planner's workflow (matched by task id).  Without one,
     the planner is run on a schedule holding an empty list for every
     planner node.
     """
     if self.initial_schedule is not None:
         # Index the workflow's own task objects by id.
         tasks_by_id = {}
         for tsk in HeftHelper.get_all_tasks(self.heft_planner.workflow):
             tasks_by_id[tsk.id] = tsk

         rebuilt = {}
         for (node, items) in self.initial_schedule.mapping.items():
             rebuilt[node] = [
                 ScheduleItem(tasks_by_id[item.job.id], item.start_time,
                              item.end_time)
                 for item in items
             ]
         self.current_schedule = Schedule(rebuilt)
     else:
         empty_plan = {node: [] for node in self.heft_planner.get_nodes()}
         self.current_schedule = Schedule(empty_plan)
         self.current_schedule = self.heft_planner.run(
             self.current_schedule)
     self._post_new_events()
Ejemplo n.º 10
0
    def schedule(self, fixed_schedule_part=None, current_time=0.0):
        """Build a schedule by repeatedly making random placement choices.

        While ready tasks remain, one is picked at random, a random node whose
        state is not ``Node.Down`` is chosen, and the task is placed into a
        randomly chosen time slot of that node.  Children whose parents have
        all been placed then become ready.

        :param fixed_schedule_part: optional schedule whose items are kept;
            its per-node item lists are shallow-copied and extended.  When
            ``None`` the schedule starts empty for every node in
            ``self.nodes``.
        :param current_time: lower bound used when looking for free slots.
        :return: ``Schedule`` wrapping the resulting node -> items mapping.
        """

        estimate = self.estimator.estimate_transfer_time

        # TODO: make common utility function with ScheduleBuilder
        def is_last_version_of_task_executing(item):
            # True for items in state EXECUTING, FINISHED or UNSTARTED.
            return item.state == ScheduleItem.EXECUTING or item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.UNSTARTED

        def _get_ready_tasks(children, finished_tasks):
            # Keep the tasks whose parents' ids are all in finished_tasks.
            def _is_child_ready(child):
                ids = set([p.id for p in child.parents])
                result = False in [id in finished_tasks for id in ids]
                return not result

            ready_children = [
                child for child in children if _is_child_ready(child)
            ]
            return ready_children

        if fixed_schedule_part is None:
            # Fresh start: nothing placed yet, the head task's children are
            # the first ready tasks.
            schedule_mapping = {node: [] for node in self.nodes}
            ready_tasks = [
                child.id for child in self.workflow.head_task.children
            ]
            task_to_node = dict()
            finished_tasks = set()
        else:
            # Shallow-copy the item lists so the fixed part's own lists are
            # not extended.
            schedule_mapping = {
                node: [item for item in items]
                for (node, items) in fixed_schedule_part.mapping.items()
            }
            # Every item accepted by is_last_version_of_task_executing counts
            # as already placed ("finished" for readiness purposes).
            finished_tasks = [
                item.job.id
                for (node, items) in fixed_schedule_part.mapping.items()
                for item in items if is_last_version_of_task_executing(item)
            ]
            # The head task is treated as finished as well.
            finished_tasks = set([self.workflow.head_task.id] + finished_tasks)
            unfinished = [
                task for task in self.workflow.get_all_unique_tasks()
                if not task.id in finished_tasks
            ]
            ready_tasks = [
                task.id
                for task in _get_ready_tasks(unfinished, finished_tasks)
            ]
            # task id -> (node, start_time, end_time) for the placed items.
            task_to_node = {
                item.job.id: (node, item.start_time, item.end_time)
                for (node, items) in fixed_schedule_part.mapping.items()
                for item in items if is_last_version_of_task_executing(item)
            }

        def is_child_ready(child):
            # Same test as _is_child_ready above, but reads the
            # finished_tasks set that the main loop keeps updating.
            ids = set([p.id for p in child.parents])
            result = False in [id in finished_tasks for id in ids]
            return not result

        def find_slots(node, comm_ready, runtime):
            # Return the (start, end) intervals on `node` that can hold a
            # task of length `runtime` starting no earlier than comm_ready
            # and current_time.
            node_schedule = schedule_mapping.get(node, list())
            free_time = 0 if len(
                node_schedule) == 0 else node_schedule[-1].end_time
            ## TODO: refactor it later
            f_time = max(free_time, comm_ready)
            f_time = max(f_time, current_time)
            # Open-ended slot after the node's last item; its end is padded
            # by 1 beyond the runtime.
            base_variant = [(f_time, f_time + runtime + 1)]
            # Gap between time 0 and the node's first item.
            zero_interval = [] if len(node_schedule) == 0 else [
                (0, node_schedule[0].start_time)
            ]
            # Gaps between consecutive items.
            middle_intervals = [(node_schedule[i].end_time,
                                 node_schedule[i + 1].start_time)
                                for i in range(len(node_schedule) - 1)]
            intervals = zero_interval + middle_intervals + base_variant

            #result = [(st, end) for (st, end) in intervals if st >= comm_ready and end - st >= runtime]
            ## TODO: rethink rounding
            # Both comparisons against current_time and runtime allow a 0.01
            # tolerance.
            result = [
                (st, end) for (st, end) in intervals
                if (current_time < st or abs((current_time - st)) < 0.01)
                and st >= comm_ready and (
                    runtime < (end - st) or abs((end - st) - runtime) < 0.01)
            ]
            return result

        def comm_ready_func(task, node):
            # Latest "parent end time + estimated transfer time" over the
            # task's parents; 0 when the only parent is the head task.
            ##TODO: remake this stub later.
            if len(task.parents) == 1 and self.workflow.head_task.id == list(
                    task.parents)[0].id:
                return 0
            # NOTE(review): arguments are passed as (node, parent's node,
            # task, parent) -- confirm this matches the parameter order of
            # estimate_transfer_time.
            return max([
                task_to_node[p.id][2] +
                estimate(node, task_to_node[p.id][0], task, p)
                for p in task.parents
            ])

        def get_possible_execution_times(task, node):
            ## pay attention to the last element in the resulted seq
            ## it represents all available time of node after it completes all its work
            ## (if such interval can exist)
            ## time_slots = [(st1, end1),(st2, end2,...,(st_last, st_last + runtime)]
            runtime = self.estimator.estimate_runtime(task, node)
            comm_ready = comm_ready_func(task, node)
            time_slots = find_slots(node, comm_ready, runtime)
            return time_slots, runtime

        while len(ready_tasks) > 0:
            # Random ready task.
            choosed_index = random.randint(0, len(ready_tasks) - 1)
            task = self.task_map[ready_tasks[choosed_index]]

            #TODO: make checking for all nodes are dead.(It's a very rare situation so it is not consider for now)
            # Random node among those not in state Node.Down.
            alive_nodes = [
                node for node in self.nodes if node.state != Node.Down
            ]
            choosed_node_index = random.randint(0, len(alive_nodes) - 1)
            node = alive_nodes[choosed_node_index]

            # Random slot; no random draw is made when there is exactly one.
            time_slots, runtime = get_possible_execution_times(task, node)
            choosed_time_index = 0 if len(time_slots) == 1 else random.randint(
                0,
                len(time_slots) - 1)
            time_slot = time_slots[choosed_time_index]

            # The task starts at the beginning of the chosen slot.
            start_time = time_slot[0]
            end_time = start_time + runtime

            item = ScheduleItem(task, start_time, end_time)
            ##schedule_mapping[node].append(item)
            Schedule.insert_item(schedule_mapping, node, item)
            task_to_node[task.id] = (node, start_time, end_time)

            ##print('I am here')
            # A placed task counts as finished for the readiness of its
            # children.
            ready_tasks.remove(task.id)
            finished_tasks.add(task.id)

            ready_children = [
                child for child in task.children if is_child_ready(child)
            ]
            for child in ready_children:
                ready_tasks.append(child.id)

        schedule = Schedule(schedule_mapping)
        return schedule