Esempio n. 1
0
    def mapping(self, sorted_jobs, existing_plan, live_nodes, commcost, compcost):
        """Place every task on the live node that yields the earliest finish time.

        Derived from ``allocate(job, orders, jobson, prec, compcost, commcost)``.

        ``sorted_jobs`` is iterated as ``(workflow, tasks)`` pairs and
        ``existing_plan`` as ``node -> schedule items``. The plan is updated
        in place and the same mapping is wrapped into the returned
        ``Schedule``. When ``live_nodes`` is empty nothing is placed and the
        plan is returned unchanged.
        """
        ## TODO: add finished tasks
        # Remember where already finished / currently executing jobs live.
        jobson = {}
        for node, items in existing_plan.items():
            for item in items:
                if item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.EXECUTING:
                    jobson[item.job] = node

        # Operates in place: no copy of the incoming plan is taken.
        new_plan = existing_plan

        def finish_time(machine):
            # `task` and `start_of` are resolved at call time from the loop
            # below, so this always evaluates the task currently being placed.
            duration = compcost(task, machine)
            return start_of(machine, duration) + duration

        if len(live_nodes) == 0:
            # Without live nodes there is nothing to place.
            return Schedule(new_plan)

        for wf, tasks in sorted_jobs:
            wf_dag = HeftHelper.convert_to_parent_children_map(wf)
            prec = reverse_dict(wf_dag)
            for task in tasks:
                start_of = partial(self.start_time, wf, task, new_plan, jobson, prec, commcost)

                agent = min(live_nodes, key=finish_time)
                runtime = compcost(task, agent)
                start = start_of(agent, runtime)
                end = finish_time(agent)

                Schedule.insert_item(new_plan, agent, ScheduleItem(task, start, end))
                jobson[task] = agent

        return Schedule(new_plan)
Esempio n. 2
0
def build_schedule(workflow, estimator, resource_manager, solution):
    """Build a ``Schedule`` from a solution holding a mapping and an ordering.

    ``solution`` is indexed by ``MAPPING_SPECIE`` and ``ORDERING_SPECIE``:
        solution = {
            s1.name: val1,
            s2.name: val2,
            ....
        }
    The mapping part is iterated as ``(task id, node name)`` pairs; the
    ordering part is iterated as task ids. Tasks are placed one by one, in
    ordering sequence, with ``place_task_to_schedule`` starting from time 0.

    Raises ``AssertionError`` when ``check_precedence`` rejects the ordering.
    """
    mapping_part = solution[MAPPING_SPECIE]
    ordering_part = solution[ORDERING_SPECIE]

    assert check_precedence(workflow, ordering_part), "Precedence is violated"

    # Resolve node names into node objects once, up front.
    node_of_task = {
        task_id: resource_manager.byName(node_name)
        for task_id, node_name in mapping_part
    }
    schedule_mapping = {node: [] for node in set(node_of_task.values())}

    task_to_node = {}
    for task_id in ordering_part:
        node = node_of_task[task_id]
        task = workflow.byId(task_id)
        placement = place_task_to_schedule(
            workflow, estimator, schedule_mapping, task_to_node,
            node_of_task, task, node, 0)
        (start_time, end_time) = placement
        task_to_node[task.id] = (node, start_time, end_time)

    return Schedule(schedule_mapping)
Esempio n. 3
0
    def schedule(self):
        """Create one schedule for all workflows, mapped in priority order.

        Workflows are sorted ascending by ``priority`` (``None`` counts as 0),
        each one is ranked with ``self.make_ranking`` and then mapped with
        ``self.mapping`` on top of the plan produced for the previous ones.

        Returns the ``Schedule`` from the last ``self.mapping`` call, or an
        empty per-node ``Schedule`` when there are no workflows.
        """
        def priority_of(wf):
            return 0 if wf.priority is None else wf.priority

        ## simple inter priority sorting
        sorted_wfs = sorted(self.workflows, key=priority_of)
        resources = self.resource_manager.get_resources()
        nodes = HeftHelper.to_nodes(resources)

        # The dict is built in sorted order; the loop below relies on the
        # dict iterating in insertion order to keep the priority sequence.
        wf_jobs = {wf: self.make_ranking(wf, nodes) for wf in sorted_wfs}

        new_schedule = Schedule({node: [] for node in nodes})
        new_plan = new_schedule.mapping

        for wf, jobs in wf_jobs.items():
            new_schedule = self.mapping([(wf, jobs)],
                                        new_plan,
                                        nodes,
                                        self.commcost,
                                        self.compcost)
            # Feed the updated plan into the next workflow's mapping.
            new_plan = new_schedule.mapping

        return new_schedule
Esempio n. 4
0
 def default_fixed_schedule_part(resource_manager):
     """Return a ``Schedule`` with an empty item list for every node.

     Nodes are obtained by passing ``resource_manager.get_resources()``
     through ``HeftHelper.to_nodes``.
     """
     nodes = HeftHelper.to_nodes(resource_manager.get_resources())
     empty_mapping = {node: [] for node in nodes}
     return Schedule(empty_mapping)
Esempio n. 5
0
def run_heft(workflow, resource_manager, estimator):
    """Run ``DynamicHeft`` starting from an empty schedule.

    The initial schedule holds an empty item list for every node reported
    by ``resource_manager.get_nodes()``. Returns whatever ``run`` returns.
    """
    planner = DynamicHeft(workflow, resource_manager, estimator)
    empty_mapping = {node: [] for node in resource_manager.get_nodes()}
    return planner.run(Schedule(empty_mapping))
Esempio n. 6
0
def run_peft(workflow, resource_manager, estimator):
    """Run ``DynamicPeft`` starting from an empty schedule.

    The table returned by ``PeftHelper.get_OCT`` is computed first and
    handed to the planner. The initial schedule holds an empty item list
    for every node reported by ``resource_manager.get_nodes()``.
    """
    oct_table = PeftHelper.get_OCT(workflow, resource_manager, estimator)
    planner = DynamicPeft(workflow, resource_manager, estimator, oct_table)
    empty_mapping = {node: [] for node in resource_manager.get_nodes()}
    return planner.run(Schedule(empty_mapping))
Esempio n. 7
0
    def __init__(self, workflow, resource_manager, estimator, ranking=None):
        """Store the planner inputs and start from an empty schedule at time 0."""
        self.current_schedule = Schedule({})
        self.workflow = workflow
        self.resource_manager = resource_manager
        self.estimator = estimator
        self.ranking = ranking
        self.current_time = 0

        # The result is not used here; the call is retained because
        # get_nodes() is not visible and may have side effects.
        self.get_nodes()
Esempio n. 8
0
    def clean_unfinished(schedule):
        """Return a new ``Schedule`` keeping only FINISHED and EXECUTING items.

        The input schedule's mapping is read, not modified; every node of
        the input is present in the result, possibly with an empty list.
        """
        def is_kept(item):
            return (item.state == ScheduleItem.FINISHED
                    or item.state == ScheduleItem.EXECUTING)

        new_mapping = {}
        for node, items in schedule.mapping.items():
            new_mapping[node] = [item for item in items if is_kept(item)]
        return Schedule(new_mapping)
Esempio n. 9
0
        def _run_heft():
            """Plan ``wf`` with ``DynamicHeft`` from an empty schedule and validate it.

            ``wf``, ``resource_manager``, ``estimator``, ``is_visualized``,
            ``viz`` and ``self`` are free variables resolved outside this
            function. Returns the schedule produced by the planner.
            """
            planner = DynamicHeft(wf, resource_manager, estimator)
            empty_mapping = {
                node: []
                for node in HeftHelper.to_nodes(resource_manager.resources)
            }
            heft_result = planner.run(Schedule(empty_mapping))

            self._validate(wf, estimator, heft_result)

            if is_visualized:
                viz.visualize_task_node_mapping(wf, heft_result)
            return heft_result
Esempio n. 10
0
    def __call__(self):
        """Run the GA experiment for ``self.wf_name``.

        A HEFT schedule is computed first and handed to ``ga_generate`` as
        ``initial_schedule``; the GA itself is driven by ``run_ga`` with
        ``self.GA_PARAMS``.

        Returns ``(makespan, schedule, logbook)`` for the best individual.
        """
        _wf = wf(self.wf_name)
        rm = ExperimentResourceManager(rg.r([10, 15, 25, 30]))
        estimator = ModelTimeEstimator(bandwidth=10)

        # Nothing is fixed in advance: every node starts with no items.
        empty_fixed_schedule_part = Schedule({node: [] for node in rm.get_nodes()})

        heft_schedule = run_heft(_wf, rm, estimator)
        ga_functions = GAFunctions2(_wf, rm, estimator)

        generate = partial(
            ga_generate,
            ga_functions=ga_functions,
            fixed_schedule_part=empty_fixed_schedule_part,
            current_time=0.0,
            init_sched_percent=0.05,
            initial_schedule=heft_schedule,
        )

        stats = tools.Statistics(lambda ind: ind.fitness.values[0])
        for label, reducer in (("avg", numpy.mean),
                               ("std", numpy.std),
                               ("min", numpy.min),
                               ("max", numpy.max)):
            stats.register(label, reducer)

        logbook = tools.Logbook()
        logbook.header = ["gen", "evals"] + stats.fields

        toolbox = Toolbox()
        toolbox.register("generate", generate)
        evaluate = fit_converter(
            ga_functions.build_fitness(empty_fixed_schedule_part, 0.0))
        toolbox.register("evaluate", evaluate)
        toolbox.register("clone", deepcopy)
        toolbox.register("mate", ga_functions.crossover)
        toolbox.register("sweep_mutation", ga_functions.sweep_mutation)
        toolbox.register("mutate", ga_functions.mutation)
        toolbox.register("select", tools.selRoulette)

        pop, logbook, best = run_ga(toolbox=toolbox,
                                    logbook=logbook,
                                    stats=stats,
                                    **self.GA_PARAMS)

        resulted_schedule = ga_functions.build_schedule(
            best, empty_fixed_schedule_part, 0.0)
        ga_makespan = Utility.makespan(resulted_schedule)
        return (ga_makespan, resulted_schedule, logbook)
Esempio n. 11
0
def generate(wf, rm, estimator, schedule=None, fixed_schedule_part=None, current_time=0.0):
    """Create a ``CompoundParticle`` (mapping + ordering) from a schedule.

    When ``schedule`` is not given, one is produced by
    ``SimpleRandomizedHeuristic``. When ``fixed_schedule_part`` is given,
    items whose job is listed by ``unmoveable_tasks`` and items in the
    FAILED state are dropped before the particle is built. Both particles
    start with an empty velocity.

    Raises ``Exception`` when the schedule was generated here and
    ``validate_mapping_with_alive_nodes`` rejects the resulting mapping.
    """
    if schedule is not None:
        sched = schedule
    else:
        heuristic = SimpleRandomizedHeuristic(wf, rm.get_nodes(), estimator)
        sched = heuristic.schedule(fixed_schedule_part, current_time)

    if fixed_schedule_part is None:
        clean_sched = sched
    else:
        un_tasks = unmoveable_tasks(fixed_schedule_part)

        def is_movable(item):
            return item.job.id not in un_tasks and item.state != ScheduleItem.FAILED

        clean_sched = Schedule({
            node: [item for item in items if is_movable(item)]
            for node, items in sched.mapping.items()
        })

    mapping, ordering = ord_and_map(clean_sched)
    ordering_numseq = ordering_to_numseq(ordering)
    ordering_map = dict(zip(ordering, ordering_numseq))

    ord_p = OrderingParticle(ordering_map)
    map_p = MappingParticle(mapping)
    ord_p.velocity = OrderingParticle.Velocity({})
    map_p.velocity = MappingParticle.Velocity({})

    result = CompoundParticle(map_p, ord_p)
    # Only schedules generated here are validated against alive nodes.
    if schedule is None and not validate_mapping_with_alive_nodes(result.mapping.entity, rm):
        raise Exception("found invalid solution in generated array")
    return result
Esempio n. 12
0
    def __init__(self, workflow, resource_manager, estimator, ranking=None):
        """Store the planner inputs and start from an empty schedule at time 0."""
        self.current_schedule = Schedule({})
        self.workflow = workflow
        self.resource_manager = resource_manager
        self.estimator = estimator
        self.ranking = ranking
        self.current_time = 0

        # The result is not used here; the call is retained because
        # get_nodes() is not visible and may have side effects.
        self.get_nodes()
Esempio n. 13
0
    def __call__(self, chromo, current_time):
        """Build a ``Schedule`` from a chromosome.

        ``chromo`` is read as ``node name -> sequence of task ids``. The
        alive nodes are visited round-robin; on each visit the first task id
        of that node which is currently ready is placed into the first time
        slot reported by ``self._get_possible_execution_times``. Children
        that become ready are queued for later passes. ``chromo`` itself is
        not modified.

        Raises:
            Exception: when there are no alive nodes, or when a whole pass
                over the alive nodes places no task although ready tasks
                remain (previously this case looped forever).
        """
        (schedule_mapping, finished_tasks, ready_tasks, chrmo_mapping,
         task_to_node) = self._create_helping_structures(chromo)

        # Per-node list copies, so consuming them leaves `chromo` untouched.
        chromo_copy = {nd_name: list(items) for (nd_name, items) in chromo.items()}

        alive_nodes = [node for node in self.nodes if node.state != Node.Down]
        if len(alive_nodes) == 0:
            raise Exception("There are not alive nodes")

        while len(ready_tasks) > 0:
            placed_any = False

            for node in alive_nodes:
                pending = chromo_copy[node.name]
                if len(pending) == 0:
                    continue
                if node.state == Node.Down:
                    continue

                ## TODO: Urgent! completely rethink this procedure

                # First task of this node, in chromosome order, that is ready.
                tsk_id = next((tid for tid in pending if tid in ready_tasks), None)
                if tsk_id is None:
                    continue

                task = self.task_map[tsk_id]
                pending.remove(tsk_id)
                ready_tasks.remove(tsk_id)

                time_slots, runtime = self._get_possible_execution_times(
                    schedule_mapping, task_to_node, chrmo_mapping, task,
                    node, current_time)

                # NOTE(review): next() requires `time_slots` to be an
                # iterator, not a list - confirm against the helper.
                time_slot = next(time_slots)
                start_time = time_slot[0]
                end_time = start_time + runtime

                item = ScheduleItem(task, start_time, end_time)

                # need to account current time
                Schedule.insert_item(schedule_mapping, node, item)
                task_to_node[task.id] = (node, start_time, end_time)

                finished_tasks.add(task.id)
                placed_any = True

                ready_children = self._get_ready_tasks(
                    task.children, finished_tasks)
                for child in ready_children:
                    ready_tasks.append(child.id)

            if not placed_any:
                # Nothing changed during this pass, so the next pass would be
                # identical: fail loudly instead of spinning forever.
                raise Exception(
                    "Unable to properly process a chromosome."
                    " Perhaps, due to invalid fixed_schedule_part or the chromosome."
                )

        schedule = Schedule(schedule_mapping)
        return schedule
    def schedule(self, fixed_schedule_part=None, current_time=0.0):
        """Build a schedule by making random placement choices.

        While ready tasks remain, a ready task, an alive node and one of the
        feasible time slots on that node are each chosen with
        ``random.randint``; the task is inserted there and its children that
        became ready are queued.

        Args:
            fixed_schedule_part: optional schedule whose ``mapping`` is used
                as the starting point. Tasks having an item in the
                EXECUTING, FINISHED or UNSTARTED state are treated as
                already placed. When ``None``, scheduling starts from an
                empty list per node in ``self.nodes`` and from the children
                of ``self.workflow.head_task``.
            current_time: no chosen slot starts before this time (within a
                0.01 tolerance).

        Returns:
            ``Schedule`` wrapping the resulting node -> items mapping.
        """

        estimate = self.estimator.estimate_transfer_time

        # TODO: make common utility function with ScheduleBuilder
        def is_last_version_of_task_executing(item):
            """True for items in the EXECUTING, FINISHED or UNSTARTED state."""
            return item.state == ScheduleItem.EXECUTING or item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.UNSTARTED

        def _get_ready_tasks(children, finished_tasks):
            """Return those of ``children`` whose parents are all in ``finished_tasks``."""
            def _is_child_ready(child):
                ids = set([p.id for p in child.parents])
                # `result` is True when at least one parent is NOT finished.
                result = False in [id in finished_tasks for id in ids]
                return not result

            ready_children = [
                child for child in children if _is_child_ready(child)
            ]
            return ready_children

        if fixed_schedule_part is None:
            # Fresh start: nothing placed, only the head task's children are ready.
            schedule_mapping = {node: [] for node in self.nodes}
            ready_tasks = [
                child.id for child in self.workflow.head_task.children
            ]
            task_to_node = dict()
            finished_tasks = set()
        else:
            # Copy the item lists so the fixed part's own lists are not appended to.
            schedule_mapping = {
                node: [item for item in items]
                for (node, items) in fixed_schedule_part.mapping.items()
            }
            finished_tasks = [
                item.job.id
                for (node, items) in fixed_schedule_part.mapping.items()
                for item in items if is_last_version_of_task_executing(item)
            ]
            # The head task is always counted as finished.
            finished_tasks = set([self.workflow.head_task.id] + finished_tasks)
            unfinished = [
                task for task in self.workflow.get_all_unique_tasks()
                if not task.id in finished_tasks
            ]
            ready_tasks = [
                task.id
                for task in _get_ready_tasks(unfinished, finished_tasks)
            ]
            # task id -> (node, start, end) for every task already placed.
            task_to_node = {
                item.job.id: (node, item.start_time, item.end_time)
                for (node, items) in fixed_schedule_part.mapping.items()
                for item in items if is_last_version_of_task_executing(item)
            }

        def is_child_ready(child):
            """True when every parent of ``child`` is in the enclosing ``finished_tasks``."""
            ids = set([p.id for p in child.parents])
            result = False in [id in finished_tasks for id in ids]
            return not result

        def find_slots(node, comm_ready, runtime):
            """Return the ``(start, end)`` intervals on ``node`` that can host the task.

            Candidates are the gap before the first item, the gaps between
            consecutive items, and one slot after the last item.
            """
            node_schedule = schedule_mapping.get(node, list())
            # NOTE(review): using [-1] / [0] presumes the items are kept in
            # time order by Schedule.insert_item - confirm.
            free_time = 0 if len(
                node_schedule) == 0 else node_schedule[-1].end_time
            ## TODO: refactor it later
            f_time = max(free_time, comm_ready)
            f_time = max(f_time, current_time)
            # Slot after all existing work; it is one unit longer than runtime,
            # so it always passes the filter below.
            base_variant = [(f_time, f_time + runtime + 1)]
            zero_interval = [] if len(node_schedule) == 0 else [
                (0, node_schedule[0].start_time)
            ]
            middle_intervals = [(node_schedule[i].end_time,
                                 node_schedule[i + 1].start_time)
                                for i in range(len(node_schedule) - 1)]
            intervals = zero_interval + middle_intervals + base_variant

            #result = [(st, end) for (st, end) in intervals if st >= comm_ready and end - st >= runtime]
            ## TODO: rethink rounding
            # Keep intervals that start no earlier than current_time and
            # comm_ready and are long enough for runtime; the two
            # current_time / runtime comparisons allow a 0.01 tolerance.
            result = [
                (st, end) for (st, end) in intervals
                if (current_time < st or abs((current_time - st)) < 0.01)
                and st >= comm_ready and (
                    runtime < (end - st) or abs((end - st) - runtime) < 0.01)
            ]
            return result

        def comm_ready_func(task, node):
            """Return the earliest time all parent data is available on ``node``.

            0 when the only parent is the head task; otherwise the maximum
            over parents of the parent's end time plus the estimated
            transfer time.
            """
            ##TODO: remake this stub later.
            if len(task.parents) == 1 and self.workflow.head_task.id == list(
                    task.parents)[0].id:
                return 0
            return max([
                task_to_node[p.id][2] +
                estimate(node, task_to_node[p.id][0], task, p)
                for p in task.parents
            ])

        def get_possible_execution_times(task, node):
            """Return ``(time_slots, runtime)`` for running ``task`` on ``node``."""
            ## pay attention to the last element in the resulted seq
            ## it represents all available time of node after it completes all its work
            ## (if such interval can exist)
            ## time_slots = [(st1, end1),(st2, end2,...,(st_last, st_last + runtime)]
            runtime = self.estimator.estimate_runtime(task, node)
            comm_ready = comm_ready_func(task, node)
            time_slots = find_slots(node, comm_ready, runtime)
            return time_slots, runtime

        while len(ready_tasks) > 0:
            # Random ready task.
            choosed_index = random.randint(0, len(ready_tasks) - 1)
            task = self.task_map[ready_tasks[choosed_index]]

            #TODO: make checking for all nodes are dead.(It's a very rare situation so it is not consider for now)
            # NOTE(review): with no alive nodes the call below becomes
            # random.randint(0, -1).
            alive_nodes = [
                node for node in self.nodes if node.state != Node.Down
            ]
            choosed_node_index = random.randint(0, len(alive_nodes) - 1)
            node = alive_nodes[choosed_node_index]

            time_slots, runtime = get_possible_execution_times(task, node)
            # No random draw is made when there is exactly one slot.
            choosed_time_index = 0 if len(time_slots) == 1 else random.randint(
                0,
                len(time_slots) - 1)
            time_slot = time_slots[choosed_time_index]

            # Only the slot's start is used; the end follows from the runtime.
            start_time = time_slot[0]
            end_time = start_time + runtime

            item = ScheduleItem(task, start_time, end_time)
            ##schedule_mapping[node].append(item)
            Schedule.insert_item(schedule_mapping, node, item)
            task_to_node[task.id] = (node, start_time, end_time)

            ##print('I am here')
            ready_tasks.remove(task.id)
            finished_tasks.add(task.id)

            # Queue children whose parents are now all placed.
            ready_children = [
                child for child in task.children if is_child_ready(child)
            ]
            for child in ready_children:
                ready_tasks.append(child.id)

        schedule = Schedule(schedule_mapping)
        return schedule
Esempio n. 15
0
    def __call__(self, chromo, current_time):
        """Validate a chromosome and build a ``Schedule`` from it.

        ``chromo`` is read as ``node name -> sequence of task ids``. The
        alive nodes are visited round-robin; on each visit the first task id
        of that node which is currently ready is placed with
        ``place_task_to_schedule``. Children that become ready are queued
        for later passes. ``chromo`` itself is not modified.

        Raises:
            ValueError: when a task is assigned to a node that is not alive.
            Exception: when chromosome size plus the unfailed tasks of the
                fixed part differs from the workflow size, when there are no
                alive nodes, or when a whole pass places no task although
                ready tasks remain.
        """
        def count_of_tasks(mapping):
            return sum(len(tasks) for tasks in mapping.values())

        alive_nodes = [node for node in self.nodes if node.state != Node.Down]

        # Set for O(1) membership; names are already used as dict keys below.
        alive_nodes_names = {node.name for node in alive_nodes}
        for node_name, tasks in chromo.items():
            if node_name not in alive_nodes_names and len(tasks) > 0:
                raise ValueError(
                    "Chromo is invalid. There is a task assigned to a dead node"
                )

        chromo_size = count_of_tasks(chromo)
        fixed_size = len(self.fixed_schedule_part.get_unfailed_tasks_ids())
        workflow_size = len(self.workflow.get_all_unique_tasks())
        if chromo_size + fixed_size != workflow_size:

            print("==Chromosome==================================")
            print(chromo)
            print("=fixed_schedule_part===================================")
            print(self.fixed_schedule_part)

            raise Exception(
                "The chromosome not a full. Chromo length: {0}, Fixed part length: {1}, workflow size: {2}"
                .format(chromo_size, fixed_size, workflow_size))

        # TODO: add not to schedule

        (schedule_mapping, finished_tasks, ready_tasks, chrmo_mapping,
         task_to_node) = self._create_helping_structures(chromo)

        # Work on a copy so that consuming task ids leaves `chromo` untouched.
        chromo_copy = deepcopy(chromo)

        if len(alive_nodes) == 0:
            raise Exception("There are not alive nodes")

        while len(ready_tasks) > 0:
            placed_any = False

            for node in alive_nodes:
                pending = chromo_copy[node.name]
                if len(pending) == 0:
                    continue
                ## TODO: Urgent! completely rethink this procedure

                # First task of this node, in chromosome order, that is ready.
                tsk_id = next((tid for tid in pending if tid in ready_tasks), None)
                if tsk_id is None:
                    continue

                task = self.task_map[tsk_id]
                pending.remove(tsk_id)
                ready_tasks.remove(tsk_id)

                (start_time, end_time) = place_task_to_schedule(
                    self.workflow, self.estimator, schedule_mapping,
                    task_to_node, chrmo_mapping, task, node, current_time)

                task_to_node[task.id] = (node, start_time, end_time)

                finished_tasks.add(task.id)
                placed_any = True

                ready_children = self._get_ready_tasks(
                    task.children, finished_tasks)
                for child in ready_children:
                    ready_tasks.append(child.id)

            if not placed_any:
                # Nothing was consumed during this pass, so the next pass
                # would be identical: stop instead of looping forever.
                raise Exception(
                    "Unable to properly process a chromosome."
                    " Perhaps, due to invalid fixed_schedule_part or the chromosome."
                )

        schedule = Schedule(schedule_mapping)
        return schedule