Example #1
    def init(self):
        if self.initial_schedule is None:
            self.current_schedule = Schedule({node: [] for node in self.heft_planner.get_nodes()})
            self.current_schedule = self.heft_planner.run(self.current_schedule)
        else:
            id_to_task = {tsk.id: tsk for tsk in HeftHelper.get_all_tasks(self.heft_planner.workflow)}
            mapping = {node: [ScheduleItem(id_to_task[item.job.id], item.start_time, item.end_time) for item in items]
                       for (node, items) in self.initial_schedule.mapping.items()}
            self.current_schedule = Schedule(mapping)
        self._post_new_events()
Example #2
    def mapping(self, sorted_jobs, existing_plan, nodes, commcost, compcost):
        """Allocate each job to the machine with the earliest finish time.

        Derived from the original allocate(job, orders, jobson, prec, compcost, commcost)
        helper; operates on existing_plan in place.
        """


        ## TODO: add finished tasks
        jobson = dict()
        for (node, items) in existing_plan.items():
            for item in items:
                if item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.EXECUTING:
                    jobson[item.job] = node


        new_plan = existing_plan




        def ft(machine):
            #cost = st(machine)
            runtime = compcost(task, machine)
            cost = st(machine, runtime) + runtime
            ##print("machine: %s job:%s cost: %s" % (machine.name, task.id, cost))
            ##print("machine: " + str(machine.name) + " cost: " + str(cost))

            return cost

        for wf, tasks in sorted_jobs:
            ##wf_dag = self.convert_to_parent_children_map(wf)
            wf_dag = HeftHelper.convert_to_parent_children_map(wf)
            prec = reverse_dict(wf_dag)
            for task in tasks:
                st = partial(self.start_time, wf, task, new_plan, jobson, prec, commcost)

                # ress = [(key, ft(key)) for key in new_plan.keys()]
                # agent_pair = min(ress, key=lambda x: x[1][0])
                # agent = agent_pair[0]
                # start = agent_pair[1][0]
                # end = agent_pair[1][1]

                agent = min(new_plan.keys(), key=ft)
                runtime = compcost(task, agent)
                start = st(agent, runtime)
                end = ft(agent)

                # new_plan[agent].append(ScheduleItem(task, start, end))
                Schedule.insert_item(new_plan, agent, ScheduleItem(task, start, end))

                jobson[task] = agent
        new_sched = Schedule(new_plan)
        return new_sched
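
A minimal self-contained sketch (not part of the source) of the earliest-finish-time rule used in mapping() above; pick_agent, the machine names and the two lambda cost models are hypothetical stand-ins for the planner's estimator callbacks.

def pick_agent(task, machines, compcost, start_time):
    """Return (machine, start, end) giving the earliest finish time for task."""
    def finish_time(machine):
        return start_time(task, machine) + compcost(task, machine)

    agent = min(machines, key=finish_time)
    start = start_time(task, agent)
    return agent, start, start + compcost(task, agent)

# trivial cost model: both machines are free at t=0,
# "fast" needs 3 time units for the task, "slow" needs 7
agent, start, end = pick_agent("t1", ["fast", "slow"],
                               compcost=lambda t, m: 3 if m == "fast" else 7,
                               start_time=lambda t, m: 0)
assert (agent, start, end) == ("fast", 0, 3)
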
Example #3
    def schedule(self):
        """
         create inter-priority
        """
        def byPriority(wf):
           return 0 if wf.priority is None else wf.priority

        ##simple inter priority sorting
        sorted_wfs = sorted(self.workflows, key=byPriority)
        wf_jobs = {wf: [] for wf in sorted_wfs}
        resources = self.resource_manager.get_resources()
        ##print("common nodes count:" + str(len(toNodes(resources))))
        nodes = HeftHelper.to_nodes(resources)

        wf_jobs = {wf: self.make_ranking(wf, nodes) for wf in sorted_wfs}

        ##new_schedule = self.get_unchanged_schedule(self.old_schedule, time)
        new_schedule = Schedule({node: [] for node in nodes})
        new_plan = new_schedule.mapping

        for (wf, jobs) in wf_jobs.items():


            new_schedule = self.mapping([(wf, jobs)],
                               new_plan,
                               nodes,
                               self.commcost,
                               self.compcost)
            new_plan = new_schedule.mapping

        return new_schedule
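
A tiny illustration (not from the source) of the byPriority ordering used above: workflows whose priority is None sort as 0 and therefore come before any workflow with a positive priority; the WF class is a hypothetical stand-in for a workflow object.

class WF:  # hypothetical stand-in for a workflow with an optional priority
    def __init__(self, name, priority=None):
        self.name, self.priority = name, priority

wfs = [WF("b", 5), WF("a"), WF("c", 1)]
ordered = sorted(wfs, key=lambda wf: 0 if wf.priority is None else wf.priority)
assert [w.name for w in ordered] == ["a", "c", "b"]
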
Example #4
def build_schedule(workflow, estimator, resource_manager, solution):
    """
    the solution consists all parts necessary to build whole solution
    For the moment, it is mentioned that all species taking part in algorithm
    are necessary to build complete solution
    solution = {
        s1.name: val1,
        s2.name: val2,
        ....
    }
    """
    ms = solution[MAPPING_SPECIE]
    os = solution[ORDERING_SPECIE]

    assert check_precedence(workflow, os), "Precedence is violated"

    ms = {t: resource_manager.byName(n) for t, n in ms}
    schedule_mapping = {n: [] for n in set(ms.values())}
    task_to_node = {}
    for t in os:
        node = ms[t]
        t = workflow.byId(t)
        (start_time,
         end_time) = place_task_to_schedule(workflow, estimator,
                                            schedule_mapping, task_to_node, ms,
                                            t, node, 0)

        task_to_node[t.id] = (node, start_time, end_time)
    schedule = Schedule(schedule_mapping)
    return schedule
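
A hedged usage sketch of build_schedule; judging from the comprehensions above, the MAPPING_SPECIE value is assumed to be a sequence of (task_id, node_name) pairs and the ORDERING_SPECIE value a precedence-respecting sequence of task ids. The workflow, estimator and resource_manager objects, as well as the task and node names, are hypothetical.

solution = {
    MAPPING_SPECIE: [("task_1", "node_a"), ("task_2", "node_b")],  # hypothetical ids/names
    ORDERING_SPECIE: ["task_1", "task_2"],
}
schedule = build_schedule(workflow, estimator, resource_manager, solution)
print(Utility.makespan(schedule))
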
Example #5
    def clean_events(self, event):

        # remove all unstarted tasks
        cleaned_task = set()
        if isinstance(event, NodeFailed):
            cleaned_task = set([event.task])

        new_mapping = dict()
        for (node, items) in self.current_schedule.mapping.items():
            new_mapping[node] = []
            for item in items:
                if item.state != ScheduleItem.UNSTARTED:
                    new_mapping[node].append(item)
                else:
                    cleaned_task.add(item.job)
        clean_schedule = Schedule(new_mapping)
        # remove all events associated with these tasks
        prm = self.public_resources_manager

        def check(event):
            if isinstance(
                    event, TaskStart
            ) and event.task in cleaned_task and not prm.isCloudNode(
                    event.node):
                return False
            if isinstance(
                    event, TaskFinished
            ) and event.task in cleaned_task and not prm.isCloudNode(
                    event.node):
                return False
            return True

        new_queue = deque([evnt for evnt in self.queue if check(evnt)])
        self.queue = new_queue
        return clean_schedule
Example #6
    def _clean_events(self, event):

        # remove all unstarted tasks
        cleaned_task = set()
        if isinstance(event, NodeFailed):
            cleaned_task = set([event.task])

        new_mapping = dict()
        for (node, items) in self.current_schedule.mapping.items():
            new_mapping[node] = []
            for item in items:
                if item.state != ScheduleItem.UNSTARTED:
                    new_mapping[node].append(item)
                else:
                    cleaned_task.add(item.job)
        clean_schedule = Schedule(new_mapping)
        # remove all events associated with these tasks
        def check(event):
            if isinstance(event, TaskStart) and event.task in cleaned_task:
                return False
            if isinstance(event, TaskFinished) and event.task in cleaned_task:
                return False
            return True
        ##TODO: refactor it later
        self.queue = deque([event for event in self.queue if check(event)])
        return clean_schedule
Example #7
def fnc():
    empty_schedule = Schedule(
        {node: []
         for node in resource_manager.get_nodes()})
    res = ga(empty_schedule, None)
    print(res)
    pass
Example #8
def gaheft_reschedule(wf_added_time):

    copy_gaheft_schedule = Schedule({
        node: [item for item in items]
        for (node, items) in ga_initial_schedule.mapping.items()
    })

    added_time = all_initial_wf_time * wf_added_time

    mark_finished(copy_gaheft_schedule)
    gaheft_added = DynamicHeft(added_wf, resource_manager, estimator)
    gaheft_added.current_time = added_time
    gaheft_added_schedule = gaheft_added.run(copy_gaheft_schedule)
    new_ga = GAComputationManager(15, added_wf, resource_manager, estimator)

    gaheft_added_schedule = new_ga.run(gaheft_added_schedule, added_time,
                                       False)[2]

    mark_finished(gaheft_added_schedule)

    nodes_seq_validity = Utility.validateNodesSeq(gaheft_added_schedule)
    if nodes_seq_validity is not True:
        raise Exception("Check for nodes_seq_validity didn't pass")
    initial_wf_validity = Utility.validateParentsAndChildren(
        gaheft_added_schedule, initial_wf)
    if initial_wf_validity is not True:
        raise Exception("Check for initial_wf_validity didn't pass")
    added_wf_validity = Utility.validateParentsAndChildren(
        gaheft_added_schedule, added_wf)
    if added_wf_validity is not True:
        raise Exception("Check for added_wf_validity didn't pass")
    #print("All Ok!")
    result = Utility.makespan(gaheft_added_schedule)
    return result
Example #9
def default_fixed_schedule_part(resource_manager):
    fix_schedule_part = Schedule({
        node: []
        for node in HeftHelper.to_nodes(resource_manager.get_resources())
    })
    return fix_schedule_part
Example #10
    def __init__(self,
                 workflow,
                 resource_manager,
                 estimator,
                 base_fail_duration,
                 base_fail_dispersion,
                 initial_schedule):
        ## TODO: remake it later
        self.queue = deque()
        self.current_time = 0
        self.workflow = workflow
        # DynamicHeft
        #self.heft_planner = heft_planner
        self.resource_manager = resource_manager
        self.estimator = estimator
        self.base_fail_duration = base_fail_duration
        self.base_fail_dispersion = base_fail_dispersion
        ##self.current_schedule = Schedule({node:[] for node in heft_planner.get_nodes()})
        self.initial_schedule = initial_schedule
        self.current_schedule = Schedule({key:[] for key in initial_schedule.mapping.keys()})

        #self.ready_tasks = []
        self.finished_tasks = [self.workflow.head_task.id]

        ## TODO: correct this stub later
        self.logger = None
Example #11
def do_exp(wf_name):
    _wf = wf(wf_name)
    rm = ExperimentResourceManager(rg.r([10, 15, 25, 30]))
    estimator = SimpleTimeCostEstimator(comp_time_cost=0,
                                        transf_time_cost=0,
                                        transferMx=None,
                                        ideal_flops=20,
                                        transfer_time=100)

    empty_fixed_schedule_part = Schedule({node: [] for node in rm.get_nodes()})

    heft_schedule = run_heft(_wf, rm, estimator)

    ga_functions = GAFunctions2(_wf, rm, estimator)

    generate = partial(ga_generate,
                       ga_functions=ga_functions,
                       fixed_schedule_part=empty_fixed_schedule_part,
                       current_time=0.0,
                       init_sched_percent=0.05,
                       initial_schedule=heft_schedule)

    stats = tools.Statistics(lambda ind: ind.fitness.values[0])
    stats.register("avg", numpy.mean)
    stats.register("std", numpy.std)
    stats.register("min", numpy.min)
    stats.register("max", numpy.max)

    logbook = tools.Logbook()
    logbook.header = ["gen", "evals"] + stats.fields

    toolbox = Toolbox()
    toolbox.register("generate", generate)
    toolbox.register(
        "evaluate",
        fit_converter(
            ga_functions.build_fitness(empty_fixed_schedule_part, 0.0)))
    toolbox.register("clone", deepcopy)
    toolbox.register("mate", ga_functions.crossover)
    toolbox.register("sweep_mutation", ga_functions.sweep_mutation)
    toolbox.register("mutate", ga_functions.mutation)
    # toolbox.register("select_parents", )
    # toolbox.register("select", tools.selTournament, tournsize=4)
    toolbox.register("select", tools.selRoulette)
    pop, logbook, best = run_ga(toolbox=toolbox,
                                logbook=logbook,
                                stats=stats,
                                **GA_PARAMS)

    resulted_schedule = ga_functions.build_schedule(best,
                                                    empty_fixed_schedule_part,
                                                    0.0)

    Utility.validate_static_schedule(_wf, resulted_schedule)

    ga_makespan = Utility.makespan(resulted_schedule)
    return ga_makespan
Example #12
def run_heft(workflow, resource_manager, estimator):
    """
    It simply runs heft with empty initial schedule
    and returns complete schedule
    """
    heft = DynamicHeft(workflow, resource_manager, estimator)
    nodes = resource_manager.get_nodes()
    init_schedule = Schedule({node: [] for node in nodes})
    return heft.run(init_schedule)
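
A hedged usage sketch; the workflow, resource manager and estimator are assumed to be constructed as in Example #11, and Utility.makespan is the helper used elsewhere in these examples.

heft_schedule = run_heft(_wf, rm, estimator)
print("HEFT makespan: {0}".format(Utility.makespan(heft_schedule)))
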
Example #13
    def clean_unfinished(schedule):
        def clean(items):
            return [
                item for item in items if item.state == ScheduleItem.FINISHED
                or item.state == ScheduleItem.EXECUTING
            ]

        new_mapping = {
            node: clean(items)
            for (node, items) in schedule.mapping.items()
        }
        return Schedule(new_mapping)
Example #14
    def init(self):
        self.current_schedule = Schedule({node: [] for node in self.heft_planner.get_nodes()})

        initial_schedule = self.heft_planner.run(deepcopy(self.current_schedule))
        #print("HEFT MAKESPAN: {0}".format(Utility.makespan(initial_schedule)))
        # TODO: change these two ugly records
        result = self.ga_builder()(self.current_schedule, initial_schedule)
        #print("INIT MAKESPAN: {0}".format(Utility.makespan(result[0][2])))
        self.current_schedule = result[0][2]

        self._post_new_events()
        return result
Example #15
    def init(self):
        if self.initial_schedule is None:
            self.current_schedule = Schedule(
                {node: []
                 for node in self.heft_planner.get_nodes()})
            self.current_schedule = self.heft_planner.run(
                self.current_schedule)
        else:
            id_to_task = {
                tsk.id: tsk
                for tsk in HeftHelper.get_all_tasks(self.heft_planner.workflow)
            }
            mapping = {
                node: [
                    ScheduleItem(id_to_task[item.job.id], item.start_time,
                                 item.end_time) for item in items
                ]
                for (node, items) in self.initial_schedule.mapping.items()
            }
            self.current_schedule = Schedule(mapping)
        self._post_new_events()
Example #16
    def init(self):
        self.current_schedule = Schedule({node: [] for node in self.heft_planner.get_nodes()})

        initial_schedule = self.heft_planner.run(Schedule({node: [] for node in self.heft_planner.get_nodes()}))

        # print("heft solution!")
        # fsh = [hash(key) for key in initial_schedule.mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")


        # TODO: change these two ugly records
        result = self.ga_builder()(self.current_schedule, initial_schedule)


        # print("Ga solution is broken!")
        # fsh = [hash(key) for key in result[0][2].mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")



        if not self._apply_mh_if_better(
                None,
                heuristic_resulted_schedule=initial_schedule,
                metaheuristic_resulted_schedule=result[0][2]):
            self.current_schedule = initial_schedule
            self._post_new_events()

        # print("Before Before!")
        # fsh = [hash(key) for key in self.current_schedule.mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")


        #self.current_schedule = result[0][2]
        #self._post_new_events()
        return result
Example #17
    def init(self):
        ## TODO: replace it with logging
        print("Working with initial state of nodes: {0}".format([n.flops for n in self.resource_manager.get_nodes()]))

        ga_planner = self.ga_builder()
        self.current_schedule = Schedule({node: [] for node in self.resource_manager.get_nodes()})
        (result, logbook) = ga_planner(self.current_schedule, None)
        self.past_pop = ga_planner.get_pop()
        print("Result makespan: " + str(Utility.makespan(result[2])))
        self.current_schedule = result[2]
        self._post_new_events()

        self.failed_once = False
        pass
Example #18
        def _run_heft():
            dynamic_planner = DynamicHeft(wf, resource_manager, estimator)
            nodes = HeftHelper.to_nodes(resource_manager.resources)
            current_cleaned_schedule = Schedule({node: [] for node in nodes})
            schedule_dynamic_heft = dynamic_planner.run(
                current_cleaned_schedule)

            self._validate(wf, estimator, schedule_dynamic_heft)

            if is_visualized:
                viz.visualize_task_node_mapping(wf, schedule_dynamic_heft)
                # Utility.create_jedule_visualization(schedule_dynamic_heft, wf_name+'_heft')
                pass
            return schedule_dynamic_heft
Example #19
    def __init__(self, workflow, resource_manager, estimator,
                 base_fail_duration, base_fail_dispersion, initial_schedule):
        ## TODO: remake it later
        self.queue = deque()
        self.current_time = 0
        self.workflow = workflow
        # DynamicHeft
        #self.heft_planner = heft_planner
        self.resource_manager = resource_manager
        self.estimator = estimator
        self.base_fail_duration = base_fail_duration
        self.base_fail_dispersion = base_fail_dispersion
        ##self.current_schedule = Schedule({node:[] for node in heft_planner.get_nodes()})
        self.initial_schedule = initial_schedule
        self.current_schedule = Schedule(
            {key: []
             for key in initial_schedule.mapping.keys()})

        #self.ready_tasks = []
        self.finished_tasks = [self.workflow.head_task.id]

        ## TODO: correct this stub later
        self.logger = None
Example #20
        def as_schedule(dct):
            if '__cls__' in dct and dct['__cls__'] == 'Node':
                res = dct['resource']
                node = Node(dct['name'], res, dct['soft'])
                node.flops = dct['flops']
                return node
            if '__cls__' in dct and dct['__cls__'] == 'ScheduleItem':
                task = task_dict[dct['job']]
                scItem = ScheduleItem(task, dct['start_time'], dct['end_time'])
                scItem.state = dct['state']
                return scItem
            if '__cls__' in dct and dct['__cls__'] == 'Schedule':
                mapping = {
                    node_values['node']: node_values['value']
                    for node_values in dct['mapping']
                }
                schedule = Schedule(mapping)
                return schedule
            if '__cls__' in dct and dct['__cls__'] == 'Resource':
                res = Resource(dct['name'])
                res.nodes = dct['nodes']
                return res
            if '__cls__' in dct and dct['__cls__'] == 'SaveBundle':

                all_nodes = set()
                for res in dct['dedicated_resources']:
                    for node in res.nodes:
                        node.resource = res
                    all_nodes.update(res.nodes)

                all_nodes = {node.name: node for node in all_nodes}

                dct['ga_schedule'].mapping = {
                    all_nodes[node_name]: values
                    for (node_name,
                         values) in dct['ga_schedule'].mapping.items()
                }

                bundle = SaveBundle(dct['name'], dct['dedicated_resources'],
                                    dct['transfer_mx'], dct['ideal_flops'],
                                    dct['ga_schedule'], dct['wf_name'])
                return bundle
            return dct
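
A hedged usage sketch, assuming as_schedule is intended as a json object_hook and that the serialized bundle tags every object with a '__cls__' field as handled above; the file name is hypothetical.

import json

with open("bundle.json") as f:  # hypothetical file name
    bundle = json.load(f, object_hook=as_schedule)
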
Example #21
    def __init__(self, workflow, resource_manager, estimator, ranking=None):
        self.current_schedule = Schedule(dict())
        self.workflow = workflow
        self.resource_manager = resource_manager
        self.estimator = estimator

        self.current_time = 0

        nodes = self.get_nodes()

        self.wf_jobs = self.make_ranking(self.workflow,
                                         nodes) if ranking is None else ranking

        # print("A: " + str(self.wf_jobs))

        #TODO: remove it later
        # to_print = ''
        # for job in self.wf_jobs:
        #     to_print = to_print + str(job.id) + " "
        # print(to_print)
        pass
Example #22
def generate(wf,
             rm,
             estimator,
             schedule=None,
             fixed_schedule_part=None,
             current_time=0.0):
    sched = schedule if schedule is not None else SimpleRandomizedHeuristic(
        wf, rm.get_nodes(), estimator).schedule(fixed_schedule_part,
                                                current_time)

    if fixed_schedule_part is not None:
        un_tasks = unmoveable_tasks(fixed_schedule_part)
        clean_sched = Schedule({
            node: [
                item for item in items if item.job.id not in un_tasks
                and item.state != ScheduleItem.FAILED
            ]
            for node, items in sched.mapping.items()
        })
    else:
        clean_sched = sched

    mapping, ordering = ord_and_map(clean_sched)
    ordering_numseq = ordering_to_numseq(ordering)
    ordering_map = {
        task_id: val
        for task_id, val in zip(ordering, ordering_numseq)
    }
    ord_p, map_p = OrderingParticle(ordering_map), MappingParticle(mapping)
    ord_p.velocity = OrderingParticle.Velocity({})
    map_p.velocity = MappingParticle.Velocity({})

    result = CompoundParticle(map_p, ord_p)
    if schedule is None and not validate_mapping_with_alive_nodes(
            result.mapping.entity, rm):
        raise Exception("found invalid solution in generated array")
    return result
Example #23
        def _get_fixed_schedule(schedule, front_event):
            def is_before_event(item):
                # Hard-to-resolve corner case: the simulator doesn't guarantee the order in which events appear.
                if item.start_time < front_event.end_time:
                    return True
                ## TODO: Urgent! Experimental change. Perhaps it should be removed from here later.
                if item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.FAILED:
                    return True
                return False
            ## TODO: this is a dangerous operation.
            ## TODO: need to create a new instance of ScheduleItem.
            def set_proper_state(item):

                new_item = ScheduleItem.copy(item)

                non_finished = new_item.state == ScheduleItem.EXECUTING or new_item.state == ScheduleItem.UNSTARTED
                ## TODO: Urgent!: dangerous place
                if non_finished and new_item.end_time <= front_event.end_time:
                    new_item.state = ScheduleItem.FINISHED
                if non_finished and new_item.end_time > front_event.end_time:
                    new_item.state = ScheduleItem.EXECUTING
                return new_item
            fixed_mapping = {key: [set_proper_state(item) for item in items if is_before_event(item)]
                             for (key, items) in schedule.mapping.items()}
            return Schedule(fixed_mapping)
Example #24
class HeftExecutor(FailRandom, BaseExecutor):

    def __init__(self, heft_planner, base_fail_duration, base_fail_dispersion,
                 initial_schedule=None, logger=None):
        ## TODO: remake it later
        self.queue = deque()
        self.current_time = 0
        # DynamicHeft
        self.heft_planner = heft_planner
        self.base_fail_duration = base_fail_duration
        self.base_fail_dispersion = base_fail_dispersion
        self.initial_schedule = initial_schedule
        self.current_schedule = initial_schedule

        self.logger = logger

    def init(self):
        if self.initial_schedule is None:
            self.current_schedule = Schedule({node: [] for node in self.heft_planner.get_nodes()})
            self.current_schedule = self.heft_planner.run(self.current_schedule)
        else:
            id_to_task = {tsk.id: tsk for tsk in HeftHelper.get_all_tasks(self.heft_planner.workflow)}
            mapping = {node: [ScheduleItem(id_to_task[item.job.id], item.start_time, item.end_time) for item in items]
                       for (node, items) in self.initial_schedule.mapping.items()}
            self.current_schedule = Schedule(mapping)
        self._post_new_events()



    def _generate_failtime_and_duration(self, item):
        # generate fail time, post it
        duration = self.base_fail_duration + self.base_fail_dispersion *random.random()
        time_of_fail = (item.end_time - self.current_time)*random.random()
        return (time_of_fail, duration)

    def _task_start_handler(self, event):
        # check task as executing
        # self.current_schedule.change_state(event.task, ScheduleItem.EXECUTING)

        # try to find nodes in cloud


        # check if failed and post
        (node, item) = self.current_schedule.place_by_time(event.task, event.time_happened)
        item.state = ScheduleItem.EXECUTING

        if self._check_fail(event.task, node):

            (time_of_fail, duration) = self._generate_failtime_and_duration(item)
            time_of_fail = self.current_time + (time_of_fail if time_of_fail > 0 else 0.01) ##(item.end_time - self.current_time)*0.01

            event_failed = NodeFailed(node, event.task)
            event_failed.time_happened = time_of_fail

            event_nodeup = NodeUp(node)
            event_nodeup.time_happened = time_of_fail + duration

            self.post(event_failed)
            self.post(event_nodeup)
            # remove TaskFinished event
            self.queue = deque([ev for ev in self.queue if not (isinstance(ev, TaskFinished) and ev.task.id == event.task.id)])
            pass

        pass

    def _task_finished_handler(self, event):
        # check task finished
        self.current_schedule.change_state_executed(event.task, ScheduleItem.FINISHED)
        pass

    def _node_failed_handler(self, event):
        # check node down
        self.heft_planner.resource_manager.node(event.node).state = Node.Down
        # check failed event in schedule
        ## TODO: ambiguous choice
        ##self.current_schedule.change_state(event.task, ScheduleItem.FAILED)
        it = [item for item in self.current_schedule.mapping[event.node] if item.job.id == event.task.id and item.state == ScheduleItem.EXECUTING]
        if len(it) != 1:
            ## TODO: raise exception here
            pass

        it[0].state = ScheduleItem.FAILED
        it[0].end_time = self.current_time

        self._reschedule(event)
        pass

    def _node_up_handler(self, event):
        # check node up
        self.heft_planner.resource_manager.node(event.node).state = Node.Unknown
        self._reschedule(event)
        pass

    pass
Example #25
class HeftExecutor(FailRandom, BaseExecutor):
    def __init__(self,
                 resource_manager,
                 heft_planner,
                 base_fail_duration,
                 base_fail_dispersion,
                 fail_count_upper_limit=None,
                 initial_schedule=None,
                 logger=None):
        super().__init__(heft_planner, base_fail_duration,
                         base_fail_dispersion, fail_count_upper_limit,
                         initial_schedule, logger)

        ## TODO: remake it later
        self.queue = deque()
        self.current_time = 0
        # DynamicHeft
        self.heft_planner = heft_planner
        self.base_fail_duration = base_fail_duration
        self.base_fail_dispersion = base_fail_dispersion
        self.initial_schedule = initial_schedule
        self.current_schedule = initial_schedule

        self.resource_manager = resource_manager
        self._fail_count_upper_limit = fail_count_upper_limit

        self.logger = logger

    def init(self):
        if self.initial_schedule is None:
            self.current_schedule = Schedule(
                {node: []
                 for node in self.heft_planner.get_nodes()})
            self.current_schedule = self.heft_planner.run(
                self.current_schedule)
        else:
            id_to_task = {
                tsk.id: tsk
                for tsk in HeftHelper.get_all_tasks(self.heft_planner.workflow)
            }
            mapping = {
                node: [
                    ScheduleItem(id_to_task[item.job.id], item.start_time,
                                 item.end_time) for item in items
                ]
                for (node, items) in self.initial_schedule.mapping.items()
            }
            self.current_schedule = Schedule(mapping)
        self._post_new_events()

    def _generate_failtime_and_duration(self, item):
        # generate fail time, post it
        duration = (self.base_fail_duration +
                    self.base_fail_dispersion * random.random())
        time_of_fail = (item.end_time - self.current_time) * random.random()
        return (time_of_fail, duration)

    def _task_start_handler(self, event):
        # check task as executing
        # self.current_schedule.change_state(event.task, ScheduleItem.EXECUTING)

        # try to find nodes in cloud

        # check if failed and post
        (node,
         item) = self.current_schedule.place_by_time(event.task,
                                                     event.time_happened)
        item.state = ScheduleItem.EXECUTING

        if self._check_fail(event.task, node):

            (time_of_fail,
             duration) = self._generate_failtime_and_duration(item)
            time_of_fail = self.current_time + (
                time_of_fail if time_of_fail > 0 else 0.01
            )  ##(item.end_time - self.current_time)*0.01

            event_failed = NodeFailed(node, event.task)
            event_failed.time_happened = time_of_fail

            event_nodeup = NodeUp(node)
            event_nodeup.time_happened = time_of_fail + duration

            self.post(event_failed)
            self.post(event_nodeup)
            # remove TaskFinished event
            self.queue = deque([
                ev for ev in self.queue
                if not (isinstance(ev, TaskFinished)
                        and ev.task.id == event.task.id)
            ])
            pass

        pass

    def _task_finished_handler(self, event):
        # check task finished
        self.current_schedule.change_state_executed(event.task,
                                                    ScheduleItem.FINISHED)
        pass

    def _node_failed_handler(self, event):
        # check node down
        self.heft_planner.resource_manager.node(event.node).state = Node.Down
        # check failed event in schedule
        ## TODO: ambiguous choice
        ##self.current_schedule.change_state(event.task, ScheduleItem.FAILED)
        it = [
            item for item in self.current_schedule.mapping[event.node]
            if item.job.id == event.task.id
            and item.state == ScheduleItem.EXECUTING
        ]
        if len(it) != 1:
            ## TODO: raise exception here
            pass

        it[0].state = ScheduleItem.FAILED
        it[0].end_time = self.current_time

        self._reschedule(event)
        pass

    def _node_up_handler(self, event):
        # check node up
        self.heft_planner.resource_manager.node(
            event.node).state = Node.Unknown
        self._reschedule(event)
        pass

    pass
Example #26
class GAExecutor(FailRandom, BaseExecutor):

    def __init__(self,
                 workflow,
                 resource_manager,
                 estimator,
                 base_fail_duration,
                 base_fail_dispersion,
                 initial_schedule):
        ## TODO: remake it later
        self.queue = deque()
        self.current_time = 0
        self.workflow = workflow
        # DynamicHeft
        #self.heft_planner = heft_planner
        self.resource_manager = resource_manager
        self.estimator = estimator
        self.base_fail_duration = base_fail_duration
        self.base_fail_dispersion = base_fail_dispersion
        ##self.current_schedule = Schedule({node:[] for node in heft_planner.get_nodes()})
        self.initial_schedule = initial_schedule
        self.current_schedule = Schedule({key:[] for key in initial_schedule.mapping.keys()})

        #self.ready_tasks = []
        self.finished_tasks = [self.workflow.head_task.id]

        ## TODO: correct this stub later
        self.logger = None

    def init(self):
        #self.current_schedule = self.heft_planner.run(self.current_schedule)

        #to_run = [child for child in self.workflow.head_task.children if self.is_next_to_run(child)]
        unstarted_tasks = self.get_ready_tasks(self.workflow.head_task, None)
        #run ready tasks
        self.post_new_events(unstarted_tasks)

    def is_ready(self, task):
        nope = False in [(p.id in self.finished_tasks) for p in task.parents]
        return not nope

    def is_next_to_run(self, task):
        (node, item) = self.initial_schedule.place(task)
        its = [it for it in self.initial_schedule.mapping[node] if it.start_time < item.start_time]
        not_next = False in [(it.job.id in self.finished_tasks) for it in its]
        return not not_next

    def _task_start_handler(self, event):
        (node, item) = self.current_schedule.place_by_time(event.task, event.time_happened)
        item.state = ScheduleItem.EXECUTING

        if self._check_fail(event.task, node):
            # generate fail time, post it
            duration = self.base_fail_duration + self.base_fail_dispersion *random.random()
            time_of_fail = (item.end_time - self.current_time)*random.random()
            time_of_fail = self.current_time + (time_of_fail if time_of_fail > 0 else 0.01) ##(item.end_time - self.current_time)*0.01

            event_failed = NodeFailed(node, event.task)
            event_failed.time_happened = time_of_fail

            event_nodeup = NodeUp(node)
            event_nodeup.time_happened = time_of_fail + duration

            self.post(event_failed)
            self.post(event_nodeup)
            # remove TaskFinished event
            self.queue = deque([ev for ev in self.queue if not (isinstance(ev, TaskFinished) and ev.task.id == event.task.id)])

            pass
        pass

    def _task_finished_handler(self, event):
        # check task finished

        self.current_schedule.change_state_executed(event.task, ScheduleItem.FINISHED)

        self.finished_tasks.append(event.task.id)

        unstarted_items = self.get_ready_tasks(event.task, event.node)

        ##TODO: remove it later
        #print("==============================")
        #print("Task " + str(event.task) + " finished")
        #for item in unstarted_items:
        #    print("Start task: " + str(item.job) + " On node: " + str(self.initial_schedule.place(item.job)[0]))
        #print("==============================")
        #generate new task start events
        self.post_new_events(unstarted_items)
        pass

    def _node_failed_handler(self, event):
        # check node down
        self.resource_manager.node(event.node).state = Node.Down
        # check failed event in schedule
        ## TODO: ambiguous choice
        ##self.current_schedule.change_state(event.task, ScheduleItem.FAILED)
        it = [item for item in self.current_schedule.mapping[event.node] if item.job.id == event.task.id and item.state == ScheduleItem.EXECUTING]
        if len(it) != 1:
            ## TODO: raise exception here
            pass

        it[0].state = ScheduleItem.FAILED
        it[0].end_time = self.current_time
        pass

    def _node_up_handler(self, event):
        # check node up
        self.resource_manager.node(event.node).state = Node.Unknown
        #get next task for this node
        next_sched_item = []
        for item in self.initial_schedule.mapping[event.node]:
            if item.job.id not in self.finished_tasks:
                next_sched_item = item
                break

        runtime = next_sched_item.end_time - next_sched_item.start_time
        start_time = self.current_time
        end_time = start_time + runtime

        actual_sched_item = ScheduleItem(next_sched_item.job, start_time, end_time)
        self.post_new_events([actual_sched_item])
        pass


    def get_ready_tasks(self, ptask, pnode):
        unstarted_items = []
        next_for_ptask = self.initial_schedule.get_next_item(ptask)
        #next_for_ptask = [] if next_for_ptask is None else [next_for_ptask.job]
        tsks = [tsk for tsk in ptask.children if self.is_ready(tsk) and self.is_next_to_run(tsk)]
        ##TODO: refactor it later
        if next_for_ptask is not None and next_for_ptask.job not in tsks and self.is_ready(next_for_ptask.job) and self.is_next_to_run(next_for_ptask.job):
            tsks.append(next_for_ptask.job)

        # tsks mustn't be finished, executing or their node is Down
        def appropriate_to_run(tsk):
            if tsk.id in self.finished_tasks:
                return False
            if self.current_schedule.is_executing(tsk):
                return False
            nd = self.initial_schedule.place(tsk)[0]
            if self.resource_manager.node(nd).state == Node.Down:
                return False
            return True

        tsks = [tsk for tsk in tsks if appropriate_to_run(tsk)]

        for child in tsks:
            (node, item) = self.initial_schedule.place(child)

            ## TODO: remake it later
            # transf = 0 if pnode is None else self.estimator.estimate_transfer_time(pnode, node, ptask, child)
            # runtime = item.end_time - item.start_time
            # start_time = self.current_time + transf
            # end_time = start_time + runtime

            sitems = self.current_schedule.mapping.items()
            pids = [p.id for p in child.parents]
            mp = {it.job.id: (pnd, it)
                  for (pnd, items) in sitems for it in items
                  if (it.job.id in pids) and (it.state == ScheduleItem.FINISHED)}
            estms = [it.end_time + self.estimator.estimate_transfer_time(pnd, node, it.job, child)
                     for (id, (pnd, it)) in mp.items()]
            transf_end = 0 if len(estms) == 0 else max(estms)

            runtime = item.end_time - item.start_time
            start_time = max(self.current_time, transf_end)
            end_time = start_time + runtime


            actual_sched_item = ScheduleItem(item.job, start_time, end_time)
            unstarted_items.append(actual_sched_item)
        return unstarted_items

    def post_new_events(self, unstarted_items):
        for item in unstarted_items:
            (node, it) = self.initial_schedule.place(item.job)

            event_start = TaskStart(item.job)
            event_start.time_happened = item.start_time
            event_start.node = node

            event_finish = TaskFinished(item.job)
            event_finish.time_happened = item.end_time
            event_finish.node = node

            self.post(event_start)
            self.post(event_finish)

            self.current_schedule.mapping[node].append(item)
        pass
Example #27
class GAExecutor(FailRandom, BaseExecutor):
    def __init__(self, workflow, resource_manager, estimator,
                 base_fail_duration, base_fail_dispersion, initial_schedule):
        ## TODO: remake it later
        self.queue = deque()
        self.current_time = 0
        self.workflow = workflow
        # DynamicHeft
        #self.heft_planner = heft_planner
        self.resource_manager = resource_manager
        self.estimator = estimator
        self.base_fail_duration = base_fail_duration
        self.base_fail_dispersion = base_fail_dispersion
        ##self.current_schedule = Schedule({node:[] for node in heft_planner.get_nodes()})
        self.initial_schedule = initial_schedule
        self.current_schedule = Schedule(
            {key: []
             for key in initial_schedule.mapping.keys()})

        #self.ready_tasks = []
        self.finished_tasks = [self.workflow.head_task.id]

        ## TODO: correct this stub later
        self.logger = None

    def init(self):
        #self.current_schedule = self.heft_planner.run(self.current_schedule)

        #to_run = [child for child in self.workflow.head_task.children if self.is_next_to_run(child)]
        unstarted_tasks = self.get_ready_tasks(self.workflow.head_task, None)
        #run ready tasks
        self.post_new_events(unstarted_tasks)

    def is_ready(self, task):
        nope = False in [(p.id in self.finished_tasks) for p in task.parents]
        return not nope

    def is_next_to_run(self, task):
        (node, item) = self.initial_schedule.place(task)
        its = [
            it for it in self.initial_schedule.mapping[node]
            if it.start_time < item.start_time
        ]
        not_next = False in [(it.job.id in self.finished_tasks) for it in its]
        return not not_next

    def _task_start_handler(self, event):
        (node,
         item) = self.current_schedule.place_by_time(event.task,
                                                     event.time_happened)
        item.state = ScheduleItem.EXECUTING

        if self._check_fail(event.task, node):
            # generate fail time, post it
            duration = (self.base_fail_duration +
                        self.base_fail_dispersion * random.random())
            time_of_fail = (item.end_time -
                            self.current_time) * random.random()
            time_of_fail = self.current_time + (
                time_of_fail if time_of_fail > 0 else 0.01
            )  ##(item.end_time - self.current_time)*0.01

            event_failed = NodeFailed(node, event.task)
            event_failed.time_happened = time_of_fail

            event_nodeup = NodeUp(node)
            event_nodeup.time_happened = time_of_fail + duration

            self.post(event_failed)
            self.post(event_nodeup)
            # remove TaskFinished event
            self.queue = deque([
                ev for ev in self.queue
                if not (isinstance(ev, TaskFinished)
                        and ev.task.id == event.task.id)
            ])

            pass
        pass

    def _task_finished_handler(self, event):
        # check task finished

        self.current_schedule.change_state_executed(event.task,
                                                    ScheduleItem.FINISHED)

        self.finished_tasks.append(event.task.id)

        unstarted_items = self.get_ready_tasks(event.task, event.node)

        ##TODO: remove it later
        #print("==============================")
        #print("Task " + str(event.task) + " finished")
        #for item in unstarted_items:
        #    print("Start task: " + str(item.job) + " On node: " + str(self.initial_schedule.place(item.job)[0]))
        #print("==============================")
        #generate new task start events
        self.post_new_events(unstarted_items)
        pass

    def _node_failed_handler(self, event):
        # check node down
        self.resource_manager.node(event.node).state = Node.Down
        # check failed event in schedule
        ## TODO: ambiguous choice
        ##self.current_schedule.change_state(event.task, ScheduleItem.FAILED)
        it = [
            item for item in self.current_schedule.mapping[event.node]
            if item.job.id == event.task.id
            and item.state == ScheduleItem.EXECUTING
        ]
        if len(it) != 1:
            ## TODO: raise exception here
            pass

        it[0].state = ScheduleItem.FAILED
        it[0].end_time = self.current_time
        pass

    def _node_up_handler(self, event):
        # check node up
        self.resource_manager.node(event.node).state = Node.Unknown
        #get next task for this node
        next_sched_item = []
        for item in self.initial_schedule.mapping[event.node]:
            if item.job.id not in self.finished_tasks:
                next_sched_item = item
                break

        runtime = next_sched_item.end_time - next_sched_item.start_time
        start_time = self.current_time
        end_time = start_time + runtime

        actual_sched_item = ScheduleItem(next_sched_item.job, start_time,
                                         end_time)
        self.post_new_events([actual_sched_item])
        pass

    def get_ready_tasks(self, ptask, pnode):
        unstarted_items = []
        next_for_ptask = self.initial_schedule.get_next_item(ptask)
        #next_for_ptask = [] if next_for_ptask is None else [next_for_ptask.job]
        tsks = [
            tsk for tsk in ptask.children
            if self.is_ready(tsk) and self.is_next_to_run(tsk)
        ]
        ##TODO: refactor it later
        if next_for_ptask is not None and next_for_ptask.job not in tsks and self.is_ready(
                next_for_ptask.job) and self.is_next_to_run(
                    next_for_ptask.job):
            tsks.append(next_for_ptask.job)

        # tsks mustn't be finished, executing or their node is Down
        def appropriate_to_run(tsk):
            if tsk.id in self.finished_tasks:
                return False
            if self.current_schedule.is_executing(tsk):
                return False
            nd = self.initial_schedule.place(tsk)[0]
            if self.resource_manager.node(nd).state == Node.Down:
                return False
            return True

        tsks = [tsk for tsk in tsks if appropriate_to_run(tsk)]

        for child in tsks:
            (node, item) = self.initial_schedule.place(child)

            ## TODO: remake it later
            # transf = 0 if pnode is None else self.estimator.estimate_transfer_time(pnode, node, ptask, child)
            # runtime = item.end_time - item.start_time
            # start_time = self.current_time + transf
            # end_time = start_time + runtime

            sitems = self.current_schedule.mapping.items()
            pids = [p.id for p in child.parents]
            mp = {
                it.job.id: (pnd, it)
                for (pnd, items) in sitems for it in items
                if (it.job.id in pids) and (it.state == ScheduleItem.FINISHED)
            }
            estms = [
                it.end_time +
                self.estimator.estimate_transfer_time(pnd, node, it.job, child)
                for (id, (pnd, it)) in mp.items()
            ]
            transf_end = 0 if len(estms) == 0 else max(estms)

            runtime = item.end_time - item.start_time
            start_time = max(self.current_time, transf_end)
            end_time = start_time + runtime

            actual_sched_item = ScheduleItem(item.job, start_time, end_time)
            unstarted_items.append(actual_sched_item)
        return unstarted_items

    def post_new_events(self, unstarted_items):
        for item in unstarted_items:
            (node, it) = self.initial_schedule.place(item.job)

            event_start = TaskStart(item.job)
            event_start.time_happened = item.start_time
            event_start.node = node

            event_finish = TaskFinished(item.job)
            event_finish.time_happened = item.end_time
            event_finish.node = node

            self.post(event_start)
            self.post(event_finish)

            self.current_schedule.mapping[node].append(item)
        pass
Example #28
class CloudHeftExecutor(EventMachine):

    STATUS_RUNNING = 'running'
    STATUS_FINISHED = 'finished'

    def __init__(self, heft_planner, base_fail_duration, base_fail_dispersion,
                 desired_reliability, public_resource_manager, initial_schedule=None):
        ## TODO: remake it later
        self.queue = deque()
        self.current_time = 0
        # DynamicHeft
        self.heft_planner = heft_planner
        self.base_fail_duration = base_fail_duration
        self.base_fail_dispersion = base_fail_dispersion
        self.desired_reliability = desired_reliability
        self.public_resources_manager = public_resource_manager
        #self.current_schedule = Schedule({node: [] for node in heft_planner.get_nodes()})
        self.initial_schedule = initial_schedule
        self.current_schedule = initial_schedule

        self.register = dict()


    def init(self):
        #self.current_schedule = self.heft_planner.run(self.current_schedule)
        if self.initial_schedule is None:
            self.current_schedule = Schedule({node: [] for node in self.heft_planner.get_nodes()})
            self.current_schedule = self.heft_planner.run(self.current_schedule)
        else:
            id_to_task = {tsk.id: tsk for tsk in HeftHelper.get_all_tasks(self.heft_planner.workflow)}
            mapping = {node: [ScheduleItem(id_to_task[item.job.id], item.start_time, item.end_time) for item in items]
                       for (node, items) in self.initial_schedule.mapping.items()}
            self.current_schedule = Schedule(mapping)
        self.post_new_events()

    def event_arrived(self, event):

        def reschedule(event):
            self.heft_planner.current_time = self.current_time
            current_cleaned_schedule = self.clean_events(event)
            self.current_schedule = self.heft_planner.run(current_cleaned_schedule)
            self.post_new_events()

        def check_fail(reliability):
            res = random.random()
            if res > reliability:
                return True
            return False


        if isinstance(event, TaskStart):

            # TODO: if node is cloud node, do nothing
            prm = self.public_resources_manager
            if prm.isCloudNode(event.node):
                return None

            # check if failed and post
            (node, item) = self.current_schedule.place_by_time(event.task, event.time_happened)
            item.state = ScheduleItem.EXECUTING

            # check task as executing
            # self.current_schedule.change_state(event.task, ScheduleItem.EXECUTING)

            # public_resources_manager:
            #   determine nodes of proper soft type
            #   check and determine free nodes
            #   determine reliability of every nodes
            #   determine time_of_execution probability for (task,node) pair

            # try to find nodes in cloud

            if event.task not in self.register:

                proper_nodes = prm.get_by_softreq(event.task.soft_reqs)
                proper_nodes = [node for node in proper_nodes if not prm.isBusy(node)]
                sorted_proper_nodes = sorted(proper_nodes, key=lambda x: prm.get_reliability(x.name))
                current_set = []

                base_reliability = self.heft_planner.estimator.estimate_reliability(event.task, event.node)
                obtained_reliability = base_reliability
                dt = item.end_time - item.start_time
                def calc(node, dt):
                    # (dt, task, node, transfer_estimation)
                    # TODO: add proper transfer time here
                    fp = prm.get_reliability(node.name)
                    comp_time = self.heft_planner.estimator.estimate_runtime(event.task, node)
                    cp = prm.probability_estimator(dt, comp_time, 0)
                    # TODO: remove it later
                    # cp = 0.95
                    # print("cp: " + str(cp))
                    return (node, fp, cp)

                it_comm_buf = 0
                for pnode in sorted_proper_nodes:
                    common_reliability = 1 - base_reliability
                    #TODO: refactor this later
                    if 1 - common_reliability >= self.desired_reliability:
                        break
                    res = calc(pnode, dt)
                    current_set.append(res)
                    # TODO: add a probability density law for the dedicated resource

                    for (nd, fp, cp) in current_set:
                        common_reliability *= (1 - fp*cp)
                    common_reliability = 1 - common_reliability
                    #print("common_reliability: " + str(common_reliability))
                    it_comm_buf = common_reliability
                    if common_reliability >= self.desired_reliability:
                        #print("Commmon: "+ str(common_reliability))
                        break

                #print("Comm " + str(it_comm_buf) + " task: " + str(event.task.id))
                #print(" Obtained reliability " + str(obtained_reliability) + " for task: " + str(event.task))

                def frange(x, y, jump):
                    while x < y:
                        yield x
                        x += jump

                for (nd, fp, cp) in current_set:
                    comp_time = self.heft_planner.estimator.estimate_runtime(event.task, nd)
                    # sigma 0.1*M; let's take 0.6*M


                    #TODO: uncomment it later

                    ints = [(i, calc(nd, i)) for i in frange(0, comp_time + 0.2 * comp_time, 0.05 * comp_time)]
                    rd = random.random()
                    generated_comp_time = comp_time
                    for (i, p) in ints:
                        if p[2] > rd:
                            generated_comp_time = i
                            break

                    #comp_time + 0.6*comp_time
                    # TODO: remove it later
                    #generated_comp_time = comp_time + (0.2 * comp_time * random.random() - 0.1 * comp_time)
                    #generated_comp_time = comp_time - (0.2 * comp_time * (random.random() - 0.95))



                    #print("cloud reliability: " + str(fp))
                    if check_fail(fp):

                        event_start = TaskStart(event.task)
                        event_start.time_happened = self.current_time
                        event_start.node = nd
                        self.post(event_start)


                        duration = self.base_fail_duration + self.base_fail_dispersion *random.random()
                        time_of_fail = generated_comp_time*random.random()
                        time_of_fail = self.current_time + (time_of_fail if time_of_fail > 0 else 0.01) ##(item.end_time - self.current_time)*0.01

                        event_failed = NodeFailed(nd, event.task)
                        event_failed.time_happened = time_of_fail

                        event_nodeup = NodeUp(nd)
                        event_nodeup.time_happened = time_of_fail + duration

                        self.post(event_failed)
                        self.post(event_nodeup)
                    else:
                        event_start = TaskStart(event.task)
                        event_start.time_happened = self.current_time
                        event_start.node = nd

                        event_finish = TaskFinished(event.task)
                        event_finish.time_happened = self.current_time + generated_comp_time
                        event_finish.node = nd

                        self.post(event_start)
                        self.post(event_finish)

                    prm.checkBusy(nd, True)

                self.register[event.task] = CloudHeftExecutor.STATUS_RUNNING
                pass

            reliability = self.heft_planner.estimator.estimate_reliability(event.task, node)
            if check_fail(reliability):
                # generate fail time, post it
                duration = self.base_fail_duration + self.base_fail_dispersion *random.random()
                time_of_fail = (item.end_time - self.current_time)*random.random()
                time_of_fail = self.current_time + (time_of_fail if time_of_fail > 0 else 0.01) ##(item.end_time - self.current_time)*0.01

                event_failed = NodeFailed(node, event.task)
                event_failed.time_happened = time_of_fail

                event_nodeup = NodeUp(node)
                event_nodeup.time_happened = time_of_fail + duration

                self.post(event_failed)
                self.post(event_nodeup)
                # remove TaskFinished event
                self.queue = deque([ev for ev in self.queue if not (isinstance(ev, TaskFinished) and ev.task.id == event.task.id and not prm.isCloudNode(ev.node))])

                pass
            return None
        if isinstance(event, TaskFinished):

            # check if it is a cloud task
            # if task cloud and first: register as finished, check node in dedicated as finish, remove appropriate event of failure or task finished for dedicated, free cloud node, reschedule, end_of_function
            # if task cloud and not first: free cloud node, end_of_function
            # if task not cloud and first: register as finished, check node in dedicated as finish, end_of_function
            prm = self.public_resources_manager
            from_cloud = prm.isCloudNode(event.node)
            if from_cloud and self.register[event.task] == CloudHeftExecutor.STATUS_RUNNING:
                # print("gotcha task: " + str(event.task))
                self.register[event.task] = CloudHeftExecutor.STATUS_FINISHED
                ## TODO: correct it
                ## if event.task failed and went through rescheduling,
                ## it is possible that the current ScheduleItem of event.task on the dedicated resource
                ## has the UNSTARTED state.
                ## TODO: add additional functionality to the schedule to record such situations and validate them afterwards
                found = self.current_schedule.change_state_executed_with_end_time(event.task, ScheduleItem.FINISHED, self.current_time)
                pair = self.current_schedule.place_single(event.task)
                if pair is not None:
                    ## TODO: The bug is here. Fix it later.
                    ## the unstarted case must be taken into account in schedule and in the validity check procedure too
                    (nd, item) = pair
                    if item.state == ScheduleItem.EXECUTING:
                        item.start_time = event.time_happened
                        item.end_time = event.time_happened
                        item.state = ScheduleItem.FINISHED
                        self.queue = deque([ev for ev in self.queue if not (not isinstance(ev, NodeUp) and ev.task.id == event.task.id)])
                    else:
                        prm.checkBusy(event.node, False)
                        return None
                def check(ev):
                    if isinstance(ev, TaskFinished) or isinstance(ev, NodeFailed):
                        if ev.task.id == event.task.id and not prm.isCloudNode(ev.node):
                            return False
                    ## TODO: make it later
                    ##if isinstance(ev, NodeUp):
                    return True
                self.queue = deque([ev for ev in self.queue if check(ev)])
                prm.checkBusy(event.node, False)
                reschedule(event)
                return None
            if from_cloud and self.register[event.task] == CloudHeftExecutor.STATUS_FINISHED:
                prm.checkBusy(event.node, False)
                return None

            # check task finished
            self.register[event.task] = CloudHeftExecutor.STATUS_FINISHED
            self.current_schedule.change_state_executed(event.task, ScheduleItem.FINISHED)
            return None
        if isinstance(event, NodeFailed):

            # check if cloud node
            # if cloud node: check as down, free node, end_of_function
            # if not cloud node: check as down, reschedule, end_of_function
            prm = self.public_resources_manager
            from_cloud = prm.isCloudNode(event.node)

            if from_cloud:
                prm.checkDown(event.node.name, True)
                prm.checkBusy(event.node, False)
                return None


            # check node down
            self.heft_planner.resource_manager.node(event.node).state = Node.Down
            # check failed event in schedule
            ## TODO: ambiguous choice
            ##self.current_schedule.change_state(event.task, ScheduleItem.FAILED)
            it = [item for item in self.current_schedule.mapping[event.node] if item.job.id == event.task.id and item.state == ScheduleItem.EXECUTING]
            if len(it) != 1:
                ## TODO: raise exception here
                pass

            it[0].state = ScheduleItem.FAILED
            it[0].end_time = self.current_time

            reschedule(event)
            return None
        if isinstance(event, NodeUp):

            # check if cloud
            # if cloud: check node up, end_of_function
            # if not cloud: check as up, reschedule end_of_function
            prm = self.public_resources_manager
            from_cloud = prm.isCloudNode(event.node)
            if from_cloud:
                prm.checkDown(event.node.name, False)
                return None


            # check node up
            self.heft_planner.resource_manager.node(event.node).state = Node.Unknown
            reschedule(event)
            return None
        return None

    def post_new_events(self):
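        # Re-post TaskStart/TaskFinished events for every item of the current schedule
        # that is still in the UNSTARTED state.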
        unstarted_items = set()
        for (node, items) in self.current_schedule.mapping.items():
            for item in items:
                if item.state == ScheduleItem.UNSTARTED:
                    unstarted_items.add((node, item))

        events_to_post = []
        for (node, item) in unstarted_items:
            event_start = TaskStart(item.job)
            event_start.time_happened = item.start_time
            event_start.node = node

            event_finish = TaskFinished(item.job)
            event_finish.time_happened = item.end_time
            event_finish.node = node

            self.post(event_start)
            self.post(event_finish)
        pass

    def clean_events(self, event):

        # remove all unstarted tasks
        cleaned_task = set()
        if isinstance(event, NodeFailed):
            cleaned_task = set([event.task])

        new_mapping = dict()
        for (node, items) in self.current_schedule.mapping.items():
            new_mapping[node] = []
            for item in items:
                if item.state != ScheduleItem.UNSTARTED:
                    new_mapping[node].append(item)
                else:
                    cleaned_task.add(item.job)
        clean_schedule = Schedule(new_mapping)
        # remove all events associated with these tasks
        prm = self.public_resources_manager

        def check(event):
            if isinstance(event, TaskStart) and event.task in cleaned_task and not prm.isCloudNode(event.node):
                return False
            if isinstance(event, TaskFinished) and event.task in cleaned_task and not prm.isCloudNode(event.node):
                return False
            return True
        new_queue = deque([evnt for evnt in self.queue if check(evnt)])
        self.queue = new_queue
        return clean_schedule
    def schedule(self, fixed_schedule_part=None, current_time=0.0):

        estimate = self.estimator.estimate_transfer_time

        # TODO: make common utility function with ScheduleBuilder
        def is_last_version_of_task_executing(item):
            return item.state == ScheduleItem.EXECUTING or item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.UNSTARTED

        def _get_ready_tasks(children, finished_tasks):
            def _is_child_ready(child):
                ids = set([p.id for p in child.parents])
                result = False in [id in finished_tasks for id in ids]
                return not result

            ready_children = [
                child for child in children if _is_child_ready(child)
            ]
            return ready_children

        if fixed_schedule_part is None:
            schedule_mapping = {node: [] for node in self.nodes}
            ready_tasks = [
                child.id for child in self.workflow.head_task.children
            ]
            task_to_node = dict()
            finished_tasks = set()
        else:
            schedule_mapping = {
                node: [item for item in items]
                for (node, items) in fixed_schedule_part.mapping.items()
            }
            finished_tasks = [
                item.job.id
                for (node, items) in fixed_schedule_part.mapping.items()
                for item in items if is_last_version_of_task_executing(item)
            ]
            finished_tasks = set([self.workflow.head_task.id] + finished_tasks)
            unfinished = [
                task for task in self.workflow.get_all_unique_tasks()
                if not task.id in finished_tasks
            ]
            ready_tasks = [
                task.id
                for task in _get_ready_tasks(unfinished, finished_tasks)
            ]
            task_to_node = {
                item.job.id: (node, item.start_time, item.end_time)
                for (node, items) in fixed_schedule_part.mapping.items()
                for item in items if is_last_version_of_task_executing(item)
            }

        def is_child_ready(child):
            ids = set([p.id for p in child.parents])
            result = False in [id in finished_tasks for id in ids]
            return not result

        def find_slots(node, comm_ready, runtime):
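            # Candidate intervals: the gap before the first scheduled item, the gaps
            # between consecutive items, and an open-ended slot after the last item
            # (base_variant). Only intervals that start no earlier than comm_ready and
            # current_time and that are long enough for the runtime are returned.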
            node_schedule = schedule_mapping.get(node, list())
            free_time = 0 if len(
                node_schedule) == 0 else node_schedule[-1].end_time
            ## TODO: refactor it later
            f_time = max(free_time, comm_ready)
            f_time = max(f_time, current_time)
            base_variant = [(f_time, f_time + runtime + 1)]
            zero_interval = [] if len(node_schedule) == 0 else [
                (0, node_schedule[0].start_time)
            ]
            middle_intervals = [(node_schedule[i].end_time,
                                 node_schedule[i + 1].start_time)
                                for i in range(len(node_schedule) - 1)]
            intervals = zero_interval + middle_intervals + base_variant

            #result = [(st, end) for (st, end) in intervals if st >= comm_ready and end - st >= runtime]
            ## TODO: rethink rounding
            result = [
                (st, end) for (st, end) in intervals
                if (current_time < st or abs((current_time - st)) < 0.01)
                and st >= comm_ready and (
                    runtime < (end - st) or abs((end - st) - runtime) < 0.01)
            ]
            return result

        def comm_ready_func(task, node):
            ##TODO: remake this stub later.
            if len(task.parents) == 1 and self.workflow.head_task.id == list(
                    task.parents)[0].id:
                return 0
            return max([
                task_to_node[p.id][2] +
                estimate(node, task_to_node[p.id][0], task, p)
                for p in task.parents
            ])

        def get_possible_execution_times(task, node):
            ## pay attention to the last element in the resulting seq
            ## it represents all the available time of the node after it completes all its work
            ## (if such an interval can exist)
            ## time_slots = [(st1, end1), (st2, end2), ..., (st_last, st_last + runtime)]
            runtime = self.estimator.estimate_runtime(task, node)
            comm_ready = comm_ready_func(task, node)
            time_slots = find_slots(node, comm_ready, runtime)
            return time_slots, runtime

        while len(ready_tasks) > 0:
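            # Pick a ready task at random, place it on a random alive node in a random
            # feasible time slot, and repeat until every task has been scheduled.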
            choosed_index = random.randint(0, len(ready_tasks) - 1)
            task = self.task_map[ready_tasks[choosed_index]]

            # TODO: add a check for the case when all nodes are dead (it's a very rare situation, so it is not considered for now)
            alive_nodes = [
                node for node in self.nodes if node.state != Node.Down
            ]
            choosed_node_index = random.randint(0, len(alive_nodes) - 1)
            node = alive_nodes[choosed_node_index]

            time_slots, runtime = get_possible_execution_times(task, node)
            choosed_time_index = 0 if len(time_slots) == 1 else random.randint(
                0,
                len(time_slots) - 1)
            time_slot = time_slots[choosed_time_index]

            start_time = time_slot[0]
            end_time = start_time + runtime

            item = ScheduleItem(task, start_time, end_time)
            ##schedule_mapping[node].append(item)
            Schedule.insert_item(schedule_mapping, node, item)
            task_to_node[task.id] = (node, start_time, end_time)

            ##print('I am here')
            ready_tasks.remove(task.id)
            finished_tasks.add(task.id)

            ready_children = [
                child for child in task.children if is_child_ready(child)
            ]
            for child in ready_children:
                ready_tasks.append(child.id)

        schedule = Schedule(schedule_mapping)
        return schedule
Example #30
0
wf_added_times = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
#wf_added_times = [0.1]

initial_wf_name = "Montage_30"
added_wf_name = "Montage_25"

initial_wf = ExecutorRunner.get_wf(initial_wf_name, "00")
added_wf = ExecutorRunner.get_wf(added_wf_name, "10")
bundle = Utility.get_default_bundle()
(estimator, resource_manager,
 initial_schedule) = ExecutorRunner.get_infrastructure(bundle, 1.0, False)

## planning for initial wf
heft = DynamicHeft(initial_wf, resource_manager, estimator)
empty_schedule = Schedule({node: [] for node in heft.get_nodes()})
ga = GAComputationManager(15, initial_wf, resource_manager, estimator)

ga_initial_schedule = ga._get_ga_alg()(empty_schedule, None)[2]

all_initial_wf_time = Utility.makespan(ga_initial_schedule)

print("Initial time: " + str(all_initial_wf_time))

n = 5


## planning for added wf
def gaheft_reschedule(wf_added_time):

    copy_gaheft_schedule = Schedule({
Example #31
0
class GaHeftExecutor(FailRandom, BaseExecutor):
    #@trace
    def __init__(self, **kwargs):

        super().__init__(**kwargs)

        self.workflow = kwargs["wf"]
        self.resource_manager = kwargs["resource_manager"]
        # DynamicHeft
        # both planners have access to the resource manager and estimator
        self.heft_planner = kwargs["heft_planner"]
        self.base_fail_duration = kwargs["base_fail_duration"]
        self.base_fail_dispersion = kwargs["base_fail_dispersion"]
        self.current_schedule = None
        self.fixed_interval_for_ga = kwargs["fixed_interval_for_ga"]
        self.ga_builder = kwargs["ga_builder"]
        self.replace_anyway = kwargs.get("replace_anyway", True)

        self.back_cmp = None

        pass

    def init(self):
        self.current_schedule = Schedule({node: [] for node in self.heft_planner.get_nodes()})

        initial_schedule = self.heft_planner.run(Schedule({node: [] for node in self.heft_planner.get_nodes()}))

        # print("heft solution!")
        # fsh = [hash(key) for key in initial_schedule.mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")


        # TODO: change these two ugly records
        result = self.ga_builder()(self.current_schedule, initial_schedule)


        # print("Ga solution is broken!")
        # fsh = [hash(key) for key in result[0][2].mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")



        if not self._apply_mh_if_better(None, heuristic_resulted_schedule=initial_schedule,
                           metaheuristic_resulted_schedule=result[0][2]):
            self.current_schedule = initial_schedule
            self._post_new_events()

        # print("Before Before!")
        # fsh = [hash(key) for key in self.current_schedule.mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")


        #self.current_schedule = result[0][2]
        #self._post_new_events()
        return result

    def _task_start_handler(self, event):

        res = self._check_event_for_ga_result(event)
        if res:
            return
        # check task as executing
        # self.current_schedule.change_state(event.task, ScheduleItem.EXECUTING)
        # try to find nodes in cloud
        # check if failed and post
        (node, item) = self.current_schedule.place_by_time(event.task, event.time_happened)
        item.state = ScheduleItem.EXECUTING

        if not self._is_a_fail_possible():
            return

        if self._check_fail(event.task, node):
            # generate fail time, post it
            duration = self.base_fail_duration + self.base_fail_dispersion *random.random()
            time_of_fail = (item.end_time - self.current_time)*random.random()
            time_of_fail = self.current_time + (time_of_fail if time_of_fail > 0 else 0.01) ##(item.end_time - self.current_time)*0.01

            event_failed = NodeFailed(node, event.task)
            event_failed.time_happened = time_of_fail

            event_nodeup = NodeUp(node)
            event_nodeup.time_happened = time_of_fail + duration

            self.post(event_failed)
            self.post(event_nodeup)


        pass

    def _task_finished_handler(self, event):
        # check task finished
        self.current_schedule.change_state_executed(event.task, ScheduleItem.FINISHED)
        self._check_event_for_ga_result(event)
        pass

    def _node_failed_handler(self, event):

        if not self._is_a_fail_possible():
            return



        self._remove_events(lambda ev: not (isinstance(ev, TaskFinished) and ev.task.id == event.task.id))

        ## interrupt ga
        self._stop_ga()
        # check node down
        self.resource_manager.node(event.node).state = Node.Down
        # check failed event in schedule
        ## TODO: ambiguous choice
        ##self.current_schedule.change_state(event.task, ScheduleItem.FAILED)
        it = [item for item in self.current_schedule.mapping[event.node] if item.job.id == event.task.id and item.state == ScheduleItem.EXECUTING]
        if len(it) != 1:
            raise Exception(" Trouble in finding of the task: count of found tasks {0}".format(len(it)))

        it[0].state = ScheduleItem.FAILED
        it[0].end_time = self.current_time

        # print("Before!")
        # fsh = [hash(key) for key in self.current_schedule.mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")

        # run HEFT
        self._reschedule(event)

        # print("After!")
        # fsh = [hash(key) for key in self.current_schedule.mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")


        #run GA
        self._run_ga_in_background(event)
        pass

    def _node_up_handler(self, event):
        ## interrupt ga
        self._stop_ga()
        # check node up
        self.heft_planner.resource_manager.node(event.node).state = Node.Unknown

        # print("Before!")
        # fsh = [hash(key) for key in self.current_schedule.mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")

        self._reschedule(event)

        # print("After!")
        # fsh = [hash(key) for key in self.current_schedule.mapping.keys()]
        # rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
        # if any(((h not in fsh) for h in rm_hashes)):
        #     raise Exception("Fixed schedule is broken")
        #run GA
        self._run_ga_in_background(event)
        pass

    def _stop_ga(self):
        self.back_cmp = None
        pass

    def _actual_ga_run(self):

        ## this way makes it possible to calculate how much time
        ## GA actually has to find a solution
        ## this value is important when you need to account for events between
        ## the planned start and stop points
        # ga_interval = self.current_time - self.back_cmp.creation_time

        ## fixed_schedule is up to date because
        ## we can only get here if there haven't been any invalidating events
        ## such as node failures;
        ## otherwise the current GA background computation would have been dropped
        ## and we wouldn't get here at all
        result = self.ga_builder()(self.back_cmp.fixed_schedule,
                                   # self.back_cmp.initial_schedule,
                                   self.back_cmp.current_schedule,
                                   self.current_time)
        print("CURRENT MAKESPAN: {0}".format(Utility.makespan(result[0][2])))
        return result

    def _check_event_for_ga_result(self, event):

        # check for time to get result from GA running background
        if self.back_cmp is None or self.back_cmp.time_to_stop != self.current_time:
            return False
        else:
            print("Event {0}".format(event))
            if isinstance(event, TaskStart):
                print("Task id {0}".format(event.task.id))
            result = self._actual_ga_run()

        if result is not None:
            return self._apply_mh_if_better(event, heuristic_resulted_schedule=self.current_schedule,
                                      metaheuristic_resulted_schedule=result[0][2])

        return False

    def _replace_current_schedule(self, event, new_schedule):
        # synchronize the fixed part of new_schedule with the old schedule - let's assume new_schedule is already synchronized
        # remove all events related to the old schedule
        # replace the current schedule with the new one
        # generate events of the new schedule and post them
        if event is not None:
            self._clean_events(event)
        self.current_schedule = new_schedule
        self._post_new_events()

        self.back_cmp = None
        pass

    def _apply_mh_if_better(self, event, heuristic_resulted_schedule, metaheuristic_resulted_schedule):
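        # Compare makespans and adopt the metaheuristic (GA) schedule only if it is
        # better than the heuristic one, unless replace_anyway forces the replacement.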
        t1 = Utility.makespan(metaheuristic_resulted_schedule)
        t2 = Utility.makespan(heuristic_resulted_schedule)
        print("Replace anyway - {0}".format(self.replace_anyway))
        if self.replace_anyway is True or t1 < t2:
            ## generate new events
            self._replace_current_schedule(event, metaheuristic_resulted_schedule)
            ## if the event is a TaskStart event, the return value means skipping further processing
            return True
        else:
            ## TODO: run_ga_yet_another_with_old_genome
            # self.ga_computation_manager.run(self.current_schedule, self.current_time)
            #self._run_ga_in_background(event)
            self.back_cmp = None
            return False
        pass

    # def _is_a_fail_possible(self):
    #     if len([nd for nd in self.resource_manager.get_nodes() if nd.state != Node.Down]) == 1:
    #         print("DECLINE NODE DOWN")
    #         st = functools.reduce(operator.add, (" {0} - {1}".format(nd.name, nd.state) for nd in self.resource_manager.get_nodes()), "")
    #         print("STATE INFORMATION: " + st)
    #         return False
    #     return True

    def _is_a_fail_possible(self):
        return True



    def _run_ga_in_background(self, event):

        if len([nd for nd in self.resource_manager.get_nodes() if nd.state != Node.Down]) == 0:
            return

        current_schedule = self.current_schedule
        current_time = self.current_time
        ## TODO: replace by log call
        print("Time: " + str(current_time) + " Creating reschedule point ")
        ## there can be several events at the same time;
        ## we choose the first one to handle the background GA run
        def _get_front_line(schedule, current_time, fixed_interval):
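            # Find an item whose end time is the earliest one falling after
            # current_time + fixed_interval; its end time becomes the reschedule point
            # at which the background GA result will be applied.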
            event_time = current_time + fixed_interval
            min_item = ScheduleItem.MIN_ITEM()

            for (node, items) in schedule.mapping.items():
                for item in items:
                    ## It accounts for the case when event_time falls into a transfer gap (a rare situation for all nodes)
                    ## TODO: compare with some precision
                    if event_time < item.end_time < min_item.end_time:
                        min_item = item
                        break

            if min_item.job is None:
                return None
            print("Time: " + str(current_time) + " reschedule point have been founded st:" + str(min_item.start_time) + " end:" + str(min_item.end_time))
            return min_item

        def _get_fixed_schedule(schedule, front_event):
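            # Freeze the part of the schedule preceding the front event: keep copies of
            # items that start before its end time (plus FINISHED/FAILED ones) and mark
            # unfinished copies as FINISHED or EXECUTING depending on whether they end
            # before the front event.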
            def is_before_event(item):
                # hard-to-resolve corner case: the simulator doesn't guarantee the order in which events appear.
                if item.start_time < front_event.end_time:
                    return True
                ## TODO: Urgent!!! experimental change. Perhaps it should be removed from here later.
                if item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.FAILED:
                    return True
                return False
            ## TODO: it's a dangerous operation.
            ## TODO: need to create a new instance of ScheduleItem.
            def set_proper_state(item):

                new_item = ScheduleItem.copy(item)

                non_finished = new_item.state == ScheduleItem.EXECUTING or new_item.state == ScheduleItem.UNSTARTED
                ## TODO: Urgent!: dangerous place
                if non_finished and new_item.end_time <= front_event.end_time:
                    new_item.state = ScheduleItem.FINISHED
                if non_finished and new_item.end_time > front_event.end_time:
                    new_item.state = ScheduleItem.EXECUTING
                return new_item
            fixed_mapping = {key: [set_proper_state(item) for item in items if is_before_event(item)] for (key, items) in schedule.mapping.items()}
            return Schedule(fixed_mapping)

        ## TODO: make previous_result used
        def run_ga(current_schedule):
            fixed_interval = self.fixed_interval_for_ga
            front_event = _get_front_line(current_schedule, current_time, fixed_interval)
            # we can't meet the end of computation so we do nothing
            if front_event is None:
                print("GA's computation isn't able to meet the end of computation")
                return
            fixed_schedule = _get_fixed_schedule(current_schedule, front_event)

            # TODO: It isn't a good, reliable solution. It should be reconsidered later.
            fixed_ids = set(fixed_schedule.get_all_unique_tasks_id())
            all_ids = set(task.id for task in self.workflow.get_all_unique_tasks())

            ## TODO: urgent bugfix to correctly run GaHeftvsHeft
            if len(fixed_ids) == len(all_ids):
                print("Fixed schedule is complete. There is no use to run ga.")
                return

            fsh = [hash(key) for key in fixed_schedule.mapping.keys()]
            rm_hashes = [hash(node) for node in self.resource_manager.get_nodes()]
            if any(((h not in fsh) for h in rm_hashes)):
                raise Exception("Fixed schedule is broken")

            self.back_cmp = BackCmp(fixed_schedule, None, self.current_schedule, event, current_time, front_event.end_time)
            pass

        is_running = self.back_cmp is not None

        if not is_running:
            run_ga(current_schedule)
        else:
            self.back_cmp = None
            run_ga(current_schedule)


        ## TODO: only for debug. remove it later.
        # print("==================FIXED SCHEDULE PART=================")
        # print(self.back_cmp.fixed_schedule)
        # print("======================================================")

    pass
Example #32
0
class GaOldPopExecutor(FailOnce, BaseExecutor):

    def __init__(self, **kwargs):

        super().__init__()

        self.estimator = kwargs["estimator"]
        self.base_fail_duration = kwargs["base_fail_duration"]
        self.base_fail_dispersion = kwargs["base_fail_dispersion"]
        self.workflow = kwargs["wf"]
        self.resource_manager = kwargs["resource_manager"]
        self.stat_saver = kwargs["stat_saver"]
        self.task_id_to_fail = kwargs["task_id_to_fail"]
        self.ga_builder = kwargs["ga_builder"]

        self.current_schedule = None
        self.past_pop = None
        pass

    def init(self):
        ## TODO: replace it with logging
        print("Working with initial state of nodes: {0}".format([n.flops for n in self.resource_manager.get_nodes()]))

        ga_planner = self.ga_builder()
        self.current_schedule = Schedule({node: [] for node in self.resource_manager.get_nodes()})
        (result, logbook) = ga_planner(self.current_schedule, None)
        self.past_pop = ga_planner.get_pop()
        print("Result makespan: " + str(Utility.makespan(result[2])))
        self.current_schedule = result[2]
        self._post_new_events()

        self.failed_once = False
        pass

    def _task_start_handler(self, event):
        # check task as executing
        # self.current_schedule.change_state(event.task, ScheduleItem.EXECUTING)
        # try to find nodes in cloud
        # check if failed and post
        (node, item) = self.current_schedule.place_by_time(event.task, event.time_happened)
        item.state = ScheduleItem.EXECUTING

        if self._check_fail(event.task, node):
            # generate fail time, post it
            duration = self.base_fail_duration + self.base_fail_dispersion *random.random()
            time_of_fail = (item.end_time - self.current_time)*random.random()
            time_of_fail = self.current_time + (time_of_fail if time_of_fail > 0 else 0.01) ##(item.end_time - self.current_time)*0.01

            event_failed = NodeFailed(node, event.task)
            event_failed.time_happened = time_of_fail

            # event_nodeup = NodeUp(node)
            # event_nodeup.time_happened = time_of_fail + duration

            self.post(event_failed)
            # self.post(event_nodeup)

            # remove TaskFinished event
            ##TODO: make a function for this purpose in the base class
            self.queue = deque([ev for ev in self.queue if not (isinstance(ev, TaskFinished) and ev.task.id == event.task.id)])
        pass

    def _task_finished_handler(self, event):
        # check task finished
        self.current_schedule.change_state_executed(event.task, ScheduleItem.FINISHED)
        pass

    def _node_failed_handler(self, event):
        self.resource_manager.node(event.node).state = Node.Down
        it = [item for item in self.current_schedule.mapping[event.node] if item.job.id == event.task.id and item.state == ScheduleItem.EXECUTING]
        if len(it) != 1:
            raise Exception("several items founded")
            pass

        it[0].state = ScheduleItem.FAILED
        it[0].end_time = self.current_time

        self._reschedule(event)
        pass

    def _node_up_handler(self, event):
        self.resource_manager.node(event.node).state = Node.Unknown
        self._reschedule(event)
        pass

    #@timing
    def _clean_chromosome(self, chromosome, event, current_cleaned_schedule):
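        # Remove tasks that are already FINISHED or EXECUTING in the cleaned schedule
        # from every node's gene; on a node failure, redistribute the failed node's
        # remaining tasks to random positions on the surviving nodes.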

        not_scheduled_tasks = [ item.job.id for (node, items) in current_cleaned_schedule.mapping.items() for item in items if item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.EXECUTING]

        for (node_name, ids) in chromosome.items():
            for_removing = []
            for id in ids:
                if id in not_scheduled_tasks:
                    for_removing.append(id)
                pass
            for r in for_removing:
                ids.remove(r)
                pass
            pass

        if isinstance(event, NodeFailed):
            tasks = chromosome[event.node.name]
            ## TODO: a procedure for getting the currently alive nodes should be used here
            working_nodes = list(chromosome.keys() - set([event.node.name]))
            for t in tasks:
                lt = len(working_nodes) - 1
                new_node = 0 if lt == 0 else random.randint(0, lt )
                node_name = working_nodes[new_node]
                length = len(chromosome[node_name])
                # TODO: correct 0 and length
                new_place = 0 if length == 0 else random.randint(0, length)
                chromosome[node_name].insert(new_place, t)
            chromosome[event.node.name] = []
            return chromosome
        if isinstance(event, NodeUp):
            pass
        return chromosome

    def _reschedule(self, event):
        current_cleaned_schedule = self._clean_events(event)

        task_id = "" if not hasattr(event, 'task') else " " + str(event.task.id)
        ## scheduling with an initial population created from the previous population by moving elements off the downed node
        print("Scheduling with the old pop: " + str(event.__class__.__name__) + task_id )
        ga_planner = self.ga_builder()

        cleaned_chromosomes = [self._clean_chromosome(ch, event, current_cleaned_schedule) for ch in self.past_pop]
        def is_empty(ch):
            return len([item for n, items in ch.items() for item in items]) == 0
        cleaned_chromosomes = [ch for ch in cleaned_chromosomes if not is_empty(ch)]
        cleaned_chromosomes = None if len(cleaned_chromosomes) == 0 else cleaned_chromosomes

        curr_ids = frozenset(current_cleaned_schedule.get_all_unique_tasks_id())
        all_ids = frozenset(t.id for t in self.workflow.get_all_unique_tasks())
        if all_ids == curr_ids:
            print("Schedule alleady has all unique tasks")
            return

        ((v1, v2, resulted_schedule, iter_old_pop), logbook_old_pop) = ga_planner(current_cleaned_schedule, None, self.current_time, initial_population=cleaned_chromosomes)
        #checking
        Utility.check_and_raise_for_fixed_part(resulted_schedule, current_cleaned_schedule, self.current_time)
        makespan_old_pop = Utility.makespan(resulted_schedule)
        print("Result makespan: " + str(makespan_old_pop))



        self.current_schedule = resulted_schedule
        self.past_pop = ga_planner.get_pop()

        ## scheduling with random initial population
        print("Scheduling with a random pop: " + str(event.__class__.__name__)+ task_id)
        ga_planner_with_random_init_population = self.ga_builder()
        ((v3, v4, schedule_with_random, iter_random), logbook_random) = ga_planner_with_random_init_population(current_cleaned_schedule, None, self.current_time, initial_population=None)

        Utility.check_and_raise_for_fixed_part(schedule_with_random, current_cleaned_schedule, self.current_time)
        makespan_random = Utility.makespan(schedule_with_random)

        print("Result makespan: " + str(Utility.makespan(schedule_with_random)))


        # creating and writing some stat data
        # Note: it could be rewritten using events
        if self.stat_saver is not None:
            stat_data = {
                "wf_name": self.workflow.name,
                "event_name": event.__class__.__name__,
                "task_id": task_id,
                "with_old_pop": {
                    "iter": iter_old_pop,
                    "makespan": makespan_old_pop,
                    "pop_aggr": logbook_old_pop
                },
                "with_random": {
                    "iter": iter_random,
                    "makespan": makespan_random,
                    "pop_aggr": logbook_random
                }
            }
            self.stat_saver(stat_data)


        self._post_new_events()
        pass

    pass
Example #33
0
class CloudHeftExecutor(EventMachine):

    STATUS_RUNNING = 'running'
    STATUS_FINISHED = 'finished'

    def __init__(self,
                 heft_planner,
                 base_fail_duration,
                 base_fail_dispersion,
                 desired_reliability,
                 public_resource_manager,
                 initial_schedule=None):
        ## TODO: remake it later
        self.queue = deque()
        self.current_time = 0
        # DynamicHeft
        self.heft_planner = heft_planner
        self.base_fail_duration = base_fail_duration
        self.base_fail_dispersion = base_fail_dispersion
        self.desired_reliability = desired_reliability
        self.public_resources_manager = public_resource_manager
        #self.current_schedule = Schedule({node: [] for node in heft_planner.get_nodes()})
        self.initial_schedule = initial_schedule
        self.current_schedule = initial_schedule

        self.register = dict()

    def init(self):
        #self.current_schedule = self.heft_planner.run(self.current_schedule)
        if self.initial_schedule is None:
            self.current_schedule = Schedule(
                {node: []
                 for node in self.heft_planner.get_nodes()})
            self.current_schedule = self.heft_planner.run(
                self.current_schedule)
        else:
            id_to_task = {
                tsk.id: tsk
                for tsk in HeftHelper.get_all_tasks(self.heft_planner.workflow)
            }
            mapping = {
                node: [
                    ScheduleItem(id_to_task[item.job.id], item.start_time,
                                 item.end_time) for item in items
                ]
                for (node, items) in self.initial_schedule.mapping.items()
            }
            self.current_schedule = Schedule(mapping)
        self.post_new_events()

    def event_arrived(self, event):
        def reschedule(event):
            self.heft_planner.current_time = self.current_time
            current_cleaned_schedule = self.clean_events(event)
            self.current_schedule = self.heft_planner.run(
                current_cleaned_schedule)
            self.post_new_events()

        def check_fail(reliability):
            res = random.random()
            if res > reliability:
                return True
            return False

        if isinstance(event, TaskStart):

            # TODO: if node is cloud node, do nothing
            prm = self.public_resources_manager
            if prm.isCloudNode(event.node):
                return None

            # check if failed and post
            (node, item) = self.current_schedule.place_by_time(
                event.task, event.time_happened)
            item.state = ScheduleItem.EXECUTING

            # check task as executing
            # self.current_schedule.change_state(event.task, ScheduleItem.EXECUTING)

            # public_resources_manager:
            #   determine nodes of proper soft type
            #   check and determine free nodes
            #   determine reliability of every nodes
            #   determine time_of_execution probability for (task,node) pair

            # try to find nodes in cloud

            if event.task not in self.register:

                proper_nodes = prm.get_by_softreq(event.task.soft_reqs)
                proper_nodes = [
                    node for node in proper_nodes if not prm.isBusy(node)
                ]
                sorted_proper_nodes = sorted(
                    proper_nodes, key=lambda x: prm.get_reliability(x.name))
                current_set = []

                base_reliability = self.heft_planner.estimator.estimate_reliability(
                    event.task, event.node)
                obtained_reliability = base_reliability
                dt = item.end_time - item.start_time

                def calc(node, dt):
                    #(dt, task, node, transfer_estimation)
                    # TODO: add proper transfer time here
                    fp = prm.get_reliability(node.name)
                    comp_time = self.heft_planner.estimator.estimate_runtime(
                        event.task, node)
                    cp = prm.probability_estimator(dt, comp_time, 0)
                    #TODO: remove it later
                    #cp = 0.95
                    #print("cp: " + str(cp))
                    return (node, fp, cp)

                it_comm_buf = 0
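                # Keep adding candidate cloud nodes until the combined probability that
                # at least one copy (dedicated or cloud replica) finishes successfully
                # reaches the desired reliability.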
                for pnode in sorted_proper_nodes:
                    common_reliability = 1 - base_reliability
                    #TODO: refactor this later
                    if 1 - common_reliability >= self.desired_reliability:
                        break
                    res = calc(pnode, dt)
                    current_set.append(res)
                    # TODO: add a probability density law for the dedicated resource

                    for (nd, fp, cp) in current_set:
                        common_reliability *= (1 - fp * cp)
                    common_reliability = 1 - common_reliability
                    #print("common_reliability: " + str(common_reliability))
                    it_comm_buf = common_reliability
                    if common_reliability >= self.desired_reliability:
                        #print("Commmon: "+ str(common_reliability))
                        break

                #print("Comm " + str(it_comm_buf) + " task: " + str(event.task.id))
                #print(" Obtained reliability " + str(obtained_reliability) + " for task: " + str(event.task))

                def frange(x, y, jump):
                    while x < y:
                        yield x
                        x += jump

                for (nd, fp, cp) in current_set:
                    comp_time = self.heft_planner.estimator.estimate_runtime(
                        event.task, nd)
                    #sigma 0.1*M lets take 0.6*M

                    #TODO: uncomment it later

                    ints = [(i, calc(nd, i))
                            for i in frange(0, comp_time +
                                            0.2 * comp_time, 0.05 * comp_time)]
                    rd = random.random()
                    generated_comp_time = comp_time
                    for (i, p) in ints:
                        if p[2] > rd:
                            generated_comp_time = i
                            break

                    #comp_time + 0.6*comp_time
                    # TODO: remove it later
                    #generated_comp_time = comp_time + (0.2 * comp_time * random.random() - 0.1 * comp_time)
                    #generated_comp_time = comp_time - (0.2 * comp_time * (random.random() - 0.95))

                    #print("cloud reliability: " + str(fp))
                    if check_fail(fp):

                        event_start = TaskStart(event.task)
                        event_start.time_happened = self.current_time
                        event_start.node = nd
                        self.post(event_start)

                        duration = self.base_fail_duration + self.base_fail_dispersion * random.random(
                        )
                        time_of_fail = generated_comp_time * random.random()
                        time_of_fail = self.current_time + (
                            time_of_fail if time_of_fail > 0 else 0.01
                        )  ##(item.end_time - self.current_time)*0.01

                        event_failed = NodeFailed(nd, event.task)
                        event_failed.time_happened = time_of_fail

                        event_nodeup = NodeUp(nd)
                        event_nodeup.time_happened = time_of_fail + duration

                        self.post(event_failed)
                        self.post(event_nodeup)
                    else:
                        event_start = TaskStart(event.task)
                        event_start.time_happened = self.current_time
                        event_start.node = nd

                        event_finish = TaskFinished(event.task)
                        event_finish.time_happened = self.current_time + generated_comp_time
                        event_finish.node = nd

                        self.post(event_start)
                        self.post(event_finish)

                    prm.checkBusy(nd, True)

                self.register[event.task] = CloudHeftExecutor.STATUS_RUNNING
                pass

            reliability = self.heft_planner.estimator.estimate_reliability(
                event.task, node)
            if check_fail(reliability):
                # generate fail time, post it
                duration = self.base_fail_duration + self.base_fail_dispersion * random.random(
                )
                time_of_fail = (item.end_time -
                                self.current_time) * random.random()
                time_of_fail = self.current_time + (
                    time_of_fail if time_of_fail > 0 else 0.01
                )  ##(item.end_time - self.current_time)*0.01

                event_failed = NodeFailed(node, event.task)
                event_failed.time_happened = time_of_fail

                event_nodeup = NodeUp(node)
                event_nodeup.time_happened = time_of_fail + duration

                self.post(event_failed)
                self.post(event_nodeup)
                # remove TaskFinished event
                self.queue = deque([
                    ev for ev in self.queue
                    if not (isinstance(ev, TaskFinished) and ev.task.id ==
                            event.task.id and not prm.isCloudNode(ev.node))
                ])

                pass
            return None
        if isinstance(event, TaskFinished):

            # check if it is a cloud task
            # if task cloud and first: register as finished, check node in dedicated as finish, remove appropriate event of failure or task finished for dedicated, free cloud node, reschedule, end_of_function
            # if task cloud and not first: free cloud node, end_of_function
            # if task not cloud and first: register as finished, check node in dedicated as finish, end_of_function
            prm = self.public_resources_manager
            from_cloud = prm.isCloudNode(event.node)
            if from_cloud and self.register[
                    event.task] == CloudHeftExecutor.STATUS_RUNNING:
                # print("gotcha task: " + str(event.task))
                self.register[event.task] = CloudHeftExecutor.STATUS_FINISHED
                ## TODO: correct it
                ## if event.task failed and went through rescheduling,
                ## it is possible that the current ScheduleItem of event.task on the dedicated resource
                ## has the UNSTARTED state.
                ## TODO: add additional functionality to the schedule to record such situations and validate them afterwards
                found = self.current_schedule.change_state_executed_with_end_time(
                    event.task, ScheduleItem.FINISHED, self.current_time)
                pair = self.current_schedule.place_single(event.task)
                if pair is not None:
                    ## TODO: The bug is here. Fix it later.
                    ## the unstarted case must be taken into account in schedule and in the validity check procedure too
                    (nd, item) = pair
                    if item.state == ScheduleItem.EXECUTING:
                        item.start_time = event.time_happened
                        item.end_time = event.time_happened
                        item.state = ScheduleItem.FINISHED
                        self.queue = deque([
                            ev for ev in self.queue
                            if not (not isinstance(ev, NodeUp)
                                    and ev.task.id == event.task.id)
                        ])
                    else:
                        prm.checkBusy(event.node, False)
                        return None

                def check(ev):
                    if isinstance(ev, TaskFinished) or isinstance(
                            ev, NodeFailed):
                        if ev.task.id == event.task.id and not prm.isCloudNode(
                                ev.node):
                            return False
                    ## TODO: make it later
                    ##if isinstance(ev, NodeUp):
                    return True

                self.queue = deque([ev for ev in self.queue if check(ev)])
                prm.checkBusy(event.node, False)
                reschedule(event)
                return None
            if from_cloud and self.register[
                    event.task] == CloudHeftExecutor.STATUS_FINISHED:
                prm.checkBusy(event.node, False)
                return None

            # check task finished
            self.register[event.task] = CloudHeftExecutor.STATUS_FINISHED
            self.current_schedule.change_state_executed(
                event.task, ScheduleItem.FINISHED)
            return None
        if isinstance(event, NodeFailed):

            # check if cloud node
            # if cloud node: check as down, free node, end_of_function
            # if not cloud node: check as down, reschedule, end_of_function
            prm = self.public_resources_manager
            from_cloud = prm.isCloudNode(event.node)

            if from_cloud:
                prm.checkDown(event.node.name, True)
                prm.checkBusy(event.node, False)
                return None

            # check node down
            self.heft_planner.resource_manager.node(
                event.node).state = Node.Down
            # check failed event in schedule
            ## TODO: ambiguous choice
            ##self.current_schedule.change_state(event.task, ScheduleItem.FAILED)
            it = [
                item for item in self.current_schedule.mapping[event.node]
                if item.job.id == event.task.id
                and item.state == ScheduleItem.EXECUTING
            ]
            if len(it) != 1:
                ## TODO: raise exception here
                pass

            it[0].state = ScheduleItem.FAILED
            it[0].end_time = self.current_time

            reschedule(event)
            return None
        if isinstance(event, NodeUp):

            # check if cloud
            # if cloud: check node up, end_of_function
            # if not cloud: check as up, reschedule end_of_function
            prm = self.public_resources_manager
            from_cloud = prm.isCloudNode(event.node)
            if from_cloud:
                prm.checkDown(event.node.name, False)
                return None

            # check node up
            self.heft_planner.resource_manager.node(
                event.node).state = Node.Unknown
            reschedule(event)
            return None
        return None

    def post_new_events(self):
        unstarted_items = set()
        for (node, items) in self.current_schedule.mapping.items():
            for item in items:
                if item.state == ScheduleItem.UNSTARTED:
                    unstarted_items.add((node, item))

        events_to_post = []
        for (node, item) in unstarted_items:
            event_start = TaskStart(item.job)
            event_start.time_happened = item.start_time
            event_start.node = node

            event_finish = TaskFinished(item.job)
            event_finish.time_happened = item.end_time
            event_finish.node = node

            self.post(event_start)
            self.post(event_finish)
        pass

    def clean_events(self, event):

        # remove all unstarted tasks
        cleaned_task = set()
        if isinstance(event, NodeFailed):
            cleaned_task = set([event.task])

        new_mapping = dict()
        for (node, items) in self.current_schedule.mapping.items():
            new_mapping[node] = []
            for item in items:
                if item.state != ScheduleItem.UNSTARTED:
                    new_mapping[node].append(item)
                else:
                    cleaned_task.add(item.job)
        clean_schedule = Schedule(new_mapping)
        # remove all events associated with these tasks
        prm = self.public_resources_manager

        def check(event):
            if isinstance(
                    event, TaskStart
            ) and event.task in cleaned_task and not prm.isCloudNode(
                    event.node):
                return False
            if isinstance(
                    event, TaskFinished
            ) and event.task in cleaned_task and not prm.isCloudNode(
                    event.node):
                return False
            return True

        new_queue = deque([evnt for evnt in self.queue if check(evnt)])
        self.queue = new_queue
        return clean_schedule
     def schedule(self, fixed_schedule_part=None, current_time=0.0):

         estimate = self.estimator.estimate_transfer_time
         # TODO: make common utility function with ScheduleBuilder
         def is_last_version_of_task_executing(item):
            return item.state == ScheduleItem.EXECUTING or item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.UNSTARTED

         def _get_ready_tasks(children, finished_tasks):
            def _is_child_ready(child):
                ids = set([p.id for p in child.parents])
                result = False in [id in finished_tasks for id in ids]
                return not result
            ready_children = [child for child in children if _is_child_ready(child)]
            return ready_children

         if fixed_schedule_part is None:
            schedule_mapping = {node: [] for node in self.nodes}
            ready_tasks = [child.id for child in self.workflow.head_task.children]
            task_to_node = dict()
            finished_tasks = set()
         else:
            schedule_mapping = {node: [item for item in items] for (node, items) in fixed_schedule_part.mapping.items()}
            finished_tasks = [item.job.id for (node, items) in fixed_schedule_part.mapping.items() for item in items if is_last_version_of_task_executing(item)]
            finished_tasks = set([self.workflow.head_task.id] + finished_tasks)
            unfinished = [task for task in self.workflow.get_all_unique_tasks() if not task.id in finished_tasks]
            ready_tasks = [task.id for task in _get_ready_tasks(unfinished, finished_tasks)]
            task_to_node = {item.job.id: (node, item.start_time, item.end_time) for (node, items) in fixed_schedule_part.mapping.items() for item in items if is_last_version_of_task_executing(item)}


         def is_child_ready(child):
            ids = set([p.id for p in child.parents])
            result = False in [id in finished_tasks for id in ids]
            return not result


         def find_slots(node, comm_ready, runtime):
             node_schedule = schedule_mapping.get(node, list())
             free_time = 0 if len(node_schedule) == 0 else node_schedule[-1].end_time
             ## TODO: refactor it later
             f_time = max(free_time, comm_ready)
             f_time = max(f_time, current_time)
             base_variant = [(f_time, f_time + runtime + 1)]
             zero_interval = [] if len(node_schedule) == 0 else [(0, node_schedule[0].start_time)]
             middle_intervals = [(node_schedule[i].end_time, node_schedule[i + 1].start_time) for i in range(len(node_schedule) - 1)]
             intervals = zero_interval + middle_intervals + base_variant

             #result = [(st, end) for (st, end) in intervals if st >= comm_ready and end - st >= runtime]
             ## TODO: rethink rounding
             result = [(st, end) for (st, end) in intervals if (current_time < st or abs((current_time - st)) < 0.01) and st >= comm_ready and (runtime < (end - st) or abs((end - st) - runtime) < 0.01)]
             return result

         def comm_ready_func(task, node):
                ##TODO: remake this stub later.
                if len(task.parents) == 1 and self.workflow.head_task.id == list(task.parents)[0].id:
                    return 0
                return max([task_to_node[p.id][2] + estimate(node, task_to_node[p.id][0], task, p) for p in task.parents])



         def get_possible_execution_times(task, node):
            ## pay attention to the last element in the resulted seq
            ## it represents all available time of node after it completes all its work
            ## (if such interval can exist)
            ## time_slots = [(st1, end1),(st2, end2,...,(st_last, st_last + runtime)]
            runtime = self.estimator.estimate_runtime(task, node)
            comm_ready = comm_ready_func(task, node)
            time_slots = find_slots(node, comm_ready, runtime)
            return time_slots, runtime

         while len(ready_tasks) > 0:
            chosen_index = random.randint(0, len(ready_tasks) - 1)
            task = self.task_map[ready_tasks[chosen_index]]

            # TODO: add a check for the case when all nodes are dead
            # (it is a very rare situation, so it is not considered for now)
            alive_nodes = [node for node in self.nodes if node.state != Node.Down]
            chosen_node_index = random.randint(0, len(alive_nodes) - 1)
            node = alive_nodes[chosen_node_index]


            time_slots, runtime = get_possible_execution_times(task, node)
            chosen_time_index = 0 if len(time_slots) == 1 else random.randint(0, len(time_slots) - 1)
            time_slot = time_slots[chosen_time_index]

            start_time = time_slot[0]
            end_time = start_time + runtime

            item = ScheduleItem(task, start_time, end_time)
            Schedule.insert_item(schedule_mapping, node, item)
            task_to_node[task.id] = (node, start_time, end_time)

            ready_tasks.remove(task.id)
            finished_tasks.add(task.id)

            ready_children = [child for child in task.children if is_child_ready(child)]
            for child in ready_children:
                ready_tasks.append(child.id)

         schedule = Schedule(schedule_mapping)
         return schedule
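
The core of this random builder is the slot search: find_slots interleaves the gaps between already-placed items with one open-ended interval after the last item, then keeps only the windows that start after the data has arrived and are long enough for the task. Below is a minimal standalone sketch of that idea over plain (start, end) tuples; the names free_intervals and busy are illustrative and are not part of the original class.

def free_intervals(busy, comm_ready, runtime, current_time, eps=0.01):
    """busy is a sorted list of (start, end) tuples already occupying the node."""
    if not busy:
        start = max(comm_ready, current_time)
        return [(start, start + runtime + 1)]
    gaps = [(0, busy[0][0])]
    gaps += [(busy[i][1], busy[i + 1][0]) for i in range(len(busy) - 1)]
    tail = max(busy[-1][1], comm_ready, current_time)
    gaps.append((tail, tail + runtime + 1))
    # keep only gaps that start after the data arrives and are long enough for the task
    return [(st, end) for (st, end) in gaps
            if st + eps >= current_time and st >= comm_ready
            and end - st + eps >= runtime]

# e.g. with two items already placed on the node:
print(free_intervals(busy=[(0, 5), (10, 20)], comm_ready=3, runtime=4, current_time=0))
# -> [(5, 10), (20, 25)]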
Example #35
0
    def __call__(self, chromo, current_time):

        count_of_tasks = lambda mapping: sum(len(tasks) for tasks in mapping.values())
        alive_nodes = [node for node in self.nodes if node.state != Node.Down]

        alive_nodes_names = [node.name for node in alive_nodes]
        for node_name, tasks in chromo.items():
            if node_name not in alive_nodes_names and len(tasks) > 0:
                raise ValueError(
                    "Chromo is invalid. There is a task assigned to a dead node"
                )
        if count_of_tasks(chromo) + len(
                self.fixed_schedule_part.get_unfailed_tasks_ids()) != len(
                    self.workflow.get_all_unique_tasks()):

            print("==Chromosome==================================")
            print(chromo)
            print("=fixed_schedule_part===================================")
            print(self.fixed_schedule_part)

            raise Exception(
                "The chromosome is not full. Chromo length: {0}, fixed part length: {1}, workflow size: {2}"
                .format(count_of_tasks(chromo),
                        len(self.fixed_schedule_part.get_unfailed_tasks_ids()),
                        len(self.workflow.get_all_unique_tasks())))

        # TODO: add not to schedule
        #if count_of_tasks(chromo) + count_of_tasks(self.fixed_schedule_part.mapping) !=

        (schedule_mapping, finished_tasks, ready_tasks, chrmo_mapping,
         task_to_node) = self._create_helping_structures(chromo)

        chromo_copy = deepcopy(chromo)

        if len(alive_nodes) == 0:
            raise Exception("There are no alive nodes")

        while len(ready_tasks) > 0:

            count_before = count_of_tasks(chromo_copy)
            if len(alive_nodes) == 0:
                raise ValueError("Count of alive_nodes is zero")
            for node in alive_nodes:
                if len(chromo_copy[node.name]) == 0:
                    continue
                ## TODO: Urgent! completely rethink this procedure

                # take the first task in this node's gene that is already ready
                tsk_id = next((t for t in chromo_copy[node.name] if t in ready_tasks), None)

                if tsk_id is not None:
                    task = self.task_map[tsk_id]
                    chromo_copy[node.name].remove(tsk_id)
                    ready_tasks.remove(tsk_id)

                    (start_time, end_time) = place_task_to_schedule(
                        self.workflow, self.estimator, schedule_mapping,
                        task_to_node, chrmo_mapping, task, node, current_time)

                    task_to_node[task.id] = (node, start_time, end_time)

                    finished_tasks.add(task.id)

                    ready_children = self._get_ready_tasks(
                        task.children, finished_tasks)
                    for child in ready_children:
                        ready_tasks.append(child.id)
            count_after = count_of_tasks(chromo_copy)
            if count_before == count_after:
                raise Exception(
                    "Unable to properly process the chromosome,"
                    " perhaps due to an invalid fixed_schedule_part or chromosome."
                )
        schedule = Schedule(schedule_mapping)
        return schedule
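
The guard clauses at the top of this decoder enforce two invariants before any placement happens: no task may sit on a dead node, and the chromosome together with the fixed schedule part must cover every task of the workflow. A small self-contained sketch of the same checks over plain dicts and sets follows; validate_chromosome, fixed_ids and all_task_ids are illustrative names rather than the original API.

def validate_chromosome(chromo, alive_node_names, fixed_ids, all_task_ids):
    """chromo maps node name -> list of task ids; raises on an invalid individual."""
    for node_name, tasks in chromo.items():
        if tasks and node_name not in alive_node_names:
            raise ValueError("task assigned to a dead node: " + node_name)
    scheduled = sum(len(tasks) for tasks in chromo.values())
    if scheduled + len(fixed_ids) != len(all_task_ids):
        raise ValueError("chromosome does not cover the whole workflow")

# e.g. a two-node chromosome covering tasks a..d, with task e already fixed;
# no exception is raised because 4 scheduled + 1 fixed == 5 tasks
validate_chromosome({"n1": ["a", "b"], "n2": ["c", "d"]},
                    alive_node_names={"n1", "n2"},
                    fixed_ids={"e"},
                    all_task_ids={"a", "b", "c", "d", "e"})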
Example #36
0
    def __call__(self, chromo, current_time):

        (schedule_mapping, finished_tasks, ready_tasks, chrmo_mapping, task_to_node) = self._create_helping_structures(chromo)

        chromo_copy = {nd_name: list(items) for (nd_name, items) in chromo.items()}

        alive_nodes = [node for node in self.nodes if node.state != Node.Down]
        if len(alive_nodes) == 0:
            raise Exception("There are no alive nodes")

        while len(ready_tasks) > 0:

            for node in alive_nodes:
                if len(chromo_copy[node.name]) == 0:
                    continue
                if node.state == Node.Down:
                    continue

                ## TODO: Urgent! completely rethink this procedure

                # take the first task in this node's gene that is already ready
                tsk_id = next((t for t in chromo_copy[node.name] if t in ready_tasks), None)


                if tsk_id is not None:
                    task = self.task_map[tsk_id]
                    chromo_copy[node.name].remove(tsk_id)
                    ready_tasks.remove(tsk_id)

                    time_slots, runtime = self._get_possible_execution_times(
                                                    schedule_mapping,
                                                    task_to_node,
                                                    chrmo_mapping,
                                                    task,
                                                    node,
                                                    current_time)

                    time_slot = next(time_slots)
                    start_time = time_slot[0]
                    end_time = start_time + runtime

                    item = ScheduleItem(task, start_time, end_time)

                    # the current time needs to be taken into account here
                    Schedule.insert_item(schedule_mapping, node, item)
                    task_to_node[task.id] = (node, start_time, end_time)

                    finished_tasks.add(task.id)

                    ready_children = self._get_ready_tasks(task.children, finished_tasks)
                    for child in ready_children:
                        ready_tasks.append(child.id)


        schedule = Schedule(schedule_mapping)
        return schedule
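
Both decoders above share the same skeleton: sweep the alive nodes, take from each node's gene the first task whose parents have all finished, place it, and release its children. A stripped-down sketch of that loop over plain dictionaries, with timing left out; decode, deps and the other names are illustrative and not taken from the original classes.

def decode(chromo, deps):
    """chromo: node name -> list of task ids; deps: task id -> set of parent ids.
    Returns the order in which tasks would be placed, node by node."""
    remaining = {node: list(tasks) for node, tasks in chromo.items()}
    finished = set()
    ready = {t for t, parents in deps.items() if not parents}
    placement = []
    while any(remaining.values()):
        progress = False
        for node, tasks in remaining.items():
            tsk = next((t for t in tasks if t in ready), None)
            if tsk is None:
                continue
            tasks.remove(tsk)
            ready.discard(tsk)
            finished.add(tsk)
            placement.append((node, tsk))
            # release children whose parents are now all finished
            ready.update(t for t, parents in deps.items()
                         if t not in finished and t not in ready and parents <= finished)
            progress = True
        if not progress:
            raise ValueError("chromosome cannot be decoded: no ready task on any node")
    return placement

# e.g. a diamond-shaped workflow a -> {b, c} -> d split over two nodes
print(decode({"n1": ["a", "d"], "n2": ["b", "c"]},
             deps={"a": set(), "b": {"a"}, "c": {"a"}, "d": {"b", "c"}}))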