Example #1
0
    def schedule_task(self, workflow_uuid: str, task_id: int) -> None:
        """Schedule a single task using the Min-MinBUDG algorithm.

        The host is chosen by ``_get_best_host`` and the pot it returns
        is stored back on the workflow. When the chosen host is a VM
        type a new VM is initialized, when it is a VM instance that VM
        is reused. The task is then reserved on the VM and a
        ``FINISH_TASK`` event is queued at the predicted finish time.

        :param workflow_uuid: UUID of the workflow the task belongs to.
        :param task_id: ID of the task to schedule.
        :return: None.
        """

        now = self.event_loop.get_current_time()

        wf = self.workflows[workflow_uuid]
        task = wf.tasks[task_id]

        # Pick the host and keep the pot left over for later tasks.
        best_host, leftover_pot, duration = self._get_best_host(
            task=task,
            pot=wf.pot,
        )
        wf.pot = leftover_pot

        # Turn the host into a concrete VM: reuse the idle instance or
        # initialize a new VM of the selected type.
        vm = None
        if best_host.type == HostType.VMInstance:
            vm = best_host.host
        elif best_host.type == HostType.VMType:
            vm = self.vm_manager.init_vm(best_host.host)

            # Record the freshly initialized VM in the metric collector.
            self.collector.initialized_vms += 1
            self.collector.workflows[workflow_uuid].initialized_vms.append(vm)

        # IMPORTANT: provisioning time is not added to ``duration`` below
        #  as it has already been taken into account by the host search.
        needs_provisioning = vm.get_state() == vms.State.NOT_PROVISIONED
        if needs_provisioning:
            self.vm_manager.provision_vm(vm=vm, time=now)

        # Make sure the task's container is available on the VM.
        container = task.container
        if not vm.check_if_container_provisioned(container=container):
            vm.provision_container(container=container)

        price = cst.calculate_price_for_vm(
            current_time=now,
            use_time=duration,
            vm=vm,
        )

        # Occupy the VM and queue the task completion event.
        self.vm_manager.reserve_vm(vm=vm, task=task)

        finish_event = Event(
            start_time=now + timedelta(seconds=duration),
            event_type=EventType.FINISH_TASK,
            task=task,
            vm=vm,
        )
        self.event_loop.add_event(event=finish_event)

        # Update usage and cost metrics.
        wf_metrics = self.collector.workflows[workflow_uuid]
        wf_metrics.used_vms.add(vm)
        self.collector.used_vms.add(vm)
        wf_metrics.cost += price
Example #2
0
    def _find_cheapest_vm_for_task(
        self,
        task: Task,
        idle_vms: set[vms.VM],
    ) -> tp.Optional[vms.VM]:
        """Pick the cheapest idle VM that can meet the task's deadline.

        For each candidate the predicted execution time is extended by
        the container provisioning time when the task's container is
        not yet provisioned on that VM. Candidates whose finish time
        would exceed the deadline are ignored.

        :param task: task to execute on VMs.
        :param idle_vms: set of idle VMs to choose from.
        :return: VM with the lowest price, or None when no VM fits the
            deadline (which includes an empty ``idle_vms``).
        """

        now = self.event_loop.get_current_time()

        cheapest_vm: tp.Optional[vms.VM] = None
        lowest_price: tp.Optional[float] = None

        for candidate in idle_vms:
            duration = self.predict_func(
                task=task,
                vm_type=candidate.type,
                storage=self.storage_manager.get_storage(),
                vm=candidate,
            )

            # Container still has to be provisioned on this VM.
            if not candidate.check_if_container_provisioned(task.container):
                duration += task.container.provision_time

            # Candidates that would miss the deadline are not considered.
            misses_deadline = (
                now + timedelta(seconds=duration) > task.deadline
            )
            if misses_deadline:
                continue

            price = cst.calculate_price_for_vm(
                current_time=now,
                use_time=duration,
                vm=candidate,
            )

            # Strict comparison: on equal price the earlier candidate wins.
            is_cheaper = lowest_price is None or price < lowest_price
            if is_cheaper:
                cheapest_vm = candidate
                lowest_price = price

        return cheapest_vm
Example #3
0
    def schedule_task(self, workflow_uuid: str, task_id: int) -> None:
        """Schedule a single task using the EBPSM algorithm.

        The fastest idle VM whose cost stays within the task's budget
        is reused. If no such VM exists, a new VM is initialized with
        the fastest VM type within budget, falling back to the slowest
        VM type when no type fits. The task is then reserved on the VM
        and a ``FINISH_TASK`` event is queued.

        :param workflow_uuid: UUID of the workflow the task belongs to.
        :param task_id: ID of the task to schedule.
        :return: None.
        """

        now = self.event_loop.get_current_time()

        wf = self.workflows[workflow_uuid]
        task = wf.tasks[task_id]
        vm: tp.Optional[vms.VM] = None

        idle_vms = self.vm_manager.get_idle_vms()

        if idle_vms:
            # Look for the fastest idle VM that the task can afford.
            fastest_time: tp.Optional[float] = None

            for candidate in idle_vms:
                predicted = self.predict_func(
                    task=task,
                    vm_type=candidate.type,
                    storage=self.storage_manager.get_storage(),
                    vm=candidate,
                    container_prov=task.container.provision_time,
                    vm_prov=self.vm_manager.get_provision_delay(),
                )
                predicted_finish = now + timedelta(seconds=predicted)
                cost = candidate.calculate_cost(time=predicted_finish)

                over_budget = cost > task.budget
                if not over_budget and (fastest_time is None
                                        or predicted < fastest_time):
                    fastest_time = predicted
                    vm = candidate

        if vm is None:
            # No idle VM could be reused (none idle, or none within
            # budget): initialize a VM of the fastest affordable type,
            # or of the slowest type when nothing is affordable.
            fastest_vmt = self._find_fastest_vm_type_within_budget(
                task=task,
                budget=task.budget,
            )

            vm_type = (
                self.vm_manager.get_slowest_vm_type()
                if fastest_vmt is None
                else fastest_vmt.vm_type
            )

            vm = self.vm_manager.init_vm(vm_type=vm_type)
            self.collector.initialized_vms += 1
            self.collector.workflows[workflow_uuid].initialized_vms.append(vm)

        # Accumulate the time until the task finishes on the chosen VM.
        elapsed = 0.0

        # VM provisioning, when the VM is not provisioned yet.
        if vm.get_state() == vms.State.NOT_PROVISIONED:
            self.vm_manager.provision_vm(vm=vm, time=now)
            elapsed += self.vm_manager.get_provision_delay()

        # Container provisioning, when the container is missing.
        container = task.container
        if not vm.check_if_container_provisioned(container=container):
            vm.provision_container(container=container)
            elapsed += container.provision_time

        # Predicted execution of the task itself.
        elapsed += self.predict_func(
            task=task,
            vm_type=vm.type,
            storage=self.storage_manager.get_storage(),
            vm=vm,
        )
        price = cst.calculate_price_for_vm(
            current_time=now,
            use_time=elapsed,
            vm=vm,
        )

        # Store the VM cost at finish time as the task's execution price.
        finish_time = now + timedelta(seconds=elapsed)
        task.execution_price = vm.calculate_cost(time=finish_time)

        # Occupy the VM and queue the task completion event.
        self.vm_manager.reserve_vm(vm=vm, task=task)

        finish_event = Event(
            start_time=finish_time,
            event_type=EventType.FINISH_TASK,
            task=task,
            vm=vm,
        )
        self.event_loop.add_event(event=finish_event)

        # Update usage and cost metrics.
        wf_metrics = self.collector.workflows[workflow_uuid]
        wf_metrics.used_vms.add(vm)
        self.collector.used_vms.add(vm)
        wf_metrics.cost += price
Example #4
0
    def schedule_task(self, workflow_uuid: str, task_id: int) -> None:
        """Schedule a single task using the Dyna algorithm.

        The VM type for the task is taken from the workflow's
        configuration plan. An idle VM of that type is reused when one
        exists; otherwise a new VM of that type is initialized. The
        task is then reserved on the VM and a ``FINISH_TASK`` event is
        queued.

        :param workflow_uuid: UUID of the workflow the task belongs to.
        :param task_id: ID of the task to schedule.
        :return: None.
        """

        now = self.event_loop.get_current_time()

        wf = self.workflows[workflow_uuid]
        task = wf.tasks[task_id]
        planned_type = wf.configuration_plan.plan[task_id]

        # First idle VM whose type matches the plan, if any.
        vm: tp.Optional[vms.VM] = next(
            (
                candidate
                for candidate in self.vm_manager.get_idle_vms()
                if candidate.type == planned_type
            ),
            None,
        )

        if vm is None:
            # Nothing to reuse: initialize a VM of the planned type.
            vm = self.vm_manager.init_vm(vm_type=planned_type)

            # Record the freshly initialized VM in the metric collector.
            self.collector.initialized_vms += 1
            self.collector.workflows[workflow_uuid].initialized_vms.append(vm)

        # Accumulate the time until the task finishes on the chosen VM.
        elapsed = 0.0

        # VM provisioning, when the VM is not provisioned yet.
        if vm.get_state() == vms.State.NOT_PROVISIONED:
            self.vm_manager.provision_vm(vm=vm, time=now)
            elapsed += self.vm_manager.get_provision_delay()

        # Container provisioning, when the container is missing.
        container = task.container
        if not vm.check_if_container_provisioned(container=container):
            vm.provision_container(container=container)
            elapsed += container.provision_time

        # Predicted execution of the task itself.
        elapsed += self.predict_func(
            task=task,
            vm_type=vm.type,
            storage=self.storage_manager.get_storage(),
            vm=vm,
        )
        price = cst.calculate_price_for_vm(
            current_time=now,
            use_time=elapsed,
            vm=vm,
        )

        # Occupy the VM and queue the task completion event.
        self.vm_manager.reserve_vm(vm=vm, task=task)

        finish_event = Event(
            start_time=now + timedelta(seconds=elapsed),
            event_type=EventType.FINISH_TASK,
            task=task,
            vm=vm,
        )
        self.event_loop.add_event(event=finish_event)

        # Update usage and cost metrics.
        wf_metrics = self.collector.workflows[workflow_uuid]
        wf_metrics.used_vms.add(vm)
        self.collector.used_vms.add(vm)
        wf_metrics.cost += price
Example #5
0
    def _get_best_host(
        self,
        task: Task,
        pot: float,
    ) -> tp.Tuple[Host, float, float]:
        """Select the host that finishes the task soonest within budget.

        A host is either a VM type (a new VM has to be initialized) or
        an idle VM instance. The search starts from the slowest VM type
        and replaces it only with a host that is strictly faster and
        whose price does not exceed ``task.budget + pot``.

        The returned pot is the budget left after paying for the
        selected host. It stays ``0.0`` when no candidate replaces the
        initial slowest VM type.

        :param task: task for finding host.
        :param pot: total amount of money left from previous tasks.
        :return: Tuple[host for task, new pot, execution time on host].
        """

        now = self.event_loop.get_current_time()
        vm_prov = self.vm_manager.get_provision_delay()

        total_budget = task.budget + pot
        new_pot = 0.0

        # Start from the slowest VM type as the fallback choice.
        best_host = Host(
            type=HostType.VMType,
            host=self.vm_manager.get_slowest_vm_type(),
        )
        best_finish_time = self.predict_func(
            task=task,
            vm_type=best_host.host,
            storage=self.storage_manager.get_storage(),
            container_prov=task.container.provision_time,
            vm_prov=vm_prov,
        )

        # Candidates that require initializing a new VM.
        for vm_type in self.vm_manager.get_vm_types():
            duration = self.predict_func(
                task=task,
                vm_type=vm_type,
                storage=self.storage_manager.get_storage(),
                container_prov=task.container.provision_time,
                vm_prov=vm_prov,
            )
            price = cst.estimate_price_for_vm_type(
                use_time=duration,
                vm_type=vm_type,
            )

            # Keep the current best unless this type is strictly faster
            # and affordable.
            if not (duration < best_finish_time and price <= total_budget):
                continue

            best_host = Host(type=HostType.VMType, host=vm_type)
            best_finish_time = duration
            new_pot = total_budget - price

        # Candidates that reuse an already idle VM instance.
        for idle_vm in self.vm_manager.get_idle_vms():
            duration = self.predict_func(
                task=task,
                vm_type=idle_vm.type,
                storage=self.storage_manager.get_storage(),
                container_prov=task.container.provision_time,
                vm_prov=vm_prov,
                vm=idle_vm,
            )
            price = cst.calculate_price_for_vm(
                current_time=now,
                use_time=duration,
                vm=idle_vm,
            )

            # Same rule as above: strictly faster and within budget.
            if not (duration < best_finish_time and price <= total_budget):
                continue

            best_host = Host(type=HostType.VMInstance, host=idle_vm)
            best_finish_time = duration
            new_pot = total_budget - price

        return best_host, new_pot, best_finish_time
Example #6
0
    def schedule_task(self, workflow_uuid: str, task_id: int) -> None:
        """Schedule a single task using the EPSM algorithm.

        Idle VMs are searched in three tiers, taking the cheapest VM
        that meets the deadline from the first tier that has one: VMs
        holding the task's input files, VMs with the task's container
        provisioned, and finally all remaining idle VMs.

        When no idle VM fits, a new VM is initialized only if there is
        no spare time left or the task has no parents. Otherwise a
        ``SCHEDULE_TASK`` event is queued one scheduling interval later
        and the method returns without scheduling.

        :param workflow_uuid: UUID of the workflow the task belongs to.
        :param task_id: ID of the task to schedule.
        :return: None.
        """

        now = self.event_loop.get_current_time()

        wf = self.workflows[workflow_uuid]
        task = wf.tasks[task_id]

        # Tier 1: idle VMs that already hold the task's input files.
        with_input = self.vm_manager.get_idle_vms(task=task)
        vm = self._find_cheapest_vm_for_task(
            task=task,
            idle_vms=with_input,
        )

        if vm is None:
            # Tier 2: idle VMs with the task's container provisioned.
            with_container = self.vm_manager.get_idle_vms(
                container=task.container)
            vm = self._find_cheapest_vm_for_task(
                task=task,
                idle_vms=with_container,
            )

            if vm is None:
                # Tier 3: every idle VM not examined in tiers 1 and 2.
                remaining = (self.vm_manager.get_idle_vms() -
                             with_input - with_container)
                vm = self._find_cheapest_vm_for_task(
                    task=task,
                    idle_vms=remaining,
                )

        if vm is None:
            # No idle VM fits: decide between a new VM and postponing
            # the task until the next scheduling phase.
            time_left = (task.deadline - now).total_seconds()
            spare_time = (time_left - task.execution_time_prediction -
                          self.settings.scheduling_interval)

            cannot_wait = spare_time <= 0 or not task.parents
            if cannot_wait:
                # Initialize a VM of the cheapest type for this task.
                cheapest_vmt = self._find_cheapest_vm_type_for_task(
                    task=task,
                    vm_types=self.vm_manager.get_vm_types(),
                )

                vm = self.vm_manager.init_vm(vm_type=cheapest_vmt)

                # Record the freshly initialized VM in the collector.
                self.collector.initialized_vms += 1
                self.collector.workflows[
                    workflow_uuid].initialized_vms.append(vm)

        if vm is None:
            # Still nothing to run on: retry after one scheduling
            # interval.
            retry_event = Event(
                start_time=now + timedelta(
                    seconds=self.settings.scheduling_interval),
                event_type=EventType.SCHEDULE_TASK,
                task=task,
            )
            self.event_loop.add_event(event=retry_event)

            return

        # A VM is available: accumulate the time until the task
        # finishes on it.
        elapsed = 0.0

        # VM provisioning, when the VM is not provisioned yet.
        if vm.get_state() == vms.State.NOT_PROVISIONED:
            self.vm_manager.provision_vm(vm=vm, time=now)
            elapsed += self.vm_manager.get_provision_delay()

        # Container provisioning, when the container is missing.
        container = task.container
        if not vm.check_if_container_provisioned(container=container):
            vm.provision_container(container=container)
            elapsed += container.provision_time

        # Predicted execution of the task itself.
        elapsed += self.predict_func(
            task=task,
            vm_type=vm.type,
            storage=self.storage_manager.get_storage(),
            vm=vm,
        )
        price = cst.calculate_price_for_vm(
            current_time=now,
            use_time=elapsed,
            vm=vm,
        )

        # Occupy the VM and queue the task completion event.
        self.vm_manager.reserve_vm(vm=vm, task=task)

        finish_event = Event(
            start_time=now + timedelta(seconds=elapsed),
            event_type=EventType.FINISH_TASK,
            task=task,
            vm=vm,
        )
        self.event_loop.add_event(event=finish_event)

        # Update usage and cost metrics.
        wf_metrics = self.collector.workflows[workflow_uuid]
        wf_metrics.used_vms.add(vm)
        self.collector.used_vms.add(vm)
        wf_metrics.cost += price