Example #1
0
    def __init__(self, task_server: 'TaskServer', use_docker_manager=True,
                 finished_cb=None) -> None:
        self.task_server = task_server
        # Id of the task that we're currently waiting for
        self.waiting_for_task: Optional[str] = None
        # Id of the task that we're currently computing
        self.counting_task = None
        # TaskThread
        self.counting_thread = None
        self.task_requested = False
        # Is task computer currently able to run computation?
        self.runnable = True
        self.listeners = []
        self.last_task_request = time.time()

        # when we should stop waiting for the task
        self.waiting_deadline = None

        self.dir_manager = None
        self.resource_manager: Optional[ResourcesManager] = None
        self.task_request_frequency = None
        # Should we stop waiting for a task once the waiting deadline
        # has passed?
        self.use_waiting_deadline = False
        self.waiting_for_task_session_timeout = None

        self.docker_manager: DockerManager = DockerManager.install()
        if use_docker_manager:
            self.docker_manager.check_environment()

        self.use_docker_manager = use_docker_manager
        run_benchmarks = self.task_server.benchmark_manager.benchmarks_needed()
        deferred = self.change_config(
            task_server.config_desc, in_background=False,
            run_benchmarks=run_benchmarks)
        try:
            sync_wait(deferred, BENCHMARK_TIMEOUT)
        except TimeoutError:
            logger.warning('Benchmark computation timed out')

        self.stats = IntStatsKeeper(CompStats)

        self.assigned_subtask: Optional[Dict[str, Any]] = None
        self.max_assigned_tasks = 1

        self.delta = None
        self.last_task_timeout_checking = None
        self.support_direct_computation = False
        # Should this node act as a provider and compute tasks?
        self.compute_tasks = task_server.config_desc.accept_tasks \
            and not task_server.config_desc.in_shutdown
        self.finished_cb = finished_cb
Example #2
0
    def test_for_race_conditions(self):
        n_threads = 10
        n_updates = 5
        n_expected = n_threads * n_updates

        sk = IntStatsKeeper(CompStats)

        def increase_stat():
            n = 0
            while n < n_updates:
                sk.increase_stat("computed_tasks")
                n += 1

        threads = [Thread(target=increase_stat) for _ in range(n_threads)]

        for t in threads:
            t.start()
        for t in threads:
            t.join()

        self.assertEqual(sk.session_stats.computed_tasks, n_expected)
        self.assertEqual(sk.global_stats.computed_tasks, n_expected)
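
The race-condition test above relies on IntStatsKeeper.increase_stat being safe to call from several threads at once. Below is a minimal sketch of a keeper with that property, assuming a plain in-memory counter class guarded by a Lock; SimpleCompStats and SimpleStatsKeeper are illustrative names, not Golem's classes, and persistence of global stats between runs is deliberately left out.

from dataclasses import dataclass
from threading import Lock


@dataclass
class SimpleCompStats:
    computed_tasks: int = 0
    tasks_with_timeout: int = 0
    tasks_with_errors: int = 0


class SimpleStatsKeeper:
    """Hypothetical, simplified stand-in for IntStatsKeeper."""

    def __init__(self, stat_class=SimpleCompStats):
        self._lock = Lock()
        self.session_stats = stat_class()
        self.global_stats = stat_class()

    def increase_stat(self, name, delta=1):
        # Read-modify-write under a lock so concurrent callers
        # (as in test_for_race_conditions) cannot lose updates.
        with self._lock:
            setattr(self.session_stats, name,
                    getattr(self.session_stats, name) + delta)
            setattr(self.global_stats, name,
                    getattr(self.global_stats, name) + delta)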
Example #3
0
class TaskComputer(object):
    """ TaskComputer is responsible for task computations that take
    place in the Golem application. Tasks are started
    in separate threads.
    """

    lock = Lock()
    dir_lock = Lock()

    def __init__(self, task_server: 'TaskServer', use_docker_manager=True,
                 finished_cb=None) -> None:
        self.task_server = task_server
        # Id of the task that we're currently waiting for
        self.waiting_for_task: Optional[str] = None
        # Id of the task that we're currently computing
        self.counting_task = None
        # TaskThread
        self.counting_thread = None
        self.task_requested = False
        # Is task computer currently able to run computation?
        self.runnable = True
        self.listeners = []
        self.last_task_request = time.time()

        # when we should stop waiting for the task
        self.waiting_deadline = None

        self.dir_manager = None
        self.resource_manager: Optional[ResourcesManager] = None
        self.task_request_frequency = None
        # Should we stop waiting for a task once the waiting deadline
        # has passed?
        self.use_waiting_deadline = False
        self.waiting_for_task_session_timeout = None

        self.docker_manager: DockerManager = DockerManager.install()
        if use_docker_manager:
            self.docker_manager.check_environment()

        self.use_docker_manager = use_docker_manager
        run_benchmarks = self.task_server.benchmark_manager.benchmarks_needed()
        deferred = self.change_config(
            task_server.config_desc, in_background=False,
            run_benchmarks=run_benchmarks)
        try:
            sync_wait(deferred, BENCHMARK_TIMEOUT)
        except TimeoutError:
            logger.warning('Benchmark computation timed out')

        self.stats = IntStatsKeeper(CompStats)

        self.assigned_subtask: Optional[Dict[str, Any]] = None
        self.max_assigned_tasks = 1

        self.delta = None
        self.last_task_timeout_checking = None
        self.support_direct_computation = False
        # Should this node act as a provider and compute tasks?
        self.compute_tasks = task_server.config_desc.accept_tasks \
            and not task_server.config_desc.in_shutdown
        self.finished_cb = finished_cb

    def task_given(self, ctd):
        if self.assigned_subtask is not None:
            logger.error("Trying to assign a task, when it's already assigned")
            return False
        self.wait(ttl=deadline_to_timeout(ctd['deadline']))
        self.assigned_subtask = ctd
        self.__request_resource(
            ctd['task_id'],
            ctd['subtask_id']
        )
        return True

    def task_resource_collected(self, task_id, unpack_delta=True):
        subtask = self.assigned_subtask
        if not subtask or subtask['task_id'] != task_id:
            logger.error("Resource collected for a wrong task, %s", task_id)
            return False
        if unpack_delta:
            rs_dir = self.dir_manager.get_task_resource_dir(task_id)
            self.task_server.unpack_delta(rs_dir, self.delta, task_id)
        self.delta = None
        self.last_task_timeout_checking = time.time()
        self.__compute_task(
            subtask['subtask_id'],
            subtask['docker_images'],
            subtask['src_code'],
            subtask['extra_data'],
            subtask['deadline'])
        return True

    def task_resource_failure(self, task_id, reason):
        subtask = self.assigned_subtask
        if not subtask or subtask['task_id'] != task_id:
            logger.error("Resource failure for a wrong task, %s", task_id)
            return
        self.task_server.send_task_failed(
            subtask['subtask_id'],
            subtask['task_id'],
            'Error downloading resources: {}'.format(reason),
        )
        self.session_closed()

    def wait_for_resources(self, task_id, delta):
        if self.assigned_subtask and \
                self.assigned_subtask['task_id'] == task_id:
            self.delta = delta

    def task_request_rejected(self, task_id, reason):
        logger.info("Task %r request rejected: %r", task_id, reason)

    def task_computed(self, task_thread: TaskThread) -> None:
        self.reset()

        if task_thread.end_time is None:
            task_thread.end_time = time.time()

        with self.lock:
            if self.counting_thread is task_thread:
                self.counting_thread = None

        work_wall_clock_time = task_thread.end_time - task_thread.start_time
        subtask_id = task_thread.subtask_id
        try:
            subtask = self.assigned_subtask
            assert subtask is not None
            self.assigned_subtask = None
            # Get paid for the maximum working time,
            # so task withholding will not be profitable
            task_header = \
                self.task_server.task_keeper.task_headers[subtask['task_id']]
            work_time_to_be_paid = task_header.subtask_timeout

        except KeyError:
            logger.error("No subtask with id %r", subtask_id)
            return

        was_success = False

        if task_thread.error or task_thread.error_msg:

            if task_thread.error_msg \
                    and "Task timed out" in task_thread.error_msg:
                self.stats.increase_stat('tasks_with_timeout')
            else:
                self.stats.increase_stat('tasks_with_errors')
                self.task_server.send_task_failed(
                    subtask_id,
                    subtask['task_id'],
                    task_thread.error_msg,
                )

        elif task_thread.result and 'data' in task_thread.result:

            logger.info("Task %r computed, work_wall_clock_time %s",
                        subtask_id,
                        str(work_wall_clock_time))
            self.stats.increase_stat('computed_tasks')
            self.task_server.send_results(
                subtask_id,
                subtask['task_id'],
                task_thread.result,
            )
            was_success = True

        else:
            self.stats.increase_stat('tasks_with_errors')
            self.task_server.send_task_failed(
                subtask_id,
                subtask['task_id'],
                "Wrong result format",
            )

        dispatcher.send(signal='golem.monitor', event='computation_time_spent',
                        success=was_success, value=work_time_to_be_paid)

        self.counting_task = None
        if self.finished_cb:
            self.finished_cb()

    def run(self):
        """ Main loop of task computer """
        if self.counting_task:
            if self.counting_thread is not None:
                self.counting_thread.check_timeout()
        elif self.compute_tasks and self.runnable:
            if not self.waiting_for_task:
                last_request = time.time() - self.last_task_request
                if last_request > self.task_request_frequency \
                        and self.counting_thread is None:
                    self.__request_task()
            elif self.use_waiting_deadline:
                if self.waiting_deadline < time.time():
                    self.reset()

    def get_progress(self) -> Optional[ComputingSubtaskStateSnapshot]:
        if self.counting_thread is None:
            return None

        c: TaskThread = self.counting_thread
        tcss = ComputingSubtaskStateSnapshot(
            subtask_id=c.get_subtask_id(),
            progress=c.get_progress(),
            seconds_to_timeout=c.task_timeout,
            running_time_seconds=(time.time() - c.start_time),
            **c.extra_data,
        )

        return tcss

    def get_environment(self):
        task_header = self.task_server.task_keeper.task_headers.get(
            self.counting_task)
        if not task_header:
            return None
        return task_header.fixed_header.environment

    def change_config(self, config_desc, in_background=True,
                      run_benchmarks=False):
        self.dir_manager = DirManager(
            self.task_server.get_task_computer_root())
        self.resource_manager = ResourcesManager(self.dir_manager, self)
        self.task_request_frequency = config_desc.task_request_interval
        self.waiting_for_task_session_timeout = \
            config_desc.waiting_for_task_session_timeout
        self.compute_tasks = config_desc.accept_tasks \
            and not config_desc.in_shutdown
        return self.change_docker_config(
            config_desc=config_desc,
            run_benchmarks=run_benchmarks,
            work_dir=Path(self.dir_manager.root_path),
            in_background=in_background)

    def config_changed(self):
        for l in self.listeners:
            l.config_changed()

    def change_docker_config(
            self,
            config_desc: ClientConfigDescriptor,
            run_benchmarks: bool,
            work_dir: Path,
            in_background: bool = True
    ) -> Optional[Deferred]:

        dm = self.docker_manager
        assert isinstance(dm, DockerManager)
        dm.build_config(config_desc)

        deferred = Deferred()
        if not dm.hypervisor and run_benchmarks:
            self.task_server.benchmark_manager.run_all_benchmarks(
                deferred.callback, deferred.errback
            )
            return deferred

        if dm.hypervisor and self.use_docker_manager:  # noqa pylint: disable=no-member
            self.lock_config(True)

            def status_callback():
                return self.counting_task

            def done_callback(config_differs):
                if run_benchmarks or config_differs:
                    self.task_server.benchmark_manager.run_all_benchmarks(
                        deferred.callback, deferred.errback
                    )
                else:
                    deferred.callback('Benchmarks not executed')
                logger.debug("Resuming new task computation")
                self.lock_config(False)
                self.runnable = True

            self.runnable = False
            # PyLint thinks dm is of type DockerConfigManager not DockerManager
            # pylint: disable=no-member
            dm.update_config(
                status_callback=status_callback,
                done_callback=done_callback,
                work_dir=work_dir,
                in_background=in_background)

            return deferred

        return None

    def register_listener(self, listener):
        self.listeners.append(listener)

    def lock_config(self, on=True):
        for l in self.listeners:
            l.lock_config(on)

    def session_timeout(self):
        self.session_closed()

    def session_closed(self):
        if self.counting_task is None:
            self.reset()

    def wait(self, wait=True, ttl=None):
        self.use_waiting_deadline = wait
        if ttl is None:
            ttl = self.waiting_for_task_session_timeout

        self.waiting_deadline = time.time() + ttl

    def reset(self, counting_task=None):
        self.counting_task = counting_task
        self.use_waiting_deadline = False
        self.task_requested = False
        self.waiting_for_task = None
        self.waiting_deadline = None

    def __request_task(self):
        with self.lock:
            perform_request = not self.waiting_for_task and \
                (self.counting_task is None)

        if not perform_request:
            return

        now = time.time()
        self.wait()
        self.last_task_request = now
        self.waiting_for_task = self.task_server.request_task()
        if self.waiting_for_task is not None:
            self.stats.increase_stat('tasks_requested')

    def __request_resource(self, task_id, subtask_id):
        self.wait(False)
        if not self.task_server.request_resource(task_id, subtask_id):
            self.reset()

    def __compute_task(self, subtask_id, docker_images,
                       src_code, extra_data, subtask_deadline):
        task_id = self.assigned_subtask['task_id']
        task_header = self.task_server.task_keeper.task_headers.get(task_id)

        if not task_header:
            logger.warning("Subtask '%s' of task '%s' cannot be computed: "
                           "task header has been unexpectedly removed",
                           subtask_id, task_id)
            return self.session_closed()

        deadline = min(task_header.deadline, subtask_deadline)
        task_timeout = deadline_to_timeout(deadline)

        unique_str = str(uuid.uuid4())

        logger.info("Starting computation of subtask %r (task: %r, deadline: "
                    "%r, docker images: %r)", subtask_id, task_id, deadline,
                    docker_images)

        self.reset(counting_task=task_id)

        with self.dir_lock:
            resource_dir = self.resource_manager.get_resource_dir(task_id)
            temp_dir = os.path.join(
                self.resource_manager.get_temporary_dir(task_id), unique_str)
            # self.dir_manager.clear_temporary(task_id)

            if not os.path.exists(temp_dir):
                os.makedirs(temp_dir)

        if docker_images:
            docker_images = [DockerImage(**did) for did in docker_images]
            dir_mapping = DockerTaskThread.generate_dir_mapping(resource_dir,
                                                                temp_dir)
            tt = DockerTaskThread(subtask_id, docker_images,
                                  src_code, extra_data,
                                  dir_mapping, task_timeout)
        elif self.support_direct_computation:
            tt = PyTaskThread(subtask_id, src_code,
                              extra_data, resource_dir, temp_dir,
                              task_timeout)
        else:
            logger.error("Cannot run PyTaskThread in this version")
            subtask = self.assigned_subtask
            self.assigned_subtask = None
            self.task_server.send_task_failed(
                subtask_id,
                subtask['task_id'],
                "Host direct task not supported",
            )
            self.counting_task = None
            if self.finished_cb:
                self.finished_cb()

            return

        with self.lock:
            self.counting_thread = tt

        tt.start().addBoth(lambda _: self.task_computed(tt))

    def quit(self):
        if self.counting_thread is not None:
            self.counting_thread.end_comp()
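
As a rough usage sketch (not Golem's actual wiring): the owning TaskServer constructs the TaskComputer and then drives it through the callbacks shown above, with run() polled periodically. The drive() helper below is a hypothetical illustration of that polling, not an API from the project.

import time


def drive(task_computer, poll_interval=1.0):
    # Hypothetical polling loop; Golem schedules run() from its own
    # event loop rather than a plain sleep loop like this one.
    # run() either requests a new task, enforces the waiting deadline,
    # or checks the running TaskThread for a timeout.
    while True:
        task_computer.run()
        time.sleep(poll_interval)


# Typical subtask lifecycle, as seen through the methods above:
#   1. task_given(ctd)                  - subtask assigned, resources requested
#   2. wait_for_resources(task_id, d)   - resource delta stored
#   3. task_resource_collected(task_id) - delta unpacked, TaskThread started
#   4. task_computed(task_thread)       - results/failure sent, stats updated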
Example #4
0
    def __init__(self,
                 node_name,
                 task_server,
                 use_docker_machine_manager=True):
        """ Create new task computer instance
        :param node_name:
        :param task_server:
        :return:
        """
        self.node_name = node_name
        self.task_server = task_server
        self.waiting_for_task = None
        self.counting_task = False
        self.task_requested = False
        self.runnable = True
        self.listeners = []
        self.current_computations = []
        self.last_task_request = time.time()

        self.waiting_ttl = 0
        self.last_checking = time.time()

        self.dir_manager = None
        self.resource_manager = None
        self.task_request_frequency = None
        self.use_waiting_ttl = None
        self.waiting_for_task_timeout = None
        self.waiting_for_task_session_timeout = None

        self.docker_manager = DockerManager.install()
        if use_docker_machine_manager:
            self.docker_manager.check_environment()

        try:
            lux_perf = float(task_server.config_desc.estimated_lux_performance)
            blender_perf = float(
                task_server.config_desc.estimated_blender_performance)
        except (AttributeError, TypeError, ValueError):
            # missing or non-numeric performance values in the config
            lux_perf = 0
            blender_perf = 0

        run_benchmarks = int(lux_perf) == 0 or int(blender_perf) == 0

        self.use_docker_machine_manager = use_docker_machine_manager
        self.change_config(task_server.config_desc,
                           in_background=False,
                           run_benchmarks=run_benchmarks)

        self.stats = IntStatsKeeper(CompStats)

        self.assigned_subtasks = {}
        self.task_to_subtask_mapping = {}
        self.max_assigned_tasks = 1

        self.delta = None
        self.last_task_timeout_checking = None
        self.support_direct_computation = False
        self.compute_tasks = task_server.config_desc.accept_tasks
Example #5
0
class TaskComputer(object):
    """ TaskComputer is responsible for task computations that take place in Golem application. Tasks are started
    in separate threads.
    """

    lock = Lock()
    dir_lock = Lock()

    def __init__(self,
                 node_name,
                 task_server,
                 use_docker_machine_manager=True):
        """ Create new task computer instance
        :param node_name:
        :param task_server:
        :return:
        """
        self.node_name = node_name
        self.task_server = task_server
        self.waiting_for_task = None
        self.counting_task = False
        self.task_requested = False
        self.runnable = True
        self.listeners = []
        self.current_computations = []
        self.last_task_request = time.time()

        self.waiting_ttl = 0
        self.last_checking = time.time()

        self.dir_manager = None
        self.resource_manager = None
        self.task_request_frequency = None
        self.use_waiting_ttl = None
        self.waiting_for_task_timeout = None
        self.waiting_for_task_session_timeout = None

        self.docker_manager = DockerManager.install()
        if use_docker_machine_manager:
            self.docker_manager.check_environment()

        try:
            lux_perf = float(task_server.config_desc.estimated_lux_performance)
            blender_perf = float(
                task_server.config_desc.estimated_blender_performance)
        except (AttributeError, TypeError, ValueError):
            # missing or non-numeric performance values in the config
            lux_perf = 0
            blender_perf = 0

        run_benchmarks = int(lux_perf) == 0 or int(blender_perf) == 0

        self.use_docker_machine_manager = use_docker_machine_manager
        self.change_config(task_server.config_desc,
                           in_background=False,
                           run_benchmarks=run_benchmarks)

        self.stats = IntStatsKeeper(CompStats)

        self.assigned_subtasks = {}
        self.task_to_subtask_mapping = {}
        self.max_assigned_tasks = 1

        self.delta = None
        self.last_task_timeout_checking = None
        self.support_direct_computation = False
        self.compute_tasks = task_server.config_desc.accept_tasks

    def task_given(self, ctd):
        if ctd.subtask_id not in self.assigned_subtasks:
            self.wait(ttl=self.waiting_for_task_timeout)
            self.assigned_subtasks[ctd.subtask_id] = ctd
            self.task_to_subtask_mapping[ctd.task_id] = ctd.subtask_id
            self.__request_resource(
                ctd.task_id,
                self.resource_manager.get_resource_header(ctd.task_id),
                ctd.return_address, ctd.return_port, ctd.key_id,
                ctd.task_owner)
            return True
        else:
            return False

    def resource_given(self, task_id):
        if task_id in self.task_to_subtask_mapping:
            subtask_id = self.task_to_subtask_mapping[task_id]
            if subtask_id in self.assigned_subtasks:
                subtask = self.assigned_subtasks[subtask_id]
                timeout = deadline_to_timeout(subtask.deadline)
                self.__compute_task(subtask_id, subtask.docker_images,
                                    subtask.src_code, subtask.extra_data,
                                    subtask.short_description, timeout)
                self.waiting_for_task = None
                return True
            else:
                return False

    def task_resource_collected(self, task_id, unpack_delta=True):
        if task_id in self.task_to_subtask_mapping:
            subtask_id = self.task_to_subtask_mapping[task_id]
            if subtask_id in self.assigned_subtasks:
                subtask = self.assigned_subtasks[subtask_id]
                if unpack_delta:
                    self.task_server.unpack_delta(
                        self.dir_manager.get_task_resource_dir(task_id),
                        self.delta, task_id)
                self.delta = None
                self.last_task_timeout_checking = time.time()
                self.__compute_task(subtask_id, subtask.docker_images,
                                    subtask.src_code, subtask.extra_data,
                                    subtask.short_description,
                                    deadline_to_timeout(subtask.deadline))
                return True
            return False

    def task_resource_failure(self, task_id, reason):
        if task_id in self.task_to_subtask_mapping:
            subtask_id = self.task_to_subtask_mapping.pop(task_id)
            if subtask_id in self.assigned_subtasks:
                subtask = self.assigned_subtasks.pop(subtask_id)
                self.task_server.send_task_failed(
                    subtask_id, subtask.task_id,
                    'Error downloading resources: {}'.format(reason),
                    subtask.return_address, subtask.return_port,
                    subtask.key_id, subtask.task_owner, self.node_name)
            self.session_closed()

    def wait_for_resources(self, task_id, delta):
        if task_id in self.task_to_subtask_mapping:
            subtask_id = self.task_to_subtask_mapping[task_id]
            if subtask_id in self.assigned_subtasks:
                self.delta = delta

    def task_request_rejected(self, task_id, reason):
        logger.warning("Task {} request rejected: {}".format(task_id, reason))

    def resource_request_rejected(self, subtask_id, reason):
        logger.warning("Task {} resource request rejected: {}".format(
            subtask_id, reason))
        self.assigned_subtasks.pop(subtask_id, None)
        self.reset()

    def task_computed(self, task_thread):
        if task_thread.end_time is None:
            task_thread.end_time = time.time()

        with self.lock:
            try:
                self.current_computations.remove(task_thread)
            except ValueError:  # not in list
                pass

        time_ = task_thread.end_time - task_thread.start_time
        subtask_id = task_thread.subtask_id
        try:
            subtask = self.assigned_subtasks.pop(subtask_id)
        except KeyError:
            logger.error("No subtask with id %r", subtask_id)
            return

        if task_thread.error or task_thread.error_msg:
            if "Task timed out" in task_thread.error_msg:
                self.stats.increase_stat('tasks_with_timeout')
            else:
                self.stats.increase_stat('tasks_with_errors')
            self.task_server.send_task_failed(
                subtask_id, subtask.task_id, task_thread.error_msg,
                subtask.return_address, subtask.return_port, subtask.key_id,
                subtask.task_owner, self.node_name)
            dispatcher.send(signal='golem.monitor',
                            event='computation_time_spent',
                            success=False,
                            value=time_)
        elif task_thread.result and 'data' in task_thread.result \
                and 'result_type' in task_thread.result:
            logger.info("Task %r computed", subtask_id)
            self.stats.increase_stat('computed_tasks')
            self.task_server.send_results(subtask_id, subtask.task_id,
                                          task_thread.result, time_,
                                          subtask.return_address,
                                          subtask.return_port, subtask.key_id,
                                          subtask.task_owner, self.node_name)
            dispatcher.send(signal='golem.monitor',
                            event='computation_time_spent',
                            success=True,
                            value=time_)
        else:
            self.stats.increase_stat('tasks_with_errors')
            self.task_server.send_task_failed(
                subtask_id, subtask.task_id, "Wrong result format",
                subtask.return_address, subtask.return_port, subtask.key_id,
                subtask.task_owner, self.node_name)
            dispatcher.send(signal='golem.monitor',
                            event='computation_time_spent',
                            success=False,
                            value=time_)
        self.counting_task = None

    def run(self):
        if self.counting_task:
            for task_thread in self.current_computations:
                task_thread.check_timeout()
        elif self.compute_tasks and self.runnable:
            if not self.waiting_for_task:
                if time.time() - self.last_task_request \
                        > self.task_request_frequency:
                    if len(self.current_computations) == 0:
                        self.__request_task()
            elif self.use_waiting_ttl:
                time_ = time.time()
                self.waiting_ttl -= time_ - self.last_checking
                self.last_checking = time_
                if self.waiting_ttl < 0:
                    self.reset()

    def get_progresses(self):
        ret = {}
        for c in self.current_computations:
            # FIXME: cpu power and estimated time left
            tcss = TaskChunkStateSnapshot(
                c.get_subtask_id(), 0.0, 0.0, c.get_progress(),
                c.get_task_short_desc())
            ret[c.subtask_id] = tcss

        return ret

    def change_config(self,
                      config_desc,
                      in_background=True,
                      run_benchmarks=False):
        self.dir_manager = DirManager(
            self.task_server.get_task_computer_root())
        self.resource_manager = ResourcesManager(self.dir_manager, self)
        self.task_request_frequency = config_desc.task_request_interval
        self.waiting_for_task_timeout = config_desc.waiting_for_task_timeout
        self.waiting_for_task_session_timeout = \
            config_desc.waiting_for_task_session_timeout
        self.compute_tasks = config_desc.accept_tasks
        self.change_docker_config(config_desc, run_benchmarks, in_background)

    def _validate_task_state(self, task_state):
        td = task_state.definition
        if not os.path.exists(td.main_program_file):
            logger.error("Main program file does not exist: {}".format(
                td.main_program_file))
            return False
        return True

    def run_benchmark(self, benchmark, task_builder, datadir, node_name,
                      success_callback, error_callback):
        task_state = TaskDesc()
        task_state.status = TaskStatus.notStarted
        task_state.definition = benchmark.task_definition
        self._validate_task_state(task_state)
        builder = task_builder(node_name, task_state.definition, datadir,
                               self.dir_manager)
        t = Task.build_task(builder)
        br = BenchmarkRunner(t, datadir, success_callback, error_callback,
                             benchmark)
        br.run()

    def run_lux_benchmark(self, success=None, error=None):
        def success_callback(performance):
            cfg_desc = client.config_desc
            cfg_desc.estimated_lux_performance = performance
            client.change_config(cfg_desc)
            self.config_changed()
            if success:
                success(performance)

        def error_callback(err_msg):
            logger.error("Unable to run lux benchmark: {}".format(err_msg))
            if error:
                error(err_msg)

        client = self.task_server.client
        node_name = client.get_node_name()
        datadir = client.datadir

        lux_benchmark = LuxBenchmark()
        lux_builder = LuxRenderTaskBuilder
        self.run_benchmark(lux_benchmark, lux_builder, datadir, node_name,
                           success_callback, error_callback)

    def run_blender_benchmark(self, success=None, error=None):
        def success_callback(performance):
            cfg_desc = client.config_desc
            cfg_desc.estimated_blender_performance = performance
            client.change_config(cfg_desc)
            self.config_changed()
            if success:
                success(performance)

        def error_callback(err_msg):
            logger.error("Unable to run blender benchmark: {}".format(err_msg))
            if error:
                error(err_msg)

        client = self.task_server.client
        node_name = client.get_node_name()
        datadir = client.datadir
        blender_benchmark = BlenderBenchmark()
        blender_builder = BlenderRenderTaskBuilder
        self.run_benchmark(blender_benchmark, blender_builder, datadir,
                           node_name, success_callback, error_callback)

    def run_benchmarks(self):
        self.run_lux_benchmark()
        self.run_blender_benchmark()

    def config_changed(self):
        for l in self.listeners:
            l.config_changed()

    def change_docker_config(self,
                             config_desc,
                             run_benchmarks,
                             in_background=True):
        dm = self.docker_manager
        dm.build_config(config_desc)

        if not dm.docker_machine and run_benchmarks:
            self.run_benchmarks()
            return

        if dm.docker_machine and self.use_docker_machine_manager:

            self.lock_config(True)

            def status_callback():
                return self.counting_task

            def done_callback():
                if run_benchmarks:
                    self.run_benchmarks()
                logger.debug("Resuming new task computation")
                self.lock_config(False)
                self.runnable = True

            self.runnable = False
            dm.update_config(status_callback, done_callback, in_background)

    def register_listener(self, listener):
        self.listeners.append(listener)

    def lock_config(self, on=True):
        for l in self.listeners:
            l.lock_config(on)

    def session_timeout(self):
        self.session_closed()

    def session_closed(self):
        if not self.counting_task:
            self.reset()

    def wait(self, wait=True, ttl=None):
        self.use_waiting_ttl = wait
        if ttl is None:
            self.waiting_ttl = self.waiting_for_task_session_timeout
        else:
            self.waiting_ttl = ttl

    def reset(self, computing_task=False):
        self.counting_task = computing_task
        self.use_waiting_ttl = False
        self.task_requested = False
        self.waiting_for_task = None
        self.waiting_ttl = 0

    def __request_task(self):
        with self.lock:
            perform_request = not self.waiting_for_task and \
                not self.counting_task

        if not perform_request:
            return

        now = time.time()
        self.wait()
        self.last_checking = now
        self.last_task_request = now
        self.waiting_for_task = self.task_server.request_task()
        if self.waiting_for_task is not None:
            self.stats.increase_stat('tasks_requested')

    def __request_resource(self, task_id, resource_header, return_address,
                           return_port, key_id, task_owner):
        self.last_checking = time.time()
        self.wait(ttl=self.waiting_for_task_timeout)
        self.waiting_for_task = self.task_server.request_resource(
            task_id, resource_header, return_address, return_port, key_id,
            task_owner)

    def __compute_task(self, subtask_id, docker_images, src_code, extra_data,
                       short_desc, task_timeout):

        task_id = self.assigned_subtasks[subtask_id].task_id
        working_dir = self.assigned_subtasks[subtask_id].working_directory
        unique_str = str(uuid.uuid4())

        self.reset(computing_task=task_id)

        with self.dir_lock:
            resource_dir = self.resource_manager.get_resource_dir(task_id)
            temp_dir = os.path.join(
                self.resource_manager.get_temporary_dir(task_id), unique_str)
            # self.dir_manager.clear_temporary(task_id)

            if not os.path.exists(temp_dir):
                os.makedirs(temp_dir)

        if docker_images:
            tt = DockerTaskThread(self, subtask_id, docker_images, working_dir,
                                  src_code, extra_data, short_desc,
                                  resource_dir, temp_dir, task_timeout)
        elif self.support_direct_computation:
            tt = PyTaskThread(self, subtask_id, working_dir, src_code,
                              extra_data, short_desc, resource_dir, temp_dir,
                              task_timeout)
        else:
            logger.error("Cannot run PyTaskThread in this version")
            subtask = self.assigned_subtasks.pop(subtask_id)
            self.task_server.send_task_failed(
                subtask_id, subtask.task_id, "Host direct task not supported",
                subtask.return_address, subtask.return_port, subtask.key_id,
                subtask.task_owner, self.node_name)
            self.counting_task = None
            return

        self.current_computations.append(tt)
        tt.start()

    def quit(self):
        for t in self.current_computations:
            t.end_comp()
Example #6
0
    def test_stats_keeper(self):
        st = IntStatsKeeper(CompStats)
        self.assertTrue(isinstance(st, IntStatsKeeper))
        self._compare_stats(st, [0] * 6)

        st.increase_stat("computed_tasks")
        self._compare_stats(st, [1, 0, 0] * 2)
        st.increase_stat("computed_tasks")
        self._compare_stats(st, [2, 0, 0] * 2)
        st.increase_stat("computed_tasks")
        self._compare_stats(st, [3, 0, 0] * 2)

        st2 = IntStatsKeeper(CompStats)
        self._compare_stats(st2, [3] + [0] * 5)
        st2.increase_stat("computed_tasks")
        self._compare_stats(st2, [4, 0, 0, 1, 0, 0])
        st2.increase_stat("computed_tasks")
        self._compare_stats(st2, [5, 0, 0, 2, 0, 0])
        st.increase_stat("computed_tasks")
        self._compare_stats(st, [6, 0, 0, 4, 0, 0])