コード例 #1
0
 def testClearTemporary(self):
     dm = DirManager(self.path)
     task_id = '12345'
     tmp_dir = dm.get_task_temporary_dir(task_id)
     self.assertTrue(os.path.isdir(tmp_dir))
     file1 = os.path.join(tmp_dir, 'file1')
     file2 = os.path.join(tmp_dir, 'file2')
     dir1 = os.path.join(tmp_dir, 'dir1')
     file3 = os.path.join(dir1, 'file3')
     open(file1, 'w').close()
     open(file2, 'w').close()
     if not os.path.isdir(dir1):
         os.mkdir(dir1)
     open(file3, 'w').close()
     self.assertTrue(os.path.isfile(file1))
     self.assertTrue(os.path.isfile(file2))
     self.assertTrue(os.path.isfile(file3))
     self.assertTrue(os.path.isdir(dir1))
     dm.clear_temporary(task_id)
     self.assertTrue(os.path.isdir(tmp_dir))
     self.assertFalse(os.path.isfile(file1))
     self.assertFalse(os.path.isfile(file2))
     self.assertFalse(os.path.isfile(file3))
     self.assertFalse(os.path.isdir(dir1))
コード例 #2
0
class TaskManager(TaskEventListener):
    """ Keeps and manages information about requested tasks
    """
    handle_task_key_error = HandleKeyError(log_task_key_error)
    handle_subtask_key_error = HandleKeyError(log_subtask_key_error)

    def __init__(self,
                 node_name,
                 node,
                 keys_auth,
                 listen_address="",
                 listen_port=0,
                 root_path="res",
                 use_distributed_resources=True,
                 tasks_dir="tasks",
                 task_persistence=False):
        super(TaskManager, self).__init__()

        self.apps_manager = AppsManager()
        self.apps_manager.load_apps()

        apps = self.apps_manager.apps.values()
        task_types = [app.task_type_info(None, app.controller) for app in apps]
        self.task_types = {t.name.lower(): t for t in task_types}

        self.node_name = node_name
        self.node = node
        self.keys_auth = keys_auth
        self.key_id = keys_auth.get_key_id()

        self.tasks = {}
        self.tasks_states = {}
        self.subtask2task_mapping = {}

        self.listen_address = listen_address
        self.listen_port = listen_port

        # FIXME Remove this variable and make task persistance obligatory after it is more tested
        # Remember to also remove it from init params
        self.task_persistence = task_persistence

        self.tasks_dir = Path(tasks_dir)
        if not self.tasks_dir.is_dir():
            self.tasks_dir.mkdir(parents=True)
        self.root_path = root_path
        self.dir_manager = DirManager(self.get_task_manager_root())

        # resource_manager = OpenStackSwiftResourceManager(self.dir_manager,
        #                                                  resource_dir_method=self.dir_manager.get_task_temporary_dir)
        resource_manager = HyperdriveResourceManager(
            self.dir_manager,
            resource_dir_method=self.dir_manager.get_task_temporary_dir)
        self.task_result_manager = EncryptedResultPackageManager(
            resource_manager)

        self.activeStatus = [
            TaskStatus.computing, TaskStatus.starting, TaskStatus.waiting
        ]
        self.use_distributed_resources = use_distributed_resources

        self.comp_task_keeper = CompTaskKeeper(self.tasks_dir,
                                               persist=self.task_persistence)
        if self.task_persistence:
            self.restore_tasks()

    def get_task_manager_root(self):
        return self.root_path

    def get_external_address(self):
        request = AsyncRequest(get_external_address, self.listen_port)
        return async_run(request)

    def create_task(self, dictionary):
        # FIXME: remove after the new interface has been integrated with
        if not isinstance(dictionary, dict):
            return dictionary

        type_name = dictionary['type'].lower()
        task_type = self.task_types[type_name]
        builder_type = task_type.task_builder_type

        definition = builder_type.build_definition(task_type, dictionary)
        builder = builder_type(self.node_name, definition, self.root_path,
                               self.dir_manager)

        return Task.build_task(builder)

    def get_task_definition_dict(self, task):
        if isinstance(task, dict):
            return task
        definition = task.task_definition
        task_type = self.task_types[definition.task_type.lower()]
        return task_type.task_builder_type.build_dictionary(definition)

    @inlineCallbacks
    def add_new_task(self, task):
        if task.header.task_id in self.tasks:
            raise RuntimeError("Task has been already added")
        if not self.key_id:
            raise ValueError("'key_id' is not set")
        if not SocketAddress.is_proper_address(self.listen_address,
                                               self.listen_port):
            raise IOError("Incorrect socket address")

        prev_pub_addr, prev_pub_port, prev_nat_type = self.node.pub_addr, self.node.pub_port, self.node.nat_type
        self.node.pub_addr, self.node.pub_port, self.node.nat_type = yield self.get_external_address(
        )

        if prev_pub_addr != self.node.pub_addr or \
           prev_pub_port != self.node.pub_port or \
           prev_nat_type != self.node.nat_type:
            self.update_task_signatures()

        task.header.task_owner_address = self.listen_address
        task.header.task_owner_port = self.listen_port
        task.header.task_owner_key_id = self.key_id
        task.header.task_owner = self.node
        task.header.signature = self.sign_task_header(task.header)

        self.dir_manager.clear_temporary(task.header.task_id,
                                         undeletable=task.undeletable)
        self.dir_manager.get_task_temporary_dir(task.header.task_id,
                                                create=True)

        task.register_listener(self)
        task.task_status = TaskStatus.waiting

        self.tasks[task.header.task_id] = task

        ts = TaskState()
        ts.status = TaskStatus.waiting
        ts.outputs = task.get_output_names()
        ts.total_subtasks = task.get_total_tasks()
        ts.time_started = time.time()

        self.tasks_states[task.header.task_id] = ts

        if self.task_persistence:
            self.dump_task(task.header.task_id)
            logger.info("Task {} added".format(task.header.task_id))
            self.notice_task_updated(task.header.task_id)

    def dump_task(self, task_id):
        logger.debug('DUMP TASK')
        try:
            data = self.tasks[task_id], self.tasks_states[task_id]
            filepath = self.tasks_dir / ('%s.pickle' % (task_id, ))
            logger.debug('DUMP TASK %r', filepath)
            with filepath.open('wb') as f:
                pickle.dump(data, f, protocol=2)
        except:
            logger.exception('DUMP ERROR task_id: %r task: %r state: %r',
                             task_id, self.tasks.get(task_id, '<not found>'),
                             self.tasks_states.get(task_id, '<not found>'))
            if filepath.exists():
                filepath.unlink()
            raise

    def restore_tasks(self):
        logger.debug('RESTORE TASKS')
        for path in self.tasks_dir.iterdir():
            logger.debug('RESTORE TASKS %r', path)
            if not path.suffix == '.pickle':
                continue
            logger.debug('RESTORE TASKS really %r', path)
            with path.open('rb') as f:
                try:
                    task, state = pickle.load(f)
                    self.tasks[task.header.task_id] = task
                    self.tasks_states[task.header.task_id] = state
                except (pickle.UnpicklingError, EOFError, ImportError):
                    logger.exception('Problem restoring task from: %s', path)
                    path.unlink()
                    continue
            dispatcher.send(signal='golem.taskmanager',
                            event='task_restored',
                            task=task,
                            state=state)

    @handle_task_key_error
    def resources_send(self, task_id):
        self.tasks_states[task_id].status = TaskStatus.waiting
        self.tasks[task_id].task_status = TaskStatus.waiting
        self.notice_task_updated(task_id)
        logger.info("Resources for task {} sent".format(task_id))

    def get_next_subtask(self,
                         node_id,
                         node_name,
                         task_id,
                         estimated_performance,
                         price,
                         max_resource_size,
                         max_memory_size,
                         num_cores=0,
                         address=""):
        """ Assign next subtask from task <task_id> to node with given id <node_id> and name. If subtask is assigned
        the function is returning a tuple (
        :param node_id:
        :param node_name:
        :param task_id:
        :param estimated_performance:
        :param price:
        :param max_resource_size:
        :param max_memory_size:
        :param num_cores:
        :param address:
        :return (ComputeTaskDef|None, bool, bool): Function returns a triplet. First element is either ComputeTaskDef
        that describe assigned subtask or None. The second element describes whether the task_id is a wrong task that
        isn't in task manager register. If task with <task_id> it's a known task then second element of a pair is always
        False (regardless new subtask was assigned or not). The third element describes whether we're waiting for
        client's other task results.
        """
        logger.debug('get_next_subtask(%r, %r, %r, %r, %r, %r, %r, %r, %r)',
                     node_id, node_name, task_id, estimated_performance, price,
                     max_resource_size, max_memory_size, num_cores, address)
        if task_id not in self.tasks:
            logger.info("Cannot find task {} in my tasks".format(task_id))
            return None, True, False

        task = self.tasks[task_id]

        if task.header.max_price < price:
            return None, False, False

        def has_subtasks():
            if self.tasks_states[task_id].status not in self.activeStatus:
                logger.debug('state no in activestatus')
                return False
            if not task.needs_computation():
                logger.debug('not task.needs_computation')
                return False
            if task.header.resource_size > (long(max_resource_size) * 1024):
                logger.debug('resources size >')
                return False
            if task.header.estimated_memory > (long(max_memory_size) * 1024):
                logger.debug('estimated memory >')
                return False
            return True

        if not has_subtasks():
            logger.info(
                "Cannot get next task for estimated performance {}".format(
                    estimated_performance))
            return None, False, False

        extra_data = task.query_extra_data(estimated_performance, num_cores,
                                           node_id, node_name)
        if extra_data.should_wait:
            return None, False, True

        ctd = extra_data.ctd

        def check_compute_task_def():
            if not isinstance(ctd, ComputeTaskDef) or not ctd.subtask_id:
                logger.debug('check ctd: ctd not instance or not subtask_id')
                return False
            if task_id != ctd.task_id or ctd.subtask_id in self.subtask2task_mapping:
                logger.debug(
                    'check ctd: %r != %r or %r in self.subtask2task_maping',
                    task_id, ctd.task_id, ctd.subtask_id)
                return False
            if ctd.subtask_id in self.tasks_states[ctd.task_id].subtask_states:
                logger.debug('check ctd: subtask_states')
                return False
            return True

        if not check_compute_task_def():
            return None, False, False

        ctd.key_id = task.header.task_owner_key_id
        ctd.return_address = task.header.task_owner_address
        ctd.return_port = task.header.task_owner_port
        ctd.task_owner = task.header.task_owner

        self.subtask2task_mapping[ctd.subtask_id] = task_id
        self.__add_subtask_to_tasks_states(node_name, node_id, price, ctd,
                                           address)
        self.notice_task_updated(task_id)
        return ctd, False, extra_data.should_wait

    def get_tasks_headers(self):
        ret = []
        for t in self.tasks.values():
            if t.needs_computation() and t.task_status in self.activeStatus:
                ret.append(t.header)

        return ret

    def get_trust_mod(self, subtask_id):
        if subtask_id in self.subtask2task_mapping:
            task_id = self.subtask2task_mapping[subtask_id]
            return self.tasks[task_id].get_trust_mod(subtask_id)
        else:
            logger.error("This is not my subtask {}".format(subtask_id))
            return 0

    def update_task_signatures(self):
        for task in self.tasks.values():
            task.header.signature = self.sign_task_header(task.header)

    def sign_task_header(self, task_header):
        return self.keys_auth.sign(task_header.to_binary())

    def verify_subtask(self, subtask_id):
        if subtask_id in self.subtask2task_mapping:
            task_id = self.subtask2task_mapping[subtask_id]
            return self.tasks[task_id].verify_subtask(subtask_id)
        else:
            return False

    def get_node_id_for_subtask(self, subtask_id):
        if subtask_id in self.subtask2task_mapping:
            subtask_state = self.tasks_states[self.subtask2task_mapping[
                subtask_id]].subtask_states[subtask_id]
            return subtask_state.computer.node_id
        else:
            return None

    def set_value(self, task_id, subtask_id, value):
        if type(value) not in (int, long):
            raise TypeError(
                "Incorrect 'value' type: {}. Should be int or long".format(
                    type(value)))
        task_state = self.tasks_states.get(task_id)
        if task_state is None:
            logger.warning("This is not my task {}".format(task_id))
            return
        subtask_state = task_state.subtask_states.get(subtask_id)
        if subtask_state is None:
            logger.warning("This is not my subtask {}".format(subtask_id))
            return
        subtask_state.value = value

    @handle_subtask_key_error
    def get_value(self, subtask_id):
        """ Return value of a given subtask
        :param subtask_id:  id of a computed subtask
        :return long: price that should be paid for given subtask
        """
        task_id = self.subtask2task_mapping[subtask_id]
        return self.tasks_states[task_id].subtask_states[subtask_id].value

    @handle_subtask_key_error
    def computed_task_received(self, subtask_id, result, result_type):
        task_id = self.subtask2task_mapping[subtask_id]

        subtask_state = self.tasks_states[task_id].subtask_states[subtask_id]
        subtask_status = subtask_state.subtask_status

        if not SubtaskStatus.is_computed(subtask_status):
            logger.warning(
                "Result for subtask {} when subtask state is {}".format(
                    subtask_id, subtask_status))
            self.notice_task_updated(task_id)
            return False

        self.tasks[task_id].computation_finished(subtask_id, result,
                                                 result_type)
        ss = self.tasks_states[task_id].subtask_states[subtask_id]
        ss.subtask_progress = 1.0
        ss.subtask_rem_time = 0.0
        ss.subtask_status = SubtaskStatus.finished
        ss.stdout = self.tasks[task_id].get_stdout(subtask_id)
        ss.stderr = self.tasks[task_id].get_stderr(subtask_id)
        ss.results = self.tasks[task_id].get_results(subtask_id)

        if not self.tasks[task_id].verify_subtask(subtask_id):
            logger.debug("Subtask {} not accepted\n".format(subtask_id))
            ss.subtask_status = SubtaskStatus.failure
            self.notice_task_updated(task_id)
            return False

        if self.tasks_states[task_id].status in self.activeStatus:
            if not self.tasks[task_id].finished_computation():
                self.tasks_states[task_id].status = TaskStatus.computing
            else:
                if self.tasks[task_id].verify_task():
                    logger.debug("Task {} accepted".format(task_id))
                    self.tasks_states[task_id].status = TaskStatus.finished
                else:
                    logger.debug("Task {} not accepted".format(task_id))
        self.notice_task_updated(task_id)
        return True

    @handle_subtask_key_error
    def task_computation_failure(self, subtask_id, err):
        task_id = self.subtask2task_mapping[subtask_id]

        subtask_state = self.tasks_states[task_id].subtask_states[subtask_id]
        subtask_status = subtask_state.subtask_status

        if not SubtaskStatus.is_computed(subtask_status):
            logger.warning(
                "Result for subtask {} when subtask state is {}".format(
                    subtask_id, subtask_status))
            self.notice_task_updated(task_id)
            return False

        self.tasks[task_id].computation_failed(subtask_id)
        ss = self.tasks_states[task_id].subtask_states[subtask_id]
        ss.subtask_progress = 1.0
        ss.subtask_rem_time = 0.0
        ss.subtask_status = SubtaskStatus.failure
        ss.stderr = str(err)

        self.notice_task_updated(task_id)
        return True

    def task_result_incoming(self, subtask_id):
        node_id = self.get_node_id_for_subtask(subtask_id)

        if node_id and subtask_id in self.subtask2task_mapping:
            task_id = self.subtask2task_mapping[subtask_id]
            if task_id in self.tasks:
                task = self.tasks[task_id]
                states = self.tasks_states[task_id].subtask_states[subtask_id]

                task.result_incoming(subtask_id)
                states.subtask_status = SubtaskStatus.downloading

                self.notify_update_task(task_id)
            else:
                logger.error("Unknown task id: {}".format(task_id))
        else:
            logger.error("Node_id {} or subtask_id {} does not exist".format(
                node_id, subtask_id))

    # CHANGE TO RETURN KEY_ID (check IF SUBTASK COMPUTER HAS KEY_ID
    def check_timeouts(self):
        nodes_with_timeouts = []
        for t in self.tasks.values():
            th = t.header
            if self.tasks_states[th.task_id].status not in self.activeStatus:
                continue
            cur_time = get_timestamp_utc()
            if cur_time > th.deadline:
                logger.info("Task {} dies".format(th.task_id))
                t.task_stats = TaskStatus.timeout
                self.tasks_states[th.task_id].status = TaskStatus.timeout
                self.notice_task_updated(th.task_id)
            ts = self.tasks_states[th.task_id]
            for s in ts.subtask_states.values():
                if SubtaskStatus.is_computed(s.subtask_status):
                    if cur_time > s.deadline:
                        logger.info("Subtask {} dies".format(s.subtask_id))
                        s.subtask_status = SubtaskStatus.failure
                        nodes_with_timeouts.append(s.computer.node_id)
                        t.computation_failed(s.subtask_id)
                        s.stderr = "[GOLEM] Timeout"
                        self.notice_task_updated(th.task_id)
        return nodes_with_timeouts

    def get_progresses(self):
        tasks_progresses = {}

        for t in self.tasks.values():
            if t.get_progress() < 1.0:
                ltss = LocalTaskStateSnapshot(t.header.task_id,
                                              t.get_total_tasks(),
                                              t.get_active_tasks(),
                                              t.get_progress(),
                                              t.short_extra_data_repr(2200.0))
                tasks_progresses[t.header.task_id] = ltss

        return tasks_progresses

    @handle_task_key_error
    def get_resources(self, task_id, resource_header, resource_type=0):
        task = self.tasks[task_id]
        return task.get_resources(resource_header, resource_type)

    @handle_task_key_error
    def restart_task(self, task_id):
        logger.info("restarting task")
        self.dir_manager.clear_temporary(
            task_id, undeletable=self.tasks[task_id].undeletable)

        self.tasks[task_id].restart()
        self.tasks[task_id].task_status = TaskStatus.waiting
        self.tasks_states[task_id].status = TaskStatus.waiting
        self.tasks_states[task_id].time_started = time.time()

        for ss in self.tasks_states[task_id].subtask_states.values():
            if ss.subtask_status != SubtaskStatus.failure:
                ss.subtask_status = SubtaskStatus.restarted

        self.notice_task_updated(task_id)

    @handle_subtask_key_error
    def restart_subtask(self, subtask_id):
        task_id = self.subtask2task_mapping[subtask_id]
        self.tasks[task_id].restart_subtask(subtask_id)
        self.tasks_states[task_id].status = TaskStatus.computing
        self.tasks_states[task_id].subtask_states[
            subtask_id].subtask_status = SubtaskStatus.restarted
        self.tasks_states[task_id].subtask_states[
            subtask_id].stderr = "[GOLEM] Restarted"

        self.notice_task_updated(task_id)

    @handle_task_key_error
    def abort_task(self, task_id):
        self.tasks[task_id].abort()
        self.tasks[task_id].task_status = TaskStatus.aborted
        self.tasks_states[task_id].status = TaskStatus.aborted
        for sub in self.tasks_states[task_id].subtask_states.values():
            del self.subtask2task_mapping[sub.subtask_id]
        self.tasks_states[task_id].subtask_states.clear()

        self.notice_task_updated(task_id)

    @handle_task_key_error
    def pause_task(self, task_id):
        self.tasks[task_id].task_status = TaskStatus.paused
        self.tasks_states[task_id].status = TaskStatus.paused

        self.notice_task_updated(task_id)

    @handle_task_key_error
    def resume_task(self, task_id):
        self.tasks[task_id].task_status = TaskStatus.starting
        self.tasks_states[task_id].status = TaskStatus.starting

        self.notice_task_updated(task_id)

    @handle_task_key_error
    def delete_task(self, task_id):
        for sub in self.tasks_states[task_id].subtask_states.values():
            del self.subtask2task_mapping[sub.subtask_id]
        self.tasks_states[task_id].subtask_states.clear()

        self.tasks[task_id].unregister_listener(self)
        del self.tasks[task_id]
        del self.tasks_states[task_id]

        self.dir_manager.clear_temporary(task_id)

    @handle_task_key_error
    def query_task_state(self, task_id):
        ts = self.tasks_states[task_id]
        t = self.tasks[task_id]

        ts.progress = t.get_progress()
        ts.elapsed_time = time.time() - ts.time_started

        if ts.progress > 0.0:
            ts.remaining_time = (ts.elapsed_time /
                                 ts.progress) - ts.elapsed_time
        else:
            ts.remaining_time = -0.0

        t.update_task_state(ts)

        return ts

    def get_subtasks(self, task_id):
        """
        Get all subtasks related to given task id
        :param task_id: Task ID
        :return: list of all subtasks related with @task_id or None if @task_id is not known
        """
        if task_id not in self.tasks_states:
            return None
        return [
            sub.subtask_id
            for sub in self.tasks_states[task_id].subtask_states.values()
        ]

    def change_config(self, root_path, use_distributed_resource_management):
        self.dir_manager = DirManager(root_path)
        self.use_distributed_resources = use_distributed_resource_management

    @handle_task_key_error
    def change_timeouts(self, task_id, full_task_timeout, subtask_timeout):
        task = self.tasks[task_id]
        task.header.deadline = timeout_to_deadline(full_task_timeout)
        task.header.subtask_timeout = subtask_timeout
        task.full_task_timeout = full_task_timeout
        task.header.last_checking = time.time()

    def get_task_id(self, subtask_id):
        return self.subtask2task_mapping[subtask_id]

    def get_task_dict(self, task_id):
        task = self.tasks[task_id]

        # single=True retrieves one preview file. If rendering frames,
        # it's the preview of the most recently computed frame.
        dictionary = {u'preview': task.get_preview(single=True)}
        dictionary.update(self.get_simple_task_dict(task))
        dictionary.update(self.get_task_definition_dict(task))
        return dictionary

    def get_tasks_dict(self):
        return [self.get_simple_task_dict(t) for t in self.tasks.itervalues()]

    def get_subtask_dict(self, subtask_id):
        task_id = self.subtask2task_mapping[subtask_id]
        task_state = self.tasks_states[task_id]
        subtask = task_state.subtask_states[subtask_id]
        return subtask.to_dictionary()

    def get_subtasks_dict(self, task_id):
        task_state = self.tasks_states[task_id]
        subtasks = task_state.subtask_states
        return [subtask.to_dictionary() for subtask in subtasks.itervalues()]

    def get_subtasks_borders(self, task_id):
        task = self.tasks[task_id]
        task_type_name = task.task_definition.task_type.lower()
        task_type = self.task_types[task_type_name]
        task_state = self.tasks_states[task_id]
        total_subtasks = task.get_total_tasks()

        return {
            to_unicode(subtask.subtask_id):
            task_type.get_task_border(subtask,
                                      task.task_definition,
                                      total_subtasks,
                                      as_path=True)
            for subtask in task_state.subtask_states.values()
        }

    def get_simple_task_dict(self, task):
        state = self.tasks_states.get(task.header.task_id)
        timeout = task.task_definition.full_task_timeout

        dictionary = {u'duration': max(timeout - state.remaining_time, 0)}
        dictionary.update(task.to_dictionary())
        dictionary.update(state.to_dictionary())
        return dictionary

    def get_task_preview(self, task_id, single=False):
        return self.tasks[task_id].get_preview(single=single)

    @handle_subtask_key_error
    def set_computation_time(self, subtask_id, computation_time):
        """
        Set computation time for subtask and also compute and set new value based on saved price for this subtask
        :param str subtask_id: subtask which was computed in given computation_time
        :param float computation_time: how long does it take to compute this task
        :return:
        """
        task_id = self.subtask2task_mapping[subtask_id]
        ss = self.tasks_states[task_id].subtask_states[subtask_id]
        ss.computation_time = computation_time
        ss.value = compute_subtask_value(ss.computer.price, computation_time)

    def add_comp_task_request(self, theader, price):
        """ Add a header of a task which this node may try to compute """
        self.comp_task_keeper.add_request(theader, price)

    @handle_task_key_error
    def get_payment_for_task_id(self, task_id):
        val = 0.0
        t = self.tasks_states[task_id]
        for ss in t.subtask_states.values():
            val += ss.value
        return val

    def __add_subtask_to_tasks_states(self, node_name, node_id, price, ctd,
                                      address):

        if ctd.task_id not in self.tasks_states:
            raise RuntimeError("Should never be here!")

        logger.debug('add_subtask_to_tasks_states(%r, %r, %r, %r, %r)',
                     node_name, node_id, price, ctd, address)

        ss = SubtaskState()
        ss.computer.node_id = node_id
        ss.computer.node_name = node_name
        ss.computer.performance = ctd.performance
        ss.computer.ip_address = address
        ss.computer.price = price
        ss.time_started = time.time()
        ss.deadline = ctd.deadline
        # TODO: read node ip address
        ss.subtask_definition = ctd.short_description
        ss.subtask_id = ctd.subtask_id
        ss.extra_data = ctd.extra_data
        ss.subtask_status = TaskStatus.starting
        ss.value = 0

        self.tasks_states[ctd.task_id].subtask_states[ctd.subtask_id] = ss

    def notify_update_task(self, task_id):
        self.notice_task_updated(task_id)

    @handle_task_key_error
    def notice_task_updated(self, task_id):
        # self.save_state()
        if self.task_persistence:
            self.dump_task(task_id)
        dispatcher.send(signal='golem.taskmanager',
                        event='task_status_updated',
                        task_id=task_id)
コード例 #3
0
ファイル: taskmanager.py プロジェクト: U0001F3A2/golem
class TaskManager(TaskEventListener):
    """ Keeps and manages information about requested tasks
    Requestor uses TaskManager to assign task to providers
    """
    handle_task_key_error = HandleKeyError(log_task_key_error)
    handle_subtask_key_error = HandleKeyError(log_subtask_key_error)
    handle_generic_key_error = HandleForwardedError(KeyError,
                                                    log_generic_key_error)

    class Error(Exception):
        pass

    class AlreadyRestartedError(Error):
        pass

    def __init__(self,
                 node_name,
                 node,
                 keys_auth,
                 root_path,
                 use_distributed_resources=True,
                 tasks_dir="tasks",
                 task_persistence=True,
                 apps_manager=AppsManager(),
                 finished_cb=None):
        super().__init__()

        self.apps_manager = apps_manager
        apps = list(apps_manager.apps.values())
        task_types = [app.task_type_info() for app in apps]
        self.task_types = {t.name.lower(): t for t in task_types}

        self.node_name = node_name
        self.node = node
        self.keys_auth = keys_auth
        self.key_id = keys_auth.key_id

        self.tasks: Dict[str, Task] = {}
        self.tasks_states: Dict[str, TaskState] = {}
        self.subtask2task_mapping: Dict[str, str] = {}

        self.task_persistence = task_persistence

        tasks_dir = Path(tasks_dir)
        self.tasks_dir = tasks_dir / "tmanager"
        if not self.tasks_dir.is_dir():
            self.tasks_dir.mkdir(parents=True)
        self.root_path = root_path
        self.dir_manager = DirManager(self.get_task_manager_root())

        resource_manager = HyperdriveResourceManager(
            self.dir_manager,
            resource_dir_method=self.dir_manager.get_task_temporary_dir,
        )
        self.task_result_manager = EncryptedResultPackageManager(
            resource_manager)

        self.activeStatus = [
            TaskStatus.computing, TaskStatus.starting, TaskStatus.waiting
        ]
        self.use_distributed_resources = use_distributed_resources

        self.comp_task_keeper = CompTaskKeeper(
            tasks_dir,
            persist=self.task_persistence,
        )

        self.requestor_stats_manager = RequestorTaskStatsManager()

        self.finished_cb = finished_cb

        if self.task_persistence:
            self.restore_tasks()

    def get_task_manager_root(self):
        return self.root_path

    def create_task(self, dictionary, minimal=False):
        purpose = TaskPurpose.TESTING if minimal else TaskPurpose.REQUESTING
        type_name = dictionary['type'].lower()
        compute_on = dictionary.get('compute_on', 'cpu').lower()
        is_requesting = purpose == TaskPurpose.REQUESTING

        if type_name == "blender" and is_requesting and compute_on == "gpu":
            type_name = type_name + "_nvgpu"

        task_type = self.task_types[type_name].for_purpose(purpose)
        builder_type = task_type.task_builder_type

        definition = builder_type.build_definition(task_type, dictionary,
                                                   minimal)
        definition.task_id = CoreTask.create_task_id(self.keys_auth.public_key)
        definition.concent_enabled = dictionary.get('concent_enabled', False)
        builder = builder_type(self.node, definition, self.dir_manager)

        return builder.build()

    def get_task_definition_dict(self, task: Task):
        if isinstance(task, dict):
            return task
        definition = task.task_definition
        task_type = self.task_types[definition.task_type.lower()]
        return task_type.task_builder_type.build_dictionary(definition)

    def add_new_task(self, task: Task, estimated_fee: int = 0) -> None:
        task_id = task.header.task_id
        if task_id in self.tasks:
            raise RuntimeError("Task {} has been already added".format(
                task.header.task_id))
        if not self.key_id:
            raise ValueError("'key_id' is not set")

        task.header.fixed_header.task_owner = self.node
        task.header.signature = self.sign_task_header(task.header)

        task.create_reference_data_for_task_validation()
        task.register_listener(self)

        ts = TaskState()
        ts.status = TaskStatus.notStarted
        ts.outputs = task.get_output_names()
        ts.subtasks_count = task.get_total_tasks()
        ts.time_started = time.time()
        ts.estimated_cost = task.price
        ts.estimated_fee = estimated_fee

        self.tasks[task_id] = task
        self.tasks_states[task_id] = ts
        logger.info("Task %s added", task_id)

        self.notice_task_updated(task_id, op=TaskOp.CREATED, persist=False)

    @handle_task_key_error
    def increase_task_mask(self, task_id: str, num_bits: int = 1) -> None:
        """ Increase mask for given task i.e. make it more restrictive """
        task = self.tasks[task_id]
        try:
            task.header.mask.increase(num_bits)
        except ValueError:
            logger.exception('Wrong number of bits for mask increase')
        else:
            task.header.signature = self.sign_task_header(task.header)

    @handle_task_key_error
    def decrease_task_mask(self, task_id: str, num_bits: int = 1) -> None:
        """ Decrease mask for given task i.e. make it less restrictive """
        task = self.tasks[task_id]
        try:
            task.header.mask.decrease(num_bits)
        except ValueError:
            logger.exception('Wrong number of bits for mask decrease')
        else:
            task.header.signature = self.sign_task_header(task.header)

    @handle_task_key_error
    def start_task(self, task_id):
        task_state = self.tasks_states[task_id]

        if not task_state.status.is_preparing():
            raise RuntimeError(
                "Task {} has already been started".format(task_id))

        task_state.status = TaskStatus.waiting
        self.notice_task_updated(task_id, op=TaskOp.STARTED)
        logger.info("Task %s started", task_id)

    def _dump_filepath(self, task_id):
        return self.tasks_dir / ('%s.pickle' % (task_id, ))

    def dump_task(self, task_id: str) -> None:
        logger.debug('DUMP TASK %r', task_id)
        filepath = self._dump_filepath(task_id)
        try:
            data = self.tasks[task_id], self.tasks_states[task_id]
            logger.debug('DUMPING TASK %r', filepath)
            with filepath.open('wb') as f:
                pickle.dump(data, f, protocol=2)
            logger.debug('TASK %s DUMPED in %r', task_id, filepath)
        except Exception as e:
            logger.exception(
                'DUMP ERROR task_id: %r task: %r state: %r',
                task_id,
                self.tasks.get(task_id, '<not found>'),
                self.tasks_states.get(task_id, '<not found>'),
            )
            if filepath.exists():
                filepath.unlink()
            raise

    def remove_dump(self, task_id: str):
        filepath = self._dump_filepath(task_id)
        try:
            filepath.unlink()
            logger.debug('TASK DUMP with id %s REMOVED from %r', task_id,
                         filepath)
        except (FileNotFoundError, OSError) as e:
            logger.warning("Couldn't remove dump file: %s - %s", filepath, e)

    @staticmethod
    def _migrate_status_to_enum(state: TaskState) -> None:
        """
        This is a migration for data stored in pickles.
        See #2768
        """
        if isinstance(state.status, str):
            state.status = TaskStatus(state.status)

        subtask_state: SubtaskState
        for subtask_state in state.subtask_states.values():
            if isinstance(subtask_state.subtask_status, str):
                subtask_state.subtask_status = \
                    SubtaskStatus(subtask_state.subtask_status)

    def restore_tasks(self) -> None:
        logger.debug('SEARCHING FOR TASKS TO RESTORE')
        broken_paths = set()
        for path in self.tasks_dir.iterdir():
            if not path.suffix == '.pickle':
                continue
            logger.debug('RESTORE TASKS %r', path)

            task_id = None
            with path.open('rb') as f:
                try:
                    task: Task
                    state: TaskState
                    task, state = pickle.load(f)

                    TaskManager._migrate_status_to_enum(state)

                    task.register_listener(self)

                    task_id = task.header.task_id
                    self.tasks[task_id] = task
                    self.tasks_states[task_id] = state

                    for sub in state.subtask_states.values():
                        self.subtask2task_mapping[sub.subtask_id] = task_id

                    logger.debug('TASK %s RESTORED from %r', task_id, path)
                except (pickle.UnpicklingError, EOFError, ImportError,
                        KeyError, AttributeError):
                    logger.exception('Problem restoring task from: %s', path)
                    # On Windows, attempting to remove a file that is in use
                    # causes an exception to be raised, therefore
                    # we'll remove broken files later
                    broken_paths.add(path)

            if task_id is not None:
                self.notice_task_updated(task_id,
                                         op=TaskOp.RESTORED,
                                         persist=False)

        for path in broken_paths:
            path.unlink()

    @handle_task_key_error
    def resources_send(self, task_id):
        self.tasks_states[task_id].status = TaskStatus.waiting
        self.notice_task_updated(task_id)
        logger.info("Resources for task {} sent".format(task_id))

    def got_wants_to_compute(
        self,
        task_id: str,
        key_id: str,  # pylint: disable=unused-argument
        node_name: str):  # pylint: disable=unused-argument
        """
        Updates number of offers to compute task.

        For statistical purposes only, real processing of the offer is done
        elsewhere. Silently ignores wrong task ids.

        :param str task_id: id of the task in the offer
        :param key_id: id of the node offering computations
        :param node_name: name of the node offering computations
        :return: Nothing
        :rtype: None
        """
        if task_id in self.tasks:
            self.notice_task_updated(task_id,
                                     op=TaskOp.WORK_OFFER_RECEIVED,
                                     persist=False)

    def task_needs_computation(self, task_id: str) -> bool:
        task_status = self.tasks_states[task_id].status
        if task_status not in self.activeStatus:
            logger.info(
                f'task is not active: {task_id}, status: {task_status}')
            return False
        task = self.tasks[task_id]
        if not task.needs_computation():
            logger.info(f'no more computation needed: {task_id}')
            return False
        return True

    def get_next_subtask(self,
                         node_id,
                         node_name,
                         task_id,
                         estimated_performance,
                         price,
                         max_resource_size,
                         max_memory_size,
                         num_cores=0,
                         address=""):
        """ Assign next subtask from task <task_id> to node with given
        id <node_id> and name. If subtask is assigned the function
        is returning a tuple
        :param node_id:
        :param node_name:
        :param task_id:
        :param estimated_performance:
        :param price:
        :param max_resource_size:
        :param max_memory_size:
        :param num_cores:
        :param address:
        :return (ComputeTaskDef|None: Function returns a ComputeTaskDef.
        First element is either ComputeTaskDef that describe assigned subtask
        or None. It is recommended to call is_my_task and should_wait_for_node
        before this to find the reason why the task is not able to be picked up
        """
        logger.debug(
            'get_next_subtask(%r, %r, %r, %r, %r, %r, %r, %r, %r)',
            node_id,
            node_name,
            task_id,
            estimated_performance,
            price,
            max_resource_size,
            max_memory_size,
            num_cores,
            address,
        )

        if not self.is_my_task(task_id):
            return None

        if not self.check_next_subtask(node_id, node_name, task_id, price):
            return None

        if self.should_wait_for_node(task_id, node_id):
            return None

        task = self.tasks[task_id]

        if task.get_progress() == 1.0:
            logger.error(
                "Task already computed. "
                "task_id=%r, node_name=%r, node_id=%r", task_id, node_name,
                node_id)
            return None

        extra_data = task.query_extra_data(estimated_performance, num_cores,
                                           node_id, node_name)
        ctd = extra_data.ctd

        def check_compute_task_def():
            if not isinstance(ctd, ComputeTaskDef) or not ctd['subtask_id']:
                logger.debug('check ctd: ctd not instance or not subtask_id')
                return False
            if task_id != ctd['task_id'] \
                    or ctd['subtask_id'] in self.subtask2task_mapping:
                logger.debug(
                    'check ctd: %r != %r or %r in self.subtask2task_maping',
                    task_id,
                    ctd['task_id'],
                    ctd['subtask_id'],
                )
                return False
            if (ctd['subtask_id']
                    in self.tasks_states[ctd['task_id']].subtask_states):
                logger.debug('check ctd: subtask_states')
                return False
            return True

        if not check_compute_task_def():
            return None

        task.accept_client(node_id)

        self.subtask2task_mapping[ctd['subtask_id']] = task_id
        self.__add_subtask_to_tasks_states(
            node_name,
            node_id,
            ctd,
            address,
        )
        self.notice_task_updated(task_id,
                                 subtask_id=ctd['subtask_id'],
                                 op=SubtaskOp.ASSIGNED)
        logger.debug(
            "Subtask generated. task=%s, node=%s, ctd=%s",
            task_id,
            node_info_str(node_name, node_id),
            ctd,
        )

        return ctd

    def is_my_task(self, task_id) -> bool:
        """ Check if the task_id is known by this node """
        return task_id in self.tasks

    def should_wait_for_node(self, task_id, node_id) -> bool:
        """ Check if the node has too many tasks assigned already """
        if not self.is_my_task(task_id):
            logger.debug(
                "Not my task. task_id=%s, node=%s",
                task_id,
                short_node_id(node_id),
            )
            return False

        task = self.tasks[task_id]

        verdict = task.should_accept_client(node_id)
        logger.debug(
            "Should accept client verdict. verdict=%s, task=%s, node=%s",
            verdict,
            task_id,
            short_node_id(node_id),
        )
        if verdict == AcceptClientVerdict.SHOULD_WAIT:
            logger.warning("Waiting for results from %s on %s",
                           short_node_id(node_id), task_id)
            return True
        elif verdict == AcceptClientVerdict.REJECTED:
            logger.warning(
                "Client has failed on subtask within this task"
                " and is banned from it. node_id=%s, task_id=%s",
                short_node_id(node_id), task_id)

        return False

    def check_next_subtask(  # noqa pylint: disable=too-many-arguments
            self, node_id, node_name, task_id, price):
        """ Check next subtask from task <task_id> to give to node with
        id <node_id> and name. The returned tuple can be used to find the reason
        and handle accordingly.
        :param node_id:
        :param node_name:
        :param task_id:
        :param price:
        :return bool: Function returns a boolean.
        The return value describes if the task is able to be assigned
        """
        logger.debug(
            'check_next_subtask(%r, %r, %r, %r)',
            node_id,
            node_name,
            task_id,
            price,
        )
        if not self.is_my_task(task_id):
            logger.info(
                "Cannot find task in my tasks. task_id=%s, provider=%s",
                task_id, node_info_str(node_name, node_id))
            return False

        task = self.tasks[task_id]

        if task.header.max_price < price:
            return False

        if not self.task_needs_computation(task_id):
            logger.info(
                'Task does not need computation. task_id=%s, provider=%s',
                task_id, node_info_str(node_name, node_id))
            return False

        return True

    def copy_results(self, old_task_id: str, new_task_id: str,
                     subtask_ids_to_copy: Iterable[str]) -> None:

        try:
            old_task = self.tasks[old_task_id]
            new_task = self.tasks[new_task_id]
            assert isinstance(old_task, CoreTask)
            assert isinstance(new_task, CoreTask)
        except (KeyError, AssertionError):
            logger.exception('Cannot copy results from task %r to %r',
                             old_task_id, new_task_id)
            return

        # Map new subtasks to old by 'start_task'
        subtasks_to_copy = {
            subtask['start_task']: subtask
            for subtask in map(lambda id_: old_task.subtasks_given[id_],
                               subtask_ids_to_copy)
        }

        # Generate all subtasks for the new task
        new_subtasks_ids = []
        while new_task.needs_computation():
            extra_data = new_task.query_extra_data(0,
                                                   node_id=str(uuid.uuid4()))
            new_subtask_id = extra_data.ctd['subtask_id']
            self.subtask2task_mapping[new_subtask_id] = \
                new_task_id
            self.__add_subtask_to_tasks_states(node_name=None,
                                               node_id=None,
                                               address=None,
                                               ctd=extra_data.ctd)
            new_subtasks_ids.append(new_subtask_id)

        # it's important to do this step separately, to not disturb
        # 'needs_computation' condition above
        for new_subtask_id in new_subtasks_ids:
            self.tasks_states[new_task_id].subtask_states[new_subtask_id]\
                .subtask_status = SubtaskStatus.failure
            new_task.subtasks_given[new_subtask_id]['status'] \
                = SubtaskStatus.failure
            new_task.num_failed_subtasks += 1

        def handle_copy_error(subtask_id, error):
            logger.error('Cannot copy result of subtask %r: %r', subtask_id,
                         error)
            self.restart_subtask(subtask_id)

        for new_subtask_id, new_subtask in new_task.subtasks_given.items():
            old_subtask = subtasks_to_copy.get(new_subtask['start_task'])

            if old_subtask:  # Copy results from old subtask
                deferred = self._copy_subtask_results(old_task=old_task,
                                                      new_task=new_task,
                                                      old_subtask=old_subtask,
                                                      new_subtask=new_subtask)
                deferred.addErrback(partial(handle_copy_error, new_subtask_id))

            else:  # Restart subtask to get it computed
                self.restart_subtask(new_subtask_id)

    def _copy_subtask_results(self, old_task: CoreTask, new_task: CoreTask,
                              old_subtask: dict,
                              new_subtask: dict) -> Deferred:

        old_task_id = old_task.header.task_id
        new_task_id = new_task.header.task_id
        assert isinstance(old_task.tmp_dir, str)
        assert isinstance(new_task.tmp_dir, str)
        old_tmp_dir = Path(old_task.tmp_dir)
        new_tmp_dir = Path(new_task.tmp_dir)
        old_subtask_id = old_subtask['subtask_id']
        new_subtask_id = new_subtask['subtask_id']

        def copy_and_extract_zips():
            # TODO: Refactor this using package manager (?)
            old_result_path = old_tmp_dir / '{}.{}.zip'.format(
                old_task_id, old_subtask_id)
            new_result_path = new_tmp_dir / '{}.{}.zip'.format(
                new_task_id, new_subtask_id)
            shutil.copy(old_result_path, new_result_path)

            subtask_result_dir = new_tmp_dir / new_subtask_id
            os.makedirs(subtask_result_dir)
            with ZipFile(new_result_path, 'r') as zf:
                zf.extractall(subtask_result_dir)
                return [
                    str(subtask_result_dir / name) for name in zf.namelist()
                    if name != '.package_desc'
                ]

        def after_results_extracted(results):
            new_task.copy_subtask_results(new_subtask_id, old_subtask, results)

            new_subtask_state = \
                self.__set_subtask_state_finished(new_subtask_id)
            old_subtask_state = self.tasks_states[old_task_id] \
                .subtask_states[old_subtask_id]

            self.notice_task_updated(task_id=new_task_id,
                                     subtask_id=new_subtask_id,
                                     op=SubtaskOp.FINISHED)

        deferred = deferToThread(copy_and_extract_zips)
        deferred.addCallback(after_results_extracted)
        return deferred

    def get_tasks_headers(self):
        ret = []
        for tid, task in self.tasks.items():
            status = self.tasks_states[tid].status
            if task.needs_computation() and status in self.activeStatus:
                ret.append(task.header)

        return ret

    def get_trust_mod(self, subtask_id):
        if subtask_id in self.subtask2task_mapping:
            task_id = self.subtask2task_mapping[subtask_id]
            return self.tasks[task_id].get_trust_mod(subtask_id)
        else:
            logger.error("This is not my subtask {}".format(subtask_id))
            return 0

    def update_task_signatures(self):
        for task in list(self.tasks.values()):
            task.header.signature = self.sign_task_header(task.header)

    def sign_task_header(self, task_header):
        return self.keys_auth.sign(task_header.to_binary())

    def verify_subtask(self, subtask_id):
        if subtask_id in self.subtask2task_mapping:
            task_id = self.subtask2task_mapping[subtask_id]
            return self.tasks[task_id].verify_subtask(subtask_id)
        else:
            return False

    def is_this_my_task(self, header: TaskHeader) -> bool:
        return header.task_id in self.tasks or \
               header.task_owner.key == self.node.key

    def get_node_id_for_subtask(self, subtask_id):
        if subtask_id not in self.subtask2task_mapping:
            return None
        task = self.subtask2task_mapping[subtask_id]
        subtask_state = self.tasks_states[task].subtask_states[subtask_id]
        return subtask_state.node_id

    @handle_subtask_key_error
    def computed_task_received(self, subtask_id, result,
                               verification_finished):
        task_id = self.subtask2task_mapping[subtask_id]

        subtask_state = self.tasks_states[task_id].subtask_states[subtask_id]
        subtask_status = subtask_state.subtask_status

        if not subtask_status.is_computed():
            logger.warning(
                "Result for subtask {} when subtask state is {}".format(
                    subtask_id, subtask_status.value))
            self.notice_task_updated(task_id,
                                     subtask_id=subtask_id,
                                     op=OtherOp.UNEXPECTED)
            verification_finished()
            return
        subtask_state.subtask_status = SubtaskStatus.verifying

        @TaskManager.handle_generic_key_error
        def verification_finished_():
            ss = self.__set_subtask_state_finished(subtask_id)
            if not self.tasks[task_id].verify_subtask(subtask_id):
                logger.debug("Subtask %r not accepted\n", subtask_id)
                ss.subtask_status = SubtaskStatus.failure
                self.notice_task_updated(task_id,
                                         subtask_id=subtask_id,
                                         op=SubtaskOp.NOT_ACCEPTED)
                verification_finished()
                return

            self.notice_task_updated(task_id,
                                     subtask_id=subtask_id,
                                     op=SubtaskOp.FINISHED)

            if self.tasks_states[task_id].status in self.activeStatus:
                if not self.tasks[task_id].finished_computation():
                    self.tasks_states[task_id].status = TaskStatus.computing
                else:
                    if self.tasks[task_id].verify_task():
                        logger.info("Task finished! task_id=%r", task_id)
                        self.tasks_states[task_id].status =\
                            TaskStatus.finished
                        self.notice_task_updated(task_id, op=TaskOp.FINISHED)
                    else:
                        logger.warning(
                            "Task finished but was not accepted. "
                            "task_id=%r", task_id)
                        self.notice_task_updated(task_id,
                                                 op=TaskOp.NOT_ACCEPTED)
            verification_finished()

        self.tasks[task_id].computation_finished(subtask_id, result,
                                                 verification_finished_)

    @handle_subtask_key_error
    def __set_subtask_state_finished(self, subtask_id: str) -> SubtaskState:
        task_id = self.subtask2task_mapping[subtask_id]
        ss = self.tasks_states[task_id].subtask_states[subtask_id]
        ss.subtask_progress = 1.0
        ss.subtask_rem_time = 0.0
        ss.subtask_status = SubtaskStatus.finished
        ss.stdout = self.tasks[task_id].get_stdout(subtask_id)
        ss.stderr = self.tasks[task_id].get_stderr(subtask_id)
        ss.results = self.tasks[task_id].get_results(subtask_id)
        return ss

    @handle_subtask_key_error
    def task_computation_failure(self, subtask_id, err):
        task_id = self.subtask2task_mapping[subtask_id]

        subtask_state = self.tasks_states[task_id].subtask_states[subtask_id]
        subtask_status = subtask_state.subtask_status

        if not subtask_status.is_computed():
            logger.warning(
                "Result for subtask {} when subtask state is {}".format(
                    subtask_id, subtask_status.value))
            self.notice_task_updated(task_id,
                                     subtask_id=subtask_id,
                                     op=OtherOp.UNEXPECTED)
            return False

        self.tasks[task_id].computation_failed(subtask_id)
        ss = self.tasks_states[task_id].subtask_states[subtask_id]
        ss.subtask_progress = 1.0
        ss.subtask_rem_time = 0.0
        ss.subtask_status = SubtaskStatus.failure
        ss.stderr = str(err)

        self.notice_task_updated(task_id,
                                 subtask_id=subtask_id,
                                 op=SubtaskOp.FAILED)
        return True

    def task_result_incoming(self, subtask_id):
        node_id = self.get_node_id_for_subtask(subtask_id)

        if node_id and subtask_id in self.subtask2task_mapping:
            task_id = self.subtask2task_mapping[subtask_id]
            if task_id in self.tasks:
                task = self.tasks[task_id]
                states = self.tasks_states[task_id].subtask_states[subtask_id]

                task.result_incoming(subtask_id)
                states.subtask_status = SubtaskStatus.downloading

                self.notice_task_updated(task_id,
                                         subtask_id=subtask_id,
                                         op=SubtaskOp.RESULT_DOWNLOADING)
            else:
                logger.error("Unknown task id: {}".format(task_id))
        else:
            logger.error("Node_id {} or subtask_id {} does not exist".format(
                node_id, subtask_id))

    # CHANGE TO RETURN KEY_ID (check IF SUBTASK COMPUTER HAS KEY_ID
    def check_timeouts(self):
        nodes_with_timeouts = []
        for t in list(self.tasks.values()):
            th = t.header
            if self.tasks_states[th.task_id].status not in self.activeStatus:
                continue
            cur_time = get_timestamp_utc()
            # Check subtask timeout
            ts = self.tasks_states[th.task_id]
            for s in list(ts.subtask_states.values()):
                if s.subtask_status.is_computed():
                    if cur_time > s.deadline:
                        logger.info("Subtask %r dies with status %r",
                                    s.subtask_id, s.subtask_status.value)
                        s.subtask_status = SubtaskStatus.failure
                        nodes_with_timeouts.append(s.node_id)
                        t.computation_failed(s.subtask_id)
                        s.stderr = "[GOLEM] Timeout"
                        self.notice_task_updated(th.task_id,
                                                 subtask_id=s.subtask_id,
                                                 op=SubtaskOp.TIMEOUT)
            # Check task timeout
            if cur_time > th.deadline:
                logger.info("Task %r dies", th.task_id)
                self.tasks_states[th.task_id].status = TaskStatus.timeout
                # TODO: t.tell_it_has_timeout()?
                self.notice_task_updated(th.task_id, op=TaskOp.TIMEOUT)
        return nodes_with_timeouts

    def get_progresses(self):
        tasks_progresses = {}

        for t in list(self.tasks.values()):
            task_id = t.header.task_id
            task_state = self.tasks_states[task_id]
            task_status = task_state.status
            in_progress = not TaskStatus.is_completed(task_status)
            logger.info('Collecting progress %r %r %r', task_id, task_status,
                        in_progress)
            if in_progress:
                ltss = LocalTaskStateSnapshot(
                    task_id, t.get_total_tasks(), t.get_active_tasks(),
                    t.get_progress(),
                    t.short_extra_data_repr(task_state.extra_data)
                )  # FIXME in short_extra_data_repr should there be extra data
                # Issue #2460
                tasks_progresses[task_id] = ltss

        return tasks_progresses

    @handle_task_key_error
    def assert_task_can_be_restarted(self, task_id: str) -> None:
        task_state = self.tasks_states[task_id]
        if task_state.status == TaskStatus.restarted:
            raise self.AlreadyRestartedError()

    @handle_task_key_error
    def put_task_in_restarted_state(self, task_id, clear_tmp=True):
        """
        When restarting task, it's put in a final state 'restarted' and
        a new one is created.
        """
        self.assert_task_can_be_restarted(task_id)
        if clear_tmp:
            self.dir_manager.clear_temporary(task_id)

        task_state = self.tasks_states[task_id]
        task_state.status = TaskStatus.restarted

        for ss in self.tasks_states[task_id].subtask_states.values():
            if ss.subtask_status != SubtaskStatus.failure:
                ss.subtask_status = SubtaskStatus.restarted

        logger.info("Task %s put into restarted state", task_id)
        self.notice_task_updated(task_id, op=TaskOp.RESTARTED)

    @handle_subtask_key_error
    def restart_subtask(self, subtask_id):
        task_id = self.subtask2task_mapping[subtask_id]
        self.tasks[task_id].restart_subtask(subtask_id)
        task_state = self.tasks_states[task_id]
        task_state.status = TaskStatus.computing
        subtask_state = task_state.subtask_states[subtask_id]
        subtask_state.subtask_status = SubtaskStatus.restarted
        subtask_state.stderr = "[GOLEM] Restarted"

        self.notice_task_updated(task_id,
                                 subtask_id=subtask_id,
                                 op=SubtaskOp.RESTARTED)

    @handle_task_key_error
    def restart_frame_subtasks(self, task_id, frame):
        task = self.tasks[task_id]
        task_state = self.tasks_states[task_id]
        subtasks = task.get_subtasks(frame)

        if not subtasks:
            return

        for subtask_id in list(subtasks.keys()):
            task.restart_subtask(subtask_id)
            subtask_state = task_state.subtask_states[subtask_id]
            subtask_state.subtask_status = SubtaskStatus.restarted
            subtask_state.stderr = "[GOLEM] Restarted"
            self.notice_task_updated(task_id,
                                     subtask_id=subtask_id,
                                     op=SubtaskOp.RESTARTED,
                                     persist=False)

        task_state.status = TaskStatus.computing
        self.notice_task_updated(task_id, op=OtherOp.FRAME_RESTARTED)

    @handle_task_key_error
    def abort_task(self, task_id):
        self.tasks[task_id].abort()
        self.tasks_states[task_id].status = TaskStatus.aborted
        for sub in list(self.tasks_states[task_id].subtask_states.values()):
            del self.subtask2task_mapping[sub.subtask_id]
        self.tasks_states[task_id].subtask_states.clear()

        self.notice_task_updated(task_id, op=TaskOp.ABORTED)

    @rpc_utils.expose('comp.task.subtasks.frames')
    @handle_task_key_error
    def get_output_states(self, task_id):
        return self.tasks[task_id].get_output_states()

    @handle_task_key_error
    def delete_task(self, task_id):
        for sub in list(self.tasks_states[task_id].subtask_states.values()):
            del self.subtask2task_mapping[sub.subtask_id]
        self.tasks_states[task_id].subtask_states.clear()

        self.tasks[task_id].unregister_listener(self)
        del self.tasks[task_id]
        del self.tasks_states[task_id]

        self.dir_manager.clear_temporary(task_id)
        self.remove_dump(task_id)
        if self.finished_cb:
            self.finished_cb()

    @handle_task_key_error
    def query_task_state(self, task_id):
        ts = self.tasks_states[task_id]
        t = self.tasks[task_id]

        ts.progress = t.get_progress()
        ts.elapsed_time = time.time() - ts.time_started

        if ts.progress > 0.0:
            proportion = (ts.elapsed_time / ts.progress)
            ts.remaining_time = proportion - ts.elapsed_time
        else:
            ts.remaining_time = None

        t.update_task_state(ts)

        return ts

    def get_subtasks(self, task_id) -> Optional[List[str]]:
        """
        Get all subtasks related to given task id
        :param task_id: Task ID
        :return: list of all subtasks related with @task_id or None
                 if @task_id is not known
        """
        task_state = self.tasks_states.get(task_id)
        if not task_state:
            return None

        subtask_states = list(task_state.subtask_states.values())
        return [subtask_state.subtask_id for subtask_state in subtask_states]

    def get_frame_subtasks(self, task_id: str, frame) \
            -> Optional[Dict[str, SubtaskState]]:
        task: Optional[Task] = self.tasks.get(task_id)
        if not task:
            return None
        if not isinstance(task, CoreTask):
            return None
        return task.get_subtasks(frame)

    def change_config(self, root_path, use_distributed_resource_management):
        self.dir_manager = DirManager(root_path)
        self.use_distributed_resources = use_distributed_resource_management

    def get_task_id(self, subtask_id):
        return self.subtask2task_mapping[subtask_id]

    def get_task_dict(self, task_id) -> Optional[Dict]:
        task = self.tasks.get(task_id)
        if not task:  # task might have been deleted after the request was made
            return None

        task_type_name = task.task_definition.task_type.lower()
        task_type = self.task_types[task_type_name]
        state = self.query_task_state(task.header.task_id)

        dictionary = {
            'duration': state.elapsed_time,
            # single=True retrieves one preview file. If rendering frames,
            # it's the preview of the most recently computed frame.
            'preview': task_type.get_preview(task, single=True)
        }

        return update_dict(dictionary, task.to_dictionary(),
                           state.to_dictionary(),
                           self.get_task_definition_dict(task))

    def get_tasks_dict(self) -> List[Dict]:
        task_ids = list(self.tasks.keys())
        mapped = map(self.get_task_dict, task_ids)
        filtered = filter(None, mapped)
        return list(filtered)

    def get_subtask_dict(self, subtask_id):
        task_id = self.subtask2task_mapping[subtask_id]
        task_state = self.tasks_states[task_id]
        subtask = task_state.subtask_states[subtask_id]
        return subtask.to_dictionary()

    def get_subtasks_dict(self, task_id):
        task_state = self.tasks_states[task_id]
        subtasks = task_state.subtask_states
        if subtasks:
            return [subtask.to_dictionary() for subtask in subtasks.values()]

    @rpc_utils.expose('comp.task.subtasks.borders')
    def get_subtasks_borders(self, task_id, part=1):
        task = self.tasks[task_id]
        task_type_name = task.task_definition.task_type.lower()
        task_type = self.task_types[task_type_name]
        subtasks_count = task.get_total_tasks()

        return {
            to_unicode(subtask_id):
            task_type.get_task_border(subtask,
                                      task.task_definition,
                                      subtasks_count,
                                      as_path=True)
            for subtask_id, subtask in task.get_subtasks(part).items()
        }

    def get_task_preview(self, task_id, single=False):
        task = self.tasks[task_id]
        task_type_name = task.task_definition.task_type.lower()
        task_type = self.task_types[task_type_name]
        return task_type.get_preview(task, single=single)

    def add_comp_task_request(self, theader, price):
        """ Add a header of a task which this node may try to compute """
        self.comp_task_keeper.add_request(theader, price)

    def get_estimated_cost(self, task_type, options):
        try:
            subtask_value = options['price'] * options['subtask_time']
            return options['num_subtasks'] * subtask_value
        except (KeyError, ValueError):
            logger.exception("Cannot estimate price, wrong params")
            return None

    def __add_subtask_to_tasks_states(self, node_name, node_id, ctd, address):

        logger.debug('add_subtask_to_tasks_states(%r, %r, %r, %r)', node_name,
                     node_id, ctd, address)

        ss = SubtaskState()
        ss.time_started = time.time()
        ss.node_id = node_id
        ss.node_name = node_name
        ss.deadline = ctd['deadline']
        ss.subtask_definition = ctd['short_description']
        ss.subtask_id = ctd['subtask_id']
        ss.extra_data = ctd['extra_data']
        ss.subtask_status = SubtaskStatus.starting

        (self.tasks_states[ctd['task_id']].subtask_states[ctd['subtask_id']]
         ) = ss

    def notify_update_task(self, task_id):
        self.notice_task_updated(task_id)

    @handle_task_key_error
    def notice_task_updated(self,
                            task_id: str,
                            subtask_id: str = None,
                            op: Operation = None,
                            persist: bool = True):
        """Called when a task is modified, saves the task and
        propagates information

        Whenever task is changed `notice_task_updated` should be called
        to save the task - if the change is save-worthy, as specified
        by the `persist` parameter - and propagate information about
        changed task to other parts of the system.

        Most of the calls are save-worthy, but a minority is not: for
        instance when the work offer is received, the task does not
        change so saving it does not make sense, but it still makes
        sense to let other parts of the system know about the change.
        Also, when a number of minor changes are always followed by a
        major one, as it is with restarting a frame task, it does not
        make sense to store all the partial changes, so only the
        final one is considered save-worthy.

        :param str task_id: id of the updated task
        :param str subtask_id: if the operation done on the
          task is related to a subtask, id of that subtask
        :param Operation op: performed operation
        :param bool persist: should the task be persisted now
        """
        # self.save_state()

        logger.debug(
            "Notice task updated. task_id=%s, subtask_id=%s,"
            "op=%s, persist=%s",
            task_id,
            subtask_id,
            op,
            persist,
        )

        if persist and self.task_persistence:
            self.dump_task(task_id)

        task_state = self.tasks_states.get(task_id)
        dispatcher.send(
            signal='golem.taskmanager',
            event='task_status_updated',
            task_id=task_id,
            task_state=task_state,
            subtask_id=subtask_id,
            op=op,
        )

        if self.finished_cb and persist and op \
                and op.task_related() and op.is_completed():
            self.finished_cb()