def setUp(self):
    """Prepare a task temporary directory with one file and one nested file.

    Layout created under the task's temporary directory:
        out_file          -> "File contents"
        out_dir/dir_file  -> "Dir file contents"
    """
    super().setUp()

    self.task_id = str(uuid.uuid4())
    self.dir_manager = DirManager(self.path)
    self.res_dir = self.dir_manager.get_task_temporary_dir(self.task_id)
    self.out_dir = os.path.join(self.res_dir, 'out_dir')
    os.makedirs(self.out_dir, exist_ok=True)

    top_level_file = os.path.join(self.res_dir, 'out_file')
    nested_file = os.path.join(self.out_dir, 'dir_file')
    fixtures = (
        (top_level_file, "File contents"),
        (nested_file, "Dir file contents"),
    )
    for target, text in fixtures:
        with open(target, 'w') as handle:
            handle.write(text)

    self.secret = FileEncryptor.gen_secret(10, 20)
    self.disk_files = [top_level_file, nested_file]
    # Only the base names of the files on disk.
    self.all_files = [os.path.basename(name) for name in self.disk_files]
    # Fresh, not yet existing path inside out_dir.
    self.out_path = os.path.join(self.out_dir, str(uuid.uuid4()))
def testGetTaskTemporaryDir(self):
    """Check DirManager.get_task_temporary_dir with and without ``create``.

    The directory is expected at ``<self.path>/<task_id>/tmp``. It must be
    created by default and with ``create=True``, and must be left missing
    when ``create=False`` is passed after it has been removed.
    """
    dm = DirManager(self.path)
    task_id = '12345'
    expected_tmp_dir = os.path.join(self.path, task_id, 'tmp')

    # Default call creates the directory.
    tmp_dir = dm.get_task_temporary_dir(task_id)
    # assertEqual instead of the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(os.path.normpath(tmp_dir), expected_tmp_dir)
    self.assertTrue(os.path.isdir(tmp_dir))

    # A repeated call keeps the existing directory.
    tmp_dir = dm.get_task_temporary_dir(task_id)
    self.assertTrue(os.path.isdir(tmp_dir))

    # create=False still returns the same, already existing path.
    tmp_dir = dm.get_task_temporary_dir(task_id, create=False)
    self.assertTrue(os.path.isdir(tmp_dir))
    self.assertEqual(os.path.normpath(tmp_dir), expected_tmp_dir)

    # After removal, create=False must not recreate the directory...
    shutil.rmtree(tmp_dir)
    tmp_dir = dm.get_task_temporary_dir(task_id, create=False)
    self.assertFalse(os.path.isdir(tmp_dir))

    # ...while create=True must.
    tmp_dir = dm.get_task_temporary_dir(task_id, create=True)
    self.assertTrue(os.path.isdir(tmp_dir))
def setUp(self):
    """Build a nested directory tree inside a task temporary directory.

    Besides the tree, an (initially empty) ``out_dir`` is created and a
    secret is generated with ``FileEncryptor.gen_secret(10, 20)``.
    """
    super().setUp()

    self.task_id = str(uuid.uuid4())
    self.dir_manager = DirManager(self.path)
    root = self.dir_manager.get_task_temporary_dir(self.task_id)
    self.res_dir = root
    self.out_dir = os.path.join(root, 'out_dir')
    os.makedirs(self.out_dir, exist_ok=True)
    self.secret = FileEncryptor.gen_secret(10, 20)

    # Create directory structure:
    # |-- directory
    # |-- directory2
    # |   |-- directory3
    # |   |   `-- file.txt
    # |   `-- file.txt
    # `-- file.txt
    plain_dir = os.path.join(root, "directory")
    parent_dir = os.path.join(root, "directory2/")
    child_dir = os.path.join(parent_dir, "directory3/")
    root_file = os.path.join(root, "file.txt")
    parent_file = os.path.join(parent_dir, "file.txt")
    child_file = os.path.join(child_dir, "file.txt")

    for new_dir in (plain_dir, parent_dir, child_dir):
        os.makedirs(new_dir)

    for new_file in (root_file, parent_file, child_file):
        with open(new_file, 'w') as out:
            out.write("content")

    # Top-level entries only; nested entries appear in expected_results.
    self.disk_files = [root_file, plain_dir, parent_dir]

    nested_entries = (parent_dir, child_dir, parent_file, child_file)
    self.expected_results = [
        os.path.basename(root_file),
        os.path.basename(plain_dir),
    ] + [os.path.relpath(entry, root) for entry in nested_entries]

    self.out_path = os.path.join(self.out_dir, str(uuid.uuid4()))
def test_advanced_verification(self):
    """A result submitted under 'forAll' advanced verification must fail.

    A task is built from the Blender benchmark definition, a blank BMP of
    the task resolution is reported as the result of the first subtask, and
    the subtask is expected to end up with ``SubtaskStatus.failure``.
    """
    benchmark = BlenderBenchmark()
    definition = benchmark.task_definition
    definition.verification_options = AdvanceRenderingVerificationOptions()
    definition.verification_options.type = 'forAll'

    dir_manager = DirManager(self.tempdir)
    task = BlenderRenderTaskBuilder(
        node_name="ABC",
        task_definition=benchmark.task_definition,
        root_path=self.tempdir,
        dir_manager=dir_manager,
    ).build()

    tmp_dir = dir_manager.get_task_temporary_dir(task.header.task_id, True)
    extra_data = task.query_extra_data(1000, 4, "NODE_ID", "NODE_NAME")

    # Blank image of the task's resolution used as the "rendered" result.
    preview_path = path.join(tmp_dir, 'preview.bmp')
    Image.new("RGB", (task.res_x, task.res_y)).save(preview_path, "BMP")

    task.computation_finished(extra_data.ctd.subtask_id, [preview_path], 1)

    subtask_info = task.subtasks_given[extra_data.ctd.subtask_id]
    assert subtask_info['status'] == SubtaskStatus.failure
class TestFileHelper(TestDirFixture):
    """ Tests for FileHelper class """

    def setUp(self):
        """ Create an empty 'test_file' in the 'test_task' temporary dir """
        TestDirFixture.setUp(self)
        self.dir_manager = DirManager(self.path)
        self.res_dir = self.dir_manager.get_task_temporary_dir('test_task')
        self.test_file_path = os.path.join(self.res_dir, 'test_file')
        # Create (or truncate) the file and close it right away
        with open(self.test_file_path, 'w'):
            pass

    def _check_helper(self, source, mode):
        """ Open `source` through FileHelper and verify the yielded object """
        with FileHelper(source, mode) as opened:
            # `file` is the Python 2 builtin file type
            self.assertIsInstance(opened, file)
            self.assertEqual(opened.mode, mode)

    def test_file_helper(self):
        """ Test opening file with FileHelper """
        read_mode = 'r'

        # Test opening with file path
        self._check_helper(self.test_file_path, read_mode)

        # Test opening with file
        with open(self.test_file_path, read_mode) as file_:
            self._check_helper(file_, read_mode)
def testClearTemporary(self):
    """clear_temporary must empty the task's tmp dir but keep the dir itself."""
    manager = DirManager(self.path)
    task_id = '12345'
    tmp_dir = manager.get_task_temporary_dir(task_id)
    self.assertTrue(os.path.isdir(tmp_dir))

    nested_dir = os.path.join(tmp_dir, 'dir1')
    top_level = [os.path.join(tmp_dir, name) for name in ('file1', 'file2')]
    nested_file = os.path.join(nested_dir, 'file3')
    every_file = top_level + [nested_file]

    # Populate: two files directly in tmp_dir and one inside dir1.
    for file_path in top_level:
        open(file_path, 'w').close()
    if not os.path.isdir(nested_dir):
        os.mkdir(nested_dir)
    open(nested_file, 'w').close()

    for file_path in every_file:
        self.assertTrue(os.path.isfile(file_path))
    self.assertTrue(os.path.isdir(nested_dir))

    manager.clear_temporary(task_id)

    # The directory survives, its whole content is gone.
    self.assertTrue(os.path.isdir(tmp_dir))
    for file_path in every_file:
        self.assertFalse(os.path.isfile(file_path))
    self.assertFalse(os.path.isdir(nested_dir))
class TestResourcesManager(TestDirFixture):
    """Tests for ResourcesManager built on top of a DirManager.

    setUp populates the resource dir of 'task2' with ``file1``, ``file2``
    and ``dir1/file3``; 'task3' is never populated by these tests.
    """

    def setUp(self):
        """Create the 'task2' resource files used by the tests."""
        TestDirFixture.setUp(self)

        self.dir_manager = DirManager(self.path)
        res_path = self.dir_manager.get_task_resource_dir('task2')

        file1 = os.path.join(res_path, 'file1')
        file2 = os.path.join(res_path, 'file2')
        dir1 = os.path.join(res_path, 'dir1')
        file3 = os.path.join(dir1, 'file3')
        open(file1, 'w').close()
        open(file2, 'w').close()
        if not os.path.isdir(dir1):
            os.mkdir(dir1)
        open(file3, 'w').close()

    def testInit(self):
        """ResourcesManager can be constructed."""
        self.assertIsNotNone(ResourcesManager(self.dir_manager, 'owner'))

    def testGetResourceHeader(self):
        """Header of 'task2' lists two files and one sub dir with one file."""
        rm = ResourcesManager(self.dir_manager, 'owner')

        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        header = rm.get_resource_header('task2')
        self.assertEqual(len(header.files_data), 2)
        self.assertEqual(len(header.sub_dir_headers[0].files_data), 1)

        header2 = rm.get_resource_header('task3')
        self.assertEqual(len(header2.files_data), 0)
        self.assertEqual(len(header2.sub_dir_headers), 0)

    def testGetResourceDelta(self):
        """Delta contains exactly the files missing from the given header."""
        rm = ResourcesManager(self.dir_manager, 'owner')

        # Delta against the task's own header is empty.
        header = rm.get_resource_header('task2')
        delta = rm.get_resource_delta('task2', header)
        self.assertEqual(len(delta.files_data), 0)
        self.assertEqual(len(delta.sub_dir_resources[0].files_data), 0)

        # Delta against the header of the unpopulated 'task3' is everything.
        header2 = rm.get_resource_header('task3')
        delta2 = rm.get_resource_delta('task2', header2)
        self.assertEqual(len(delta2.files_data), 2)
        self.assertEqual(len(delta2.sub_dir_resources[0].files_data), 1)

        # Add one file per level; only those must show up against the
        # header taken before they existed.
        res_path = self.dir_manager.get_task_resource_dir('task2')
        file5 = os.path.join(res_path, 'file5')
        open(file5, 'w').close()
        dir1 = os.path.join(res_path, 'dir1')
        file4 = os.path.join(dir1, 'file4')
        open(file4, 'w').close()
        delta3 = rm.get_resource_delta('task2', header)
        self.assertEqual(len(delta3.files_data), 1)
        self.assertEqual(len(delta3.sub_dir_resources[0].files_data), 1)
        os.remove(file4)
        os.remove(file5)

    #
    # def testPrepareResourceDelta(self):
    #     assert False
    #
    # def testUpdateResource(self):
    #     assert False
    #
    def testGetResourceDir(self):
        """get_resource_dir agrees with DirManager.get_task_resource_dir."""
        rm = ResourcesManager(self.dir_manager, 'owner')
        resDir = rm.get_resource_dir('task2')
        self.assertTrue(os.path.isdir(resDir))
        self.assertEqual(resDir,
                         self.dir_manager.get_task_resource_dir('task2'))

    def testGetTemporaryDir(self):
        """get_temporary_dir agrees with DirManager.get_task_temporary_dir."""
        rm = ResourcesManager(self.dir_manager, 'owner')
        tmp_dir = rm.get_temporary_dir('task2')
        self.assertTrue(os.path.isdir(tmp_dir))
        self.assertEqual(tmp_dir,
                         self.dir_manager.get_task_temporary_dir('task2'))

    def testGetOutputDir(self):
        """get_output_dir agrees with DirManager.get_task_output_dir."""
        rm = ResourcesManager(self.dir_manager, 'owner')
        outDir = rm.get_output_dir('task2')
        self.assertTrue(os.path.isdir(outDir))
        self.assertEqual(outDir,
                         self.dir_manager.get_task_output_dir('task2'))
class TestAESFileEncryptor(TestDirFixture):
    """ Test encryption using AESFileEncryptor """

    def setUp(self):
        """ Write 100 chunks of 32 random bytes to the plaintext test file """
        TestDirFixture.setUp(self)
        self.dir_manager = DirManager(self.path)
        self.res_dir = self.dir_manager.get_task_temporary_dir('test_task')
        self.test_file_path = os.path.join(self.res_dir, 'test_file')
        self.enc_file_path = os.path.join(self.res_dir, 'test_file.enc')

        with open(self.test_file_path, 'wb') as f:
            for _chunk in xrange(0, 100):
                f.write(bytearray(random.getrandbits(8) for _ in xrange(32)))

    @staticmethod
    def _same_content(first_path, second_path, chunk_size=32):
        """ Compare two files byte by byte

        Both files are opened in binary mode: the fixture contains random
        bytes, so text mode could fail to decode them or alter them through
        newline translation.

        :param str first_path: path of the first file
        :param str second_path: path of the second file
        :param int chunk_size: number of bytes read per step
        :return bool: True if both files hold identical bytes
        """
        with open(first_path, 'rb') as f1, open(second_path, 'rb') as f2:
            while True:
                chunk1 = f1.read(chunk_size)
                chunk2 = f2.read(chunk_size)
                if chunk1 != chunk2:
                    return False
                if not chunk1:
                    # Both chunks are equal and empty: both files ended
                    return True

    def test_encrypt(self):
        """ Test encryption procedure """
        secret = FileEncryptor.gen_secret(10, 20)

        if os.path.exists(self.enc_file_path):
            os.remove(self.enc_file_path)

        AESFileEncryptor.encrypt(self.test_file_path,
                                 self.enc_file_path,
                                 secret)

        self.assertTrue(os.path.exists(self.enc_file_path))
        with open(self.enc_file_path, 'rb') as f:
            encrypted = f.read()
            self.assertEqual(
                len(encrypted) % AESFileEncryptor.block_size, 0,
                "Incorrect ciphertext size: {}. Should be multiple of {}".format(len(encrypted), AESFileEncryptor.block_size))

    def test_decrypt(self):
        """ Test decryption procedure """
        secret = FileEncryptor.gen_secret(10, 20)
        decrypted_path = self.test_file_path + ".dec"

        if os.path.exists(self.enc_file_path):
            os.remove(self.enc_file_path)

        AESFileEncryptor.encrypt(self.test_file_path,
                                 self.enc_file_path,
                                 secret)

        # Decryption with the right secret must restore the original bytes
        AESFileEncryptor.decrypt(self.enc_file_path,
                                 decrypted_path,
                                 secret)

        self.assertEqual(os.path.getsize(self.test_file_path),
                         os.path.getsize(decrypted_path))

        if not self._same_content(self.test_file_path, decrypted_path):
            raise ValueError("Invalid decrypted file chunk")

        # Decryption with a wrong secret must NOT restore the original bytes
        AESFileEncryptor.decrypt(self.enc_file_path,
                                 decrypted_path,
                                 secret + "0")

        decrypted = True

        if os.path.getsize(self.test_file_path) != os.path.getsize(decrypted_path):
            decrypted = False
        else:
            decrypted = self._same_content(self.test_file_path,
                                           decrypted_path)

        self.assertFalse(decrypted)

    def test_get_key_and_iv(self):
        """ Test helper methods: gen_salt and get_key_and_iv """
        salt = AESFileEncryptor.gen_salt(AESFileEncryptor.block_size)
        self.assertEqual(len(salt), AESFileEncryptor.block_size - AESFileEncryptor.salt_prefix_len)

        secret = FileEncryptor.gen_secret(10, 20)
        self.assertGreaterEqual(len(secret), 10)
        self.assertLessEqual(len(secret), 20)

        key_len = 32
        iv_len = AESFileEncryptor.block_size
        key, iv = AESFileEncryptor.get_key_and_iv(secret, salt, key_len, iv_len)

        self.assertEqual(len(key), key_len)
        self.assertEqual(len(iv), iv_len)
class TaskManager(TaskEventListener):
    """ Keeps and manages information about requested tasks

    State is kept in three dictionaries:
      * tasks: task_id -> task object
      * tasks_states: task_id -> TaskState (holds per-subtask SubtaskState)
      * subtask2task_mapping: subtask_id -> task_id
    """
    handle_task_key_error = HandleKeyError(log_task_key_error)
    handle_subtask_key_error = HandleKeyError(log_subtask_key_error)

    def __init__(self, node_name, node, keys_auth, listen_address="",
                 listen_port=0, root_path="res",
                 use_distributed_resources=True, tasks_dir="tasks",
                 task_persistence=False):
        """ Create the manager, load task types and optionally restore tasks
        :param node_name: name passed to task builders and subtask states
        :param node: node object; its pub_addr, pub_port and nat_type are
                     refreshed in add_new_task
        :param keys_auth: object providing get_key_id() and sign()
        :param str listen_address: address stored in task headers
        :param int listen_port: port stored in task headers
        :param str root_path: root directory handed to DirManager
        :param bool use_distributed_resources: stored flag
        :param str tasks_dir: directory for pickled tasks (created if missing)
        :param bool task_persistence: if True, tasks are dumped on every
                                      update and restored on start
        """
        super(TaskManager, self).__init__()

        self.apps_manager = AppsManager()
        self.apps_manager.load_apps()

        apps = self.apps_manager.apps.values()
        task_types = [app.task_type_info(None, app.controller) for app in apps]
        # Task types are looked up by lower-cased name
        self.task_types = {t.name.lower(): t for t in task_types}

        self.node_name = node_name
        self.node = node
        self.keys_auth = keys_auth
        self.key_id = keys_auth.get_key_id()

        self.tasks = {}
        self.tasks_states = {}
        self.subtask2task_mapping = {}

        self.listen_address = listen_address
        self.listen_port = listen_port

        # FIXME Remove this variable and make task persistance obligatory after it is more tested
        # Remember to also remove it from init params
        self.task_persistence = task_persistence

        self.tasks_dir = Path(tasks_dir)
        if not self.tasks_dir.is_dir():
            self.tasks_dir.mkdir(parents=True)
        self.root_path = root_path
        self.dir_manager = DirManager(self.get_task_manager_root())

        # resource_manager = OpenStackSwiftResourceManager(self.dir_manager,
        #     resource_dir_method=self.dir_manager.get_task_temporary_dir)
        resource_manager = HyperdriveResourceManager(
            self.dir_manager,
            resource_dir_method=self.dir_manager.get_task_temporary_dir)
        self.task_result_manager = EncryptedResultPackageManager(
            resource_manager)

        # Task statuses in which a task may still hand out subtasks
        self.activeStatus = [
            TaskStatus.computing, TaskStatus.starting, TaskStatus.waiting
        ]
        self.use_distributed_resources = use_distributed_resources

        self.comp_task_keeper = CompTaskKeeper(self.tasks_dir,
                                               persist=self.task_persistence)
        if self.task_persistence:
            self.restore_tasks()

    def get_task_manager_root(self):
        """ Return the root path given at construction time """
        return self.root_path

    def get_external_address(self):
        """ Start an asynchronous external-address lookup for listen_port
        :return: result of async_run (yielded by add_new_task)
        """
        request = AsyncRequest(get_external_address, self.listen_port)
        return async_run(request)

    def create_task(self, dictionary):
        """ Build a task from its dictionary representation
        :param dictionary: dict with at least a 'type' key; any non-dict
                           value is returned unchanged
        :return: built task (or the unchanged non-dict argument)
        """
        # FIXME: remove after the new interface has been integrated with
        if not isinstance(dictionary, dict):
            return dictionary

        type_name = dictionary['type'].lower()
        task_type = self.task_types[type_name]
        builder_type = task_type.task_builder_type

        definition = builder_type.build_definition(task_type, dictionary)
        builder = builder_type(self.node_name, definition, self.root_path,
                               self.dir_manager)

        return Task.build_task(builder)

    def get_task_definition_dict(self, task):
        """ Convert a task's definition to a dictionary
        :param task: task object; a dict is returned unchanged
        :return dict:
        """
        if isinstance(task, dict):
            return task
        definition = task.task_definition
        task_type = self.task_types[definition.task_type.lower()]
        return task_type.task_builder_type.build_dictionary(definition)

    @inlineCallbacks
    def add_new_task(self, task):
        """ Register a new task, fill in and sign its header
        :param task: task to add
        :raise RuntimeError: task with this id is already registered
        :raise ValueError: key_id is not set
        :raise IOError: listen address / port is not a proper address
        """
        if task.header.task_id in self.tasks:
            raise RuntimeError("Task has been already added")
        if not self.key_id:
            raise ValueError("'key_id' is not set")
        if not SocketAddress.is_proper_address(self.listen_address,
                                               self.listen_port):
            raise IOError("Incorrect socket address")

        prev_pub_addr, prev_pub_port, prev_nat_type = self.node.pub_addr, self.node.pub_port, self.node.nat_type
        self.node.pub_addr, self.node.pub_port, self.node.nat_type = yield self.get_external_address()

        # Headers embed the node, so a changed public address requires
        # re-signing the headers of all known tasks
        if prev_pub_addr != self.node.pub_addr or \
           prev_pub_port != self.node.pub_port or \
           prev_nat_type != self.node.nat_type:
            self.update_task_signatures()

        task.header.task_owner_address = self.listen_address
        task.header.task_owner_port = self.listen_port
        task.header.task_owner_key_id = self.key_id
        task.header.task_owner = self.node
        task.header.signature = self.sign_task_header(task.header)

        self.dir_manager.clear_temporary(task.header.task_id,
                                         undeletable=task.undeletable)
        self.dir_manager.get_task_temporary_dir(task.header.task_id,
                                                create=True)

        task.register_listener(self)
        task.task_status = TaskStatus.waiting

        self.tasks[task.header.task_id] = task

        ts = TaskState()
        ts.status = TaskStatus.waiting
        ts.outputs = task.get_output_names()
        ts.total_subtasks = task.get_total_tasks()
        ts.time_started = time.time()

        self.tasks_states[task.header.task_id] = ts

        if self.task_persistence:
            self.dump_task(task.header.task_id)
        logger.info("Task {} added".format(task.header.task_id))
        self.notice_task_updated(task.header.task_id)

    def dump_task(self, task_id):
        """ Pickle the task and its state to '<tasks_dir>/<task_id>.pickle'

        On any failure the (possibly partial) file is removed and the
        original exception is re-raised.
        :param task_id: id of the task to dump
        """
        logger.debug('DUMP TASK')
        # Computed before the try block so that the cleanup in the handler
        # can always refer to it (otherwise a KeyError on an unknown task_id
        # would be masked by an UnboundLocalError raised in the handler).
        filepath = self.tasks_dir / ('%s.pickle' % (task_id, ))
        try:
            data = self.tasks[task_id], self.tasks_states[task_id]
            logger.debug('DUMP TASK %r', filepath)
            with filepath.open('wb') as f:
                pickle.dump(data, f, protocol=2)
        except:  # deliberately broad: clean up, then re-raise unchanged
            logger.exception('DUMP ERROR task_id: %r task: %r state: %r',
                             task_id,
                             self.tasks.get(task_id, '<not found>'),
                             self.tasks_states.get(task_id, '<not found>'))
            if filepath.exists():
                filepath.unlink()
            raise

    def restore_tasks(self):
        """ Load every '*.pickle' file from tasks_dir into tasks/tasks_states

        Files that cannot be unpickled are logged and deleted. A
        'task_restored' event is sent for every restored task.
        """
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # tasks_dir must only contain files written by dump_task.
        logger.debug('RESTORE TASKS')
        for path in self.tasks_dir.iterdir():
            logger.debug('RESTORE TASKS %r', path)
            if not path.suffix == '.pickle':
                continue
            logger.debug('RESTORE TASKS really %r', path)
            with path.open('rb') as f:
                try:
                    task, state = pickle.load(f)
                    self.tasks[task.header.task_id] = task
                    self.tasks_states[task.header.task_id] = state
                except (pickle.UnpicklingError, EOFError, ImportError):
                    logger.exception('Problem restoring task from: %s', path)
                    path.unlink()
                    continue
            dispatcher.send(signal='golem.taskmanager',
                            event='task_restored',
                            task=task,
                            state=state)

    @handle_task_key_error
    def resources_send(self, task_id):
        """ Mark the task as waiting after its resources have been sent """
        self.tasks_states[task_id].status = TaskStatus.waiting
        self.tasks[task_id].task_status = TaskStatus.waiting
        self.notice_task_updated(task_id)
        logger.info("Resources for task {} sent".format(task_id))

    def get_next_subtask(self, node_id, node_name, task_id,
                         estimated_performance, price, max_resource_size,
                         max_memory_size, num_cores=0, address=""):
        """ Assign next subtask from task <task_id> to node with given id
        <node_id> and name.
        :param node_id:
        :param node_name:
        :param task_id:
        :param estimated_performance:
        :param price: price offered by the node; must not exceed the
                      task header's max_price
        :param max_resource_size: compared (times 1024) with the header's
                                  resource_size
        :param max_memory_size: compared (times 1024) with the header's
                                estimated_memory
        :param num_cores:
        :param address: stored as the computing node's ip address
        :return (ComputeTaskDef|None, bool, bool): Function returns a triplet.
        First element is either ComputeTaskDef that describe assigned subtask
        or None. The second element describes whether the task_id is a wrong
        task that isn't in task manager register. If task with <task_id> it's
        a known task then second element is always False (regardless new
        subtask was assigned or not). The third element describes whether
        we're waiting for client's other task results.
        """
        logger.debug('get_next_subtask(%r, %r, %r, %r, %r, %r, %r, %r, %r)',
                     node_id, node_name, task_id, estimated_performance,
                     price, max_resource_size, max_memory_size, num_cores,
                     address)
        if task_id not in self.tasks:
            logger.info("Cannot find task {} in my tasks".format(task_id))
            return None, True, False

        task = self.tasks[task_id]

        if task.header.max_price < price:
            return None, False, False

        def has_subtasks():
            # The task must be active, still need computation and fit the
            # node's declared resource and memory limits
            if self.tasks_states[task_id].status not in self.activeStatus:
                logger.debug('state no in activestatus')
                return False
            if not task.needs_computation():
                logger.debug('not task.needs_computation')
                return False
            if task.header.resource_size > (long(max_resource_size) * 1024):
                logger.debug('resources size >')
                return False
            if task.header.estimated_memory > (long(max_memory_size) * 1024):
                logger.debug('estimated memory >')
                return False
            return True

        if not has_subtasks():
            logger.info(
                "Cannot get next task for estimated performance {}".format(
                    estimated_performance))
            return None, False, False

        extra_data = task.query_extra_data(estimated_performance, num_cores,
                                           node_id, node_name)
        if extra_data.should_wait:
            return None, False, True

        ctd = extra_data.ctd

        def check_compute_task_def():
            # Reject malformed definitions, definitions of another task and
            # subtask ids that are already known
            if not isinstance(ctd, ComputeTaskDef) or not ctd.subtask_id:
                logger.debug('check ctd: ctd not instance or not subtask_id')
                return False
            if task_id != ctd.task_id or ctd.subtask_id in self.subtask2task_mapping:
                logger.debug(
                    'check ctd: %r != %r or %r in self.subtask2task_maping',
                    task_id, ctd.task_id, ctd.subtask_id)
                return False
            if ctd.subtask_id in self.tasks_states[ctd.task_id].subtask_states:
                logger.debug('check ctd: subtask_states')
                return False
            return True

        if not check_compute_task_def():
            return None, False, False

        ctd.key_id = task.header.task_owner_key_id
        ctd.return_address = task.header.task_owner_address
        ctd.return_port = task.header.task_owner_port
        ctd.task_owner = task.header.task_owner

        self.subtask2task_mapping[ctd.subtask_id] = task_id
        self.__add_subtask_to_tasks_states(node_name, node_id, price, ctd,
                                           address)
        self.notice_task_updated(task_id)
        return ctd, False, extra_data.should_wait

    def get_tasks_headers(self):
        """ Return headers of active tasks that still need computation """
        ret = []
        for t in self.tasks.values():
            if t.needs_computation() and t.task_status in self.activeStatus:
                ret.append(t.header)

        return ret

    def get_trust_mod(self, subtask_id):
        """ Return the owning task's trust modifier for the subtask
        :return: task.get_trust_mod(subtask_id), or 0 for unknown subtasks
        """
        if subtask_id in self.subtask2task_mapping:
            task_id = self.subtask2task_mapping[subtask_id]
            return self.tasks[task_id].get_trust_mod(subtask_id)
        else:
            logger.error("This is not my subtask {}".format(subtask_id))
            return 0

    def update_task_signatures(self):
        """ Re-sign the headers of all known tasks """
        for task in self.tasks.values():
            task.header.signature = self.sign_task_header(task.header)

    def sign_task_header(self, task_header):
        """ Sign the binary form of the header with keys_auth """
        return self.keys_auth.sign(task_header.to_binary())

    def verify_subtask(self, subtask_id):
        """ Delegate verification to the owning task
        :return: task.verify_subtask(subtask_id), False for unknown subtasks
        """
        if subtask_id in self.subtask2task_mapping:
            task_id = self.subtask2task_mapping[subtask_id]
            return self.tasks[task_id].verify_subtask(subtask_id)
        else:
            return False

    def get_node_id_for_subtask(self, subtask_id):
        """ Return id of the node computing the subtask or None if unknown """
        if subtask_id in self.subtask2task_mapping:
            subtask_state = self.tasks_states[self.subtask2task_mapping[
                subtask_id]].subtask_states[subtask_id]
            return subtask_state.computer.node_id
        else:
            return None

    def set_value(self, task_id, subtask_id, value):
        """ Set the value of a subtask
        :param task_id: id of the owning task
        :param subtask_id: id of the subtask
        :param int|long value: new value
        :raise TypeError: value is neither int nor long
        Unknown task or subtask ids are logged and ignored.
        """
        if type(value) not in (int, long):
            raise TypeError(
                "Incorrect 'value' type: {}. Should be int or long".format(
                    type(value)))
        task_state = self.tasks_states.get(task_id)
        if task_state is None:
            logger.warning("This is not my task {}".format(task_id))
            return
        subtask_state = task_state.subtask_states.get(subtask_id)
        if subtask_state is None:
            logger.warning("This is not my subtask {}".format(subtask_id))
            return
        subtask_state.value = value

    @handle_subtask_key_error
    def get_value(self, subtask_id):
        """ Return value of a given subtask
        :param subtask_id: id of a computed subtask
        :return long: price that should be paid for given subtask
        """
        task_id = self.subtask2task_mapping[subtask_id]
        return self.tasks_states[task_id].subtask_states[subtask_id].value

    @handle_subtask_key_error
    def computed_task_received(self, subtask_id, result, result_type):
        """ Handle a received subtask result
        :param subtask_id: id of the computed subtask
        :param result: passed to task.computation_finished
        :param result_type: passed to task.computation_finished
        :return bool: True if the subtask was accepted, False if it was in
                      a state that does not expect a result or if its
                      verification failed
        """
        task_id = self.subtask2task_mapping[subtask_id]

        subtask_state = self.tasks_states[task_id].subtask_states[subtask_id]
        subtask_status = subtask_state.subtask_status

        if not SubtaskStatus.is_computed(subtask_status):
            logger.warning(
                "Result for subtask {} when subtask state is {}".format(
                    subtask_id, subtask_status))
            self.notice_task_updated(task_id)
            return False

        self.tasks[task_id].computation_finished(subtask_id, result,
                                                 result_type)
        ss = self.tasks_states[task_id].subtask_states[subtask_id]
        ss.subtask_progress = 1.0
        ss.subtask_rem_time = 0.0
        ss.subtask_status = SubtaskStatus.finished
        ss.stdout = self.tasks[task_id].get_stdout(subtask_id)
        ss.stderr = self.tasks[task_id].get_stderr(subtask_id)
        ss.results = self.tasks[task_id].get_results(subtask_id)

        if not self.tasks[task_id].verify_subtask(subtask_id):
            logger.debug("Subtask {} not accepted\n".format(subtask_id))
            ss.subtask_status = SubtaskStatus.failure
            self.notice_task_updated(task_id)
            return False

        if self.tasks_states[task_id].status in self.activeStatus:
            if not self.tasks[task_id].finished_computation():
                self.tasks_states[task_id].status = TaskStatus.computing
            else:
                if self.tasks[task_id].verify_task():
                    logger.debug("Task {} accepted".format(task_id))
                    self.tasks_states[task_id].status = TaskStatus.finished
                else:
                    logger.debug("Task {} not accepted".format(task_id))
        self.notice_task_updated(task_id)
        return True

    @handle_subtask_key_error
    def task_computation_failure(self, subtask_id, err):
        """ Mark a subtask as failed
        :param subtask_id: id of the failed subtask
        :param err: error; its str() is stored as the subtask's stderr
        :return bool: False if the subtask was in a state that does not
                      expect a result, True otherwise
        """
        task_id = self.subtask2task_mapping[subtask_id]

        subtask_state = self.tasks_states[task_id].subtask_states[subtask_id]
        subtask_status = subtask_state.subtask_status

        if not SubtaskStatus.is_computed(subtask_status):
            logger.warning(
                "Result for subtask {} when subtask state is {}".format(
                    subtask_id, subtask_status))
            self.notice_task_updated(task_id)
            return False

        self.tasks[task_id].computation_failed(subtask_id)
        ss = self.tasks_states[task_id].subtask_states[subtask_id]
        ss.subtask_progress = 1.0
        ss.subtask_rem_time = 0.0
        ss.subtask_status = SubtaskStatus.failure
        ss.stderr = str(err)

        self.notice_task_updated(task_id)
        return True

    def task_result_incoming(self, subtask_id):
        """ Mark the subtask as downloading when its result starts arriving

        Unknown node, subtask or task ids are only logged.
        """
        node_id = self.get_node_id_for_subtask(subtask_id)

        if node_id and subtask_id in self.subtask2task_mapping:
            task_id = self.subtask2task_mapping[subtask_id]
            if task_id in self.tasks:
                task = self.tasks[task_id]
                states = self.tasks_states[task_id].subtask_states[subtask_id]

                task.result_incoming(subtask_id)
                states.subtask_status = SubtaskStatus.downloading

                self.notify_update_task(task_id)
            else:
                logger.error("Unknown task id: {}".format(task_id))
        else:
            logger.error("Node_id {} or subtask_id {} does not exist".format(
                node_id, subtask_id))

    # CHANGE TO RETURN KEY_ID (check IF SUBTASK COMPUTER HAS KEY_ID
    def check_timeouts(self):
        """ Time out active tasks and their subtasks past their deadlines
        :return list: node ids of the nodes whose subtasks timed out
        """
        nodes_with_timeouts = []
        for t in self.tasks.values():
            th = t.header
            if self.tasks_states[th.task_id].status not in self.activeStatus:
                continue
            cur_time = get_timestamp_utc()
            if cur_time > th.deadline:
                logger.info("Task {} dies".format(th.task_id))
                # Was "t.task_stats": the typo left task_status untouched,
                # so get_tasks_headers (which filters on task_status) did
                # not see the timeout
                t.task_status = TaskStatus.timeout
                self.tasks_states[th.task_id].status = TaskStatus.timeout
                self.notice_task_updated(th.task_id)
            ts = self.tasks_states[th.task_id]
            for s in ts.subtask_states.values():
                if SubtaskStatus.is_computed(s.subtask_status):
                    if cur_time > s.deadline:
                        logger.info("Subtask {} dies".format(s.subtask_id))
                        s.subtask_status = SubtaskStatus.failure
                        nodes_with_timeouts.append(s.computer.node_id)
                        t.computation_failed(s.subtask_id)
                        s.stderr = "[GOLEM] Timeout"
                        self.notice_task_updated(th.task_id)
        return nodes_with_timeouts

    def get_progresses(self):
        """ Return snapshots of all tasks with progress below 1.0
        :return dict: task_id -> LocalTaskStateSnapshot
        """
        tasks_progresses = {}

        for t in self.tasks.values():
            if t.get_progress() < 1.0:
                ltss = LocalTaskStateSnapshot(t.header.task_id,
                                              t.get_total_tasks(),
                                              t.get_active_tasks(),
                                              t.get_progress(),
                                              t.short_extra_data_repr(2200.0))
                tasks_progresses[t.header.task_id] = ltss

        return tasks_progresses

    @handle_task_key_error
    def get_resources(self, task_id, resource_header, resource_type=0):
        """ Delegate to task.get_resources(resource_header, resource_type) """
        task = self.tasks[task_id]
        return task.get_resources(resource_header, resource_type)

    @handle_task_key_error
    def restart_task(self, task_id):
        """ Restart the task: clear its temporary dir, reset statuses and
        start time; every subtask that has not failed is marked restarted
        """
        logger.info("restarting task")
        self.dir_manager.clear_temporary(
            task_id, undeletable=self.tasks[task_id].undeletable)

        self.tasks[task_id].restart()
        self.tasks[task_id].task_status = TaskStatus.waiting
        self.tasks_states[task_id].status = TaskStatus.waiting
        self.tasks_states[task_id].time_started = time.time()

        for ss in self.tasks_states[task_id].subtask_states.values():
            if ss.subtask_status != SubtaskStatus.failure:
                ss.subtask_status = SubtaskStatus.restarted

        self.notice_task_updated(task_id)

    @handle_subtask_key_error
    def restart_subtask(self, subtask_id):
        """ Restart a single subtask and put its task back to computing """
        task_id = self.subtask2task_mapping[subtask_id]
        self.tasks[task_id].restart_subtask(subtask_id)
        self.tasks_states[task_id].status = TaskStatus.computing
        self.tasks_states[task_id].subtask_states[
            subtask_id].subtask_status = SubtaskStatus.restarted
        self.tasks_states[task_id].subtask_states[
            subtask_id].stderr = "[GOLEM] Restarted"

        self.notice_task_updated(task_id)

    @handle_task_key_error
    def abort_task(self, task_id):
        """ Abort the task and forget all of its subtasks """
        self.tasks[task_id].abort()
        self.tasks[task_id].task_status = TaskStatus.aborted
        self.tasks_states[task_id].status = TaskStatus.aborted
        for sub in self.tasks_states[task_id].subtask_states.values():
            del self.subtask2task_mapping[sub.subtask_id]
        self.tasks_states[task_id].subtask_states.clear()

        self.notice_task_updated(task_id)

    @handle_task_key_error
    def pause_task(self, task_id):
        """ Set the task and its state to paused """
        self.tasks[task_id].task_status = TaskStatus.paused
        self.tasks_states[task_id].status = TaskStatus.paused

        self.notice_task_updated(task_id)

    @handle_task_key_error
    def resume_task(self, task_id):
        """ Set the task and its state to starting """
        self.tasks[task_id].task_status = TaskStatus.starting
        self.tasks_states[task_id].status = TaskStatus.starting

        self.notice_task_updated(task_id)

    @handle_task_key_error
    def delete_task(self, task_id):
        """ Remove the task, its state, its subtasks and its temporary dir """
        for sub in self.tasks_states[task_id].subtask_states.values():
            del self.subtask2task_mapping[sub.subtask_id]
        self.tasks_states[task_id].subtask_states.clear()

        self.tasks[task_id].unregister_listener(self)
        del self.tasks[task_id]
        del self.tasks_states[task_id]

        self.dir_manager.clear_temporary(task_id)

    @handle_task_key_error
    def query_task_state(self, task_id):
        """ Refresh and return the task's state
        Progress, elapsed time and a linear estimate of the remaining time
        are recomputed before the task updates the state itself.
        :return TaskState:
        """
        ts = self.tasks_states[task_id]
        t = self.tasks[task_id]

        ts.progress = t.get_progress()
        ts.elapsed_time = time.time() - ts.time_started

        if ts.progress > 0.0:
            ts.remaining_time = (ts.elapsed_time / ts.progress) - ts.elapsed_time
        else:
            ts.remaining_time = -0.0

        t.update_task_state(ts)

        return ts

    def get_subtasks(self, task_id):
        """ Get all subtasks related to given task id
        :param task_id: Task ID
        :return: list of all subtasks related with @task_id or None
                 if @task_id is not known
        """
        if task_id not in self.tasks_states:
            return None
        return [
            sub.subtask_id
            for sub in self.tasks_states[task_id].subtask_states.values()
        ]

    def change_config(self, root_path, use_distributed_resource_management):
        """ Replace the DirManager and the distributed resources flag """
        self.dir_manager = DirManager(root_path)
        self.use_distributed_resources = use_distributed_resource_management

    @handle_task_key_error
    def change_timeouts(self, task_id, full_task_timeout, subtask_timeout):
        """ Update the task's deadline and subtask timeout
        :param task_id: id of the task to update
        :param full_task_timeout: converted to a deadline with
                                  timeout_to_deadline
        :param subtask_timeout: stored in the task header
        """
        task = self.tasks[task_id]
        task.header.deadline = timeout_to_deadline(full_task_timeout)
        task.header.subtask_timeout = subtask_timeout
        task.full_task_timeout = full_task_timeout
        task.header.last_checking = time.time()

    def get_task_id(self, subtask_id):
        """ Return id of the task owning the subtask
        :raise KeyError: unknown subtask id
        """
        return self.subtask2task_mapping[subtask_id]

    def get_task_dict(self, task_id):
        """ Return the task as a dictionary (preview, simple and definition
        dictionaries merged)
        :raise KeyError: unknown task id
        """
        task = self.tasks[task_id]

        # single=True retrieves one preview file. If rendering frames,
        # it's the preview of the most recently computed frame.
        dictionary = {u'preview': task.get_preview(single=True)}
        dictionary.update(self.get_simple_task_dict(task))
        dictionary.update(self.get_task_definition_dict(task))
        return dictionary

    def get_tasks_dict(self):
        """ Return simple dictionaries of all known tasks """
        return [self.get_simple_task_dict(t) for t in self.tasks.itervalues()]

    def get_subtask_dict(self, subtask_id):
        """ Return the subtask's state as a dictionary
        :raise KeyError: unknown subtask id
        """
        task_id = self.subtask2task_mapping[subtask_id]
        task_state = self.tasks_states[task_id]
        subtask = task_state.subtask_states[subtask_id]
        return subtask.to_dictionary()

    def get_subtasks_dict(self, task_id):
        """ Return dictionaries of all subtasks of the task
        :raise KeyError: unknown task id
        """
        task_state = self.tasks_states[task_id]
        subtasks = task_state.subtask_states
        return [subtask.to_dictionary() for subtask in subtasks.itervalues()]

    def get_subtasks_borders(self, task_id):
        """ Return borders of all subtasks of the task
        :return dict: unicode subtask id -> result of the task type's
                      get_task_border
        :raise KeyError: unknown task id or task type
        """
        task = self.tasks[task_id]
        task_type_name = task.task_definition.task_type.lower()
        task_type = self.task_types[task_type_name]
        task_state = self.tasks_states[task_id]
        total_subtasks = task.get_total_tasks()

        return {
            to_unicode(subtask.subtask_id): task_type.get_task_border(
                subtask, task.task_definition, total_subtasks, as_path=True)
            for subtask in task_state.subtask_states.values()
        }

    def get_simple_task_dict(self, task):
        """ Return the task and its state merged into one dictionary with
        an additional 'duration' entry (never negative)
        """
        state = self.tasks_states.get(task.header.task_id)
        timeout = task.task_definition.full_task_timeout

        dictionary = {u'duration': max(timeout - state.remaining_time, 0)}
        dictionary.update(task.to_dictionary())
        dictionary.update(state.to_dictionary())
        return dictionary

    def get_task_preview(self, task_id, single=False):
        """ Return task.get_preview(single=single)
        :raise KeyError: unknown task id
        """
        return self.tasks[task_id].get_preview(single=single)

    @handle_subtask_key_error
    def set_computation_time(self, subtask_id, computation_time):
        """
        Set computation time for subtask and also compute and set new value
        based on saved price for this subtask
        :param str subtask_id: subtask which was computed in given
                               computation_time
        :param float computation_time: how long does it take to compute
                                       this task
        :return:
        """
        task_id = self.subtask2task_mapping[subtask_id]
        ss = self.tasks_states[task_id].subtask_states[subtask_id]
        ss.computation_time = computation_time
        ss.value = compute_subtask_value(ss.computer.price, computation_time)

    def add_comp_task_request(self, theader, price):
        """ Add a header of a task which this node may try to compute """
        self.comp_task_keeper.add_request(theader, price)

    @handle_task_key_error
    def get_payment_for_task_id(self, task_id):
        """ Return the sum of the values of all subtasks of the task """
        val = 0.0
        t = self.tasks_states[task_id]
        for ss in t.subtask_states.values():
            val += ss.value
        return val

    def __add_subtask_to_tasks_states(self, node_name, node_id, price, ctd,
                                      address):
        """ Create and store the SubtaskState of a newly assigned subtask
        :raise RuntimeError: the task of the definition is not registered
        """
        if ctd.task_id not in self.tasks_states:
            raise RuntimeError("Should never be here!")

        logger.debug('add_subtask_to_tasks_states(%r, %r, %r, %r, %r)',
                     node_name, node_id, price, ctd, address)

        ss = SubtaskState()
        ss.computer.node_id = node_id
        ss.computer.node_name = node_name
        ss.computer.performance = ctd.performance
        ss.computer.ip_address = address
        ss.computer.price = price
        ss.time_started = time.time()
        ss.deadline = ctd.deadline
        # TODO: read node ip address
        ss.subtask_definition = ctd.short_description
        ss.subtask_id = ctd.subtask_id
        ss.extra_data = ctd.extra_data
        # NOTE(review): a TaskStatus member is stored in subtask_status;
        # presumably a SubtaskStatus member is intended -- confirm
        ss.subtask_status = TaskStatus.starting
        ss.value = 0

        self.tasks_states[ctd.task_id].subtask_states[ctd.subtask_id] = ss

    def notify_update_task(self, task_id):
        """ Alias of notice_task_updated """
        self.notice_task_updated(task_id)

    @handle_task_key_error
    def notice_task_updated(self, task_id):
        """ Persist the task (if persistence is on) and broadcast the
        'task_status_updated' event
        """
        # self.save_state()
        if self.task_persistence:
            self.dump_task(task_id)
        dispatcher.send(signal='golem.taskmanager',
                        event='task_status_updated',
                        task_id=task_id)
class LocalComputer:
    """Run one task computation locally in a ``DockerTaskThread``.

    ``run()`` prepares a temporary directory and a resources directory
    obtained from a ``DirManager``, starts the task thread and, once the
    thread finishes, reports the outcome through ``success_callback`` or
    ``error_callback``.
    """

    DEFAULT_WARNING = "Computation failed"
    DEFAULT_SUCCESS = "Task computation success!"

    def __init__(self,
                 root_path: str,
                 success_callback: Callable,
                 error_callback: Callable,
                 get_compute_task_def:
                 Optional[Callable[[], ComputeTaskDef]] = None,
                 compute_task_def: Optional[ComputeTaskDef] = None,
                 check_mem: bool = False,
                 comp_failed_warning: str = DEFAULT_WARNING,
                 comp_success_message: str = DEFAULT_SUCCESS,
                 resources: Optional[list] = None,
                 additional_resources=None) -> None:
        """
        :param root_path: root directory handed to ``DirManager``
        :param success_callback: called with the task thread's result and
            the time spent when the computation succeeds
        :param error_callback: called with the exception raised while
            starting the computation, or with the task thread's error
            message when the computation fails
        :param get_compute_task_def: zero-argument factory used by
            ``run()`` when ``compute_task_def`` is not set
        :param compute_task_def: task definition to compute
        :param check_mem: forwarded to ``DockerTaskThread``
        :param comp_failed_warning: message logged on failure
        :param comp_success_message: stored on the instance
        :param resources: paths copied into the test resources directory;
            ``None`` means no resources
        :param additional_resources: paths copied into the top level of
            the test resources directory; ``None`` means none
        """
        self.res_path = None
        self.tmp_dir: Optional[str] = None
        self.success = False
        self.lock = Lock()
        self.tt: Optional[DockerTaskThread] = None
        self.dir_manager = DirManager(root_path)
        self.compute_task_def = compute_task_def
        self.get_compute_task_def = get_compute_task_def
        self.error_callback = error_callback
        self.success_callback = success_callback
        self.check_mem = check_mem
        self.comp_failed_warning = comp_failed_warning
        self.comp_success_message = comp_success_message
        # Fresh lists per instance instead of mutable default arguments.
        self.resources = [] if resources is None else resources
        self.additional_resources = (
            [] if additional_resources is None else additional_resources)
        self.start_time: Optional[float] = None
        self.end_time: Optional[float] = None
        self.test_task_res_path: Optional[str] = None

    def run(self) -> None:
        """Prepare directories and resources, then start the task thread.

        Any exception raised on the way is logged together with
        ``comp_failed_warning`` and passed to ``error_callback`` instead of
        propagating.
        """
        try:
            self.start_time = time.time()
            self._prepare_tmp_dir()
            self._prepare_resources(self.resources)  # makes a copy

            # An explicit definition wins; the factory is the fallback.
            ctd = self.compute_task_def or self.get_compute_task_def()

            self.tt = self._get_task_thread(ctd)
            self.tt.start().addBoth(lambda _: self.task_computed(self.tt))

        except Exception as exc:  # pylint: disable=broad-except
            logger.warning("%s", self.comp_failed_warning, exc_info=True)
            self.error_callback(exc)

    def end_comp(self) -> bool:
        """Ask the task thread to end its computation.

        :return: True when a task thread exists and was asked to end,
            False otherwise
        """
        if self.tt:
            self.tt.end_comp()
            return True
        return False

    def get_progress(self) -> Optional[float]:
        """Return the task thread's progress.

        :return: None when there is no task thread, 0.0 (after logging a
            warning) when the thread reports an error, otherwise the
            thread's own progress value
        """
        if not self.tt:
            return None
        with self.lock:
            if self.tt.get_error():
                logger.warning(self.comp_failed_warning)
                return 0.0
            return self.tt.get_progress()

    def task_computed(self, task_thread: TaskThread) -> None:
        """Record the end time and dispatch to the success or failure
        handler depending on ``is_success``.
        """
        self.end_time = time.time()
        if self.is_success(task_thread):
            self.computation_success(task_thread)
        else:
            self.computation_failure(task_thread)

    # This cannot be changed to staticmethod, because it's overriden in
    # a derived class
    # pylint:disable=no-self-use
    def is_success(self, task_thread: TaskThread) -> bool:
        """Tell whether the thread finished without an error and produced
        a result with a non-empty ``"data"`` entry.

        The ``and`` chain would otherwise hand back its last operand (a
        dict, list or None), so it is coerced to match the declared
        ``bool`` return type.
        """
        return bool(
            not task_thread.error
            and task_thread.result
            and task_thread.result.get("data")
        )

    def computation_success(self, task_thread: TaskThread) -> None:
        """Pass the thread's result and the time spent to
        ``success_callback``.
        """
        self.success_callback(task_thread.result, self._get_time_spent())

    def computation_failure(self, task_thread: TaskThread) -> None:
        """Log the failure warning, extended with the thread's error
        message when there is one, and pass that message (converted with
        ``to_unicode``) to ``error_callback``.
        """
        logger_msg = self.comp_failed_warning
        if task_thread.error_msg:
            logger_msg += " " + task_thread.error_msg
        logger.warning(logger_msg)
        self.error_callback(to_unicode(task_thread.error_msg))

    def _get_time_spent(self) -> Optional[float]:
        """Return ``end_time - start_time``.

        :return: the difference, or None (after logging an error) when the
            subtraction raises TypeError, e.g. because a timestamp is
            still None
        """
        try:
            return self.end_time - self.start_time
        except TypeError:
            logger.error("Cannot measure execution time")
            return None

    def _prepare_resources(self, resources):
        """Recreate the test resources directory and copy resources in.

        An existing directory is removed first. A single directory
        resource is copied as a whole tree. Otherwise every non-empty
        resource path is copied to the location it has relative to the
        resources' common base directory. Finally each additional resource
        is copied into the top level of the directory.

        :param resources: list of resource paths; may be empty
        :return: always True
        """
        self.test_task_res_path = self.dir_manager.get_task_test_dir("")

        def onerror(func, target_path, exc_info):
            # Try to set write permissions
            if not os.access(target_path, os.W_OK):
                os.chmod(target_path, stat.S_IWUSR)
                func(target_path)
            else:
                raise OSError('Cannot remove {}: {}'.format(
                    target_path, exc_info))

        if os.path.exists(self.test_task_res_path):
            shutil.rmtree(self.test_task_res_path, onerror=onerror)

        if resources:
            if len(resources) == 1 and os.path.isdir(resources[0]):
                shutil.copytree(resources[0], self.test_task_res_path)
            else:
                # no trailing separator
                if len(resources) == 1:
                    base_dir = os.path.dirname(resources[0])
                else:
                    base_dir = common_dir(resources)

                base_dir = os.path.normpath(base_dir)

                for resource in filter(None, resources):
                    norm_path = os.path.normpath(resource)

                    # Strip the base directory prefix to get the path the
                    # resource keeps inside the destination directory.
                    sub_path = norm_path.replace(base_dir + os.path.sep,
                                                 '', 1)
                    sub_dir = os.path.dirname(sub_path)
                    dst_dir = os.path.join(self.test_task_res_path, sub_dir)
                    os.makedirs(dst_dir, exist_ok=True)

                    name = os.path.basename(resource)
                    shutil.copy2(resource, os.path.join(dst_dir, name))

        for res in self.additional_resources:
            if not os.path.exists(self.test_task_res_path):
                os.makedirs(self.test_task_res_path)
            shutil.copy(res, self.test_task_res_path)

        return True

    def _prepare_tmp_dir(self):
        """Obtain the temporary directory path and make sure it exists
        freshly created; an existing directory is removed first (removal
        errors are ignored).
        """
        self.tmp_dir = self.dir_manager.get_task_temporary_dir("")
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir, True)
        os.makedirs(self.tmp_dir)

    def _get_task_thread(self, ctd: ComputeTaskDef) -> DockerTaskThread:
        """Build the ``DockerTaskThread`` for a compute task definition.

        :param ctd: definition providing ``subtask_id``, ``docker_images``,
            ``src_code`` and ``extra_data`` entries
        :raises RuntimeError: when the resources path or the temporary
            directory has not been prepared yet
        """
        if self.test_task_res_path is None:
            raise RuntimeError('Resource path is set to None')
        if self.tmp_dir is None:
            raise RuntimeError('Temporary directory is set to None')

        dir_mapping = DockerTaskThread.generate_dir_mapping(
            resources=self.test_task_res_path,
            temporary=self.tmp_dir,
        )
        return DockerTaskThread(
            ctd['subtask_id'],
            ctd['docker_images'],
            ctd['src_code'],
            ctd['extra_data'],
            dir_mapping,
            0,
            check_mem=self.check_mem,
        )