def _make_rsync_task(id: int, source: Storage, rel_path: str, target: Storage, msg_out=outStub):
    """Build a Task and attach the default rsync copytool to it.

    :param id: identifier handed to the Task constructor.
    :param source: storage the task reads from.
    :param rel_path: path handed to the Task constructor as its third argument.
    :param target: storage the task writes to.
    :param msg_out: message sink forwarded to the Task (defaults to outStub).
    :return: the freshly created Task with ``copytool`` set to ``default_rsync``.
    """
    rsync_task = Task(id, source, rel_path, target, msg_out=msg_out)
    rsync_task.copytool = default_rsync
    return rsync_task
def destroy_test_evironment(test_dir: str):
    """Wipe *test_dir* and recreate it as an empty directory.

    :param test_dir: path of the test directory to reset.
    """
    # unblock_source_file() works on a task, so wrap test_dir in a
    # placeholder task whose constructor arguments are irrelevant here.
    placeholder = Task(1, "xxx", "yyy", "zzz")
    placeholder.absolute_source_path = test_dir
    TestHelper.unblock_source_file(placeholder)

    # With the directory unblocked it can be removed and created again.
    TestHelper.remove_dir(test_dir)
    TestHelper._mkdir(test_dir)
def copy(self, task: Task):
    """Validate *task*, run its copytool and report the resulting stats.

    :param task: the task to execute; must not be None.
    :return: the same task object that was passed in.
    :raises: NoActiveTaskException if *task* is None
    """
    if task is None:
        raise NoActiveTaskException(Worker.NO_TASK % self._name)

    self.validate_task(task)

    started_at = datetime.now()
    task.set_starting_time(started_at)

    # Blocking the source file does not work, so a different way of assuring
    # source consistency would have to be inserted at this point.
    copy_stats = task.copytool.copy(task)

    finished_id = task.get_id()
    self._msg.send_task_stats(task_id=finished_id, stats=copy_stats)
    return task
def _retrieve_task_values(self, task: Task, taskvalues=dict()): """Reads all the neccessary data from task.""" taskvalues["worker_name"] = self._id taskvalues["id"] = task.get_id() taskvalues["status"] = task.status.name taskvalues["absolute_target_path"] = task.absolute_target_path if task.get_starting_time() is not None: taskvalues["start_time"] = task.get_starting_time().isoformat() else: taskvalues["start_time"] = None if task.get_completion_time() is not None: taskvalues["completion_time"] = task.get_completion_time( ).isoformat() else: taskvalues["completion_time"] = None return taskvalues
def assign_task(self, user_name, job_id, source_path, target_path, copy_options):
    """Check an incoming move command and, if accepted, start the move.

    The copytool is built from ``copy_options`` (keys ``action``,
    ``copytool``, ``retrycount`` and ``options``), wrapped in a new Task and
    handed to the worker. On success the task is run in a RunThread.

    :param user_name: not used by this method.
    :param job_id: identifier handed to the Task constructor.
    :param source_path: source handed to the Task constructor.
    :param target_path: target handed to the Task constructor.
    :param copy_options: dictionary describing action and copytool.
    :return: ``{'Exception': 'None'}`` when the task was started, otherwise
        a dictionary naming the exception class that rejected the command.
    """
    try:
        action = Action(copy_options['action'])
        tool_name = copy_options['copytool']
        executable = ToolConfigParser().get_executable_path(tool_name)
        copytool_class = ToolConfigParser().get_copytool_class(tool_name)
        copytool = copytool_class(copy_options['retrycount'],
                                  copy_options['options'],
                                  executable)
        candidate = Task(job_id,
                         source_path,
                         self._msg,
                         target_path=target_path,
                         copytool=copytool,
                         action=action)
        self._task = self._worker.adopt_task(candidate)
    except (StorageAliasNotFoundException, StorageNotMountedException) as storage_err:
        # NOTE(review): self._task is only reassigned on the last line of the
        # try body, so here it presumably still refers to whatever task was
        # stored before this call -- confirm that is the intended target.
        self._task.add_exception(storage_err)
        self._task.status = TaskStatus.EXCEPTION
        self._msg.raise_exception(self._task)
        if isinstance(storage_err, StorageAliasNotFoundException):
            reason = 'StorageAliasNotFoundException'
        else:
            reason = 'StorageNotMountedException'
        return {
            'Exception': reason,
        }
    except (WorkerStatusException, NamingConventionError) as rejected:
        if isinstance(rejected, WorkerStatusException):
            reason = 'WorkerStatusException'
        else:
            reason = 'NamingConventionError'
        return {
            'Exception': reason,
        }

    self._thread = RunThread(self._task, self._worker, self._msg)
    self._thread.start()
    return {
        'Exception': 'None',
    }
def kill_process(self, task):
    """Interrupt the running copy process and clean up afterwards.

    The process group of the running process is sent SIGTERM, then the
    delete method is invoked through a throw-away task if the task's target
    path exists. If everything went well the task status is set to
    TERMINATED; any error/exception that occurred is added to the task and
    the status is set to ERROR/EXCEPTION instead.

    :param task: the currently executed task.
    :return task: the updated task.
    :raises: OperationNotAllowedException if no copy is in progress
    """
    if not self.is_copying():
        raise OperationNotAllowedException(
            Copytool.TOOL_NOT_RUNNING % task.worker_name)

    try:
        process_group = os.getpgid(self._process.pid)
        os.killpg(process_group, signal.SIGTERM)
    except Exception as kill_err:
        task.add_error(kill_err)
        task.status = TaskStatus.ERROR

    # delete() works on a task, so a throw-away one is built for it.
    # NOTE(review): the throw-away task is created from task.source_path
    # while the existence check looks at the target path -- confirm that
    # delete() really removes the partially copied target.
    cleanup = Task(42, task.source_path, task.msg_out)
    cleanup.status = TaskStatus.CHECKED
    if os.path.exists(task.absolute_target_path):
        cleanup = self.delete(cleanup)

    if cleanup.status == TaskStatus.ERROR:
        task.add_error(cleanup.get_errors()[0])
        task.status = TaskStatus.ERROR
    elif cleanup.status == TaskStatus.EXCEPTION:
        task.add_exception(cleanup.get_exceptions()[0])
        task.status = TaskStatus.EXCEPTION
    elif task.status != TaskStatus.ERROR:
        # An error recorded while killing the process must not be
        # overwritten, so TERMINATED is only set when there was none.
        task.status = TaskStatus.TERMINATED

    # self._process is deliberately left alone here; according to the
    # original author's note it is reset in the execute_cmd method.
    return task
def build_test_environment(test_dir: str):
    """Create the directory tree and files the tests operate on.

    :param test_dir: base path; every entry is created at ``test_dir + name``,
        so the path is used as a plain string prefix.
    """
    # Start from a clean slate so that none of the mkdir calls can collide.
    TestHelper.destroy_test_evironment(test_dir)

    plain_dirs = (
        "centos-directory-1",
        "centos-directory-1/subdir",
        "centos-test_dir-1",
        "centos-test_dir-1/subdir",
        "moved",
        "extract",
    )
    for dir_name in plain_dirs:
        TestHelper._mkdir(test_dir + dir_name)

    # The read-only directory is produced by blocking it via a placeholder
    # task, because block_source_file() works on a task.
    TestHelper._mkdir(test_dir + "centos-readonly_dir-1")
    placeholder = Task(1, "xxx", "yyy", "zzz")
    placeholder.absolute_source_path = test_dir + "centos-readonly_dir-1"
    TestHelper.block_source_file(placeholder)

    two_lines = "test successful\nother value"
    one_line = "test successful"
    test_files = (
        ("centos-testfile-1.txt", two_lines),
        ("centos-test_dir-1/testfile1.txt", two_lines),
        ("centos-test_dir-1/testfile2.txt", one_line),
        ("centos-test_dir-1/subdir/testfile1.txt", two_lines),
        ("centos-test_dir-1/subdir/testfile2.txt", one_line),
        ("centos-directory-1/cksum_testfile1.txt", two_lines),
        ("centos-directory-1/cksum_testfile2.txt", one_line),
        ("centos-directory-1/subdir/testfile1.txt", two_lines),
        ("centos-directory-1/subdir/testfile2.txt", one_line),
    )
    for file_name, content in test_files:
        TestHelper._mkfile(test_dir + file_name, content)
def _execute_cksum(path: str, task: Task):
    """Run ``cksum`` on *path* and return the text it printed.

    :param path: the file handed to cksum.
    :param task: task on whose behalf cksum runs; only read to build error
        messages (``get_id()`` and ``worker_name``).
    :return: the combined stdout/stderr output of cksum as a string; the
        caller is responsible for parsing the checksum out of it.
    :raises: IncorrectConfigInWorkerException, CopyNotSuccessfullException,
        SystemSetupError
    """
    cmd = ["cksum", path]
    try:
        return subprocess.check_output(cmd,
                                       stderr=subprocess.STDOUT,
                                       universal_newlines=True)
    except subprocess.CalledProcessError as process_err:
        # cksum errors all seem to have return code 1...
        raise CopyNotSuccessfullException(
            Cksum.CKSUM_ERR % (task.get_id(), process_err.output)
        ) from process_err
    except ValueError as value_err:
        # Only CalledProcessError carries ``.output``; reading it from a
        # ValueError would raise AttributeError and hide the real problem.
        raise IncorrectConfigInWorkerException(
            Cksum.WRONG_CONFIG % (task.worker_name, str(value_err))
        ) from value_err
    except OSError as os_err:
        # Same here: OSError has no ``.output``, so report its message.
        raise SystemSetupError(
            ConsistencyCheckTool.WRONG_SETUP
            % (task.worker_name, Cksum.get_name(), str(os_err))
        ) from os_err
(self.switch_redo(True), sleep(1)), lambda self: ()) ] ancor = {"name": "div", "class_": "item"} item_patterns = [("price", { "name": "strong" }), ("count", { "name": "div", "class_": "deal-cnt" }), ("shopname", { "name": "a", "class_": "shopname" }), ("shoplink", { "name": "a", "class_": "shopname" }, "href")] #parser_pattern = {"ancor":ancor, "item_patterns":item_patterns, "output_params":{"output_file":"data/wavebetter_links.json", "output_name":"wavebetter"}} parser_pattern = { "ancor": ancor, "item_patterns": item_patterns, "output_params": { "output_db": { "host": "127.0.0.1", "port": 27017 }, "output_name": "wavebetter" } } a = Task() a.add(url, actions, parser_pattern) a.save('data/task.json')
def consistency_check(task: Task):
    """Compare source and target of *task* using the tool cksum.

    cksum can only check single files, so directories are walked and the
    checksums of all contained files are added up. The task is marked
    CHECKED when the totals of source and target match; otherwise, or when
    cksum itself fails, a CopyNotSuccessfullException is added to the task
    and its status is set to EXCEPTION.

    :param task: task providing ``absolute_source_path`` and
        ``absolute_target_path``.
    :return: the updated task.
    :raises: IncorrectConfigInWorkerException, SystemSetupError
    """

    def _checksum_total(path):
        """Add up the cksum values of *path* or of every file below it."""
        if os.path.isdir(path):
            # Only the totals are compared in the end, so the order in
            # which the files are visited does not matter.
            file_paths = [
                os.path.join(root, fname)
                for root, _dirs, files in os.walk(path)
                for fname in files
            ]
        else:
            # we just have one file to check
            file_paths = [path]
        # The checksum is the first space separated field of cksum's output.
        return sum(
            int(Cksum._execute_cksum(file_path, task)[:-2].split(" ")[0])
            for file_path in file_paths)

    orig_path = task.absolute_source_path
    copy_path = task.absolute_target_path

    try:
        orig_sum = _checksum_total(orig_path)
        copy_sum = _checksum_total(copy_path)
    except CopyNotSuccessfullException as cksum_err:
        # Every failure path sets ``status``; the single-file source branch
        # used to assign a misspelled attribute and left the status as is.
        task.add_exception(cksum_err)
        task.status = TaskStatus.EXCEPTION
        return task

    # now we compare both sums and see whether they are equal.
    if orig_sum == copy_sum:
        task.status = TaskStatus.CHECKED
    else:
        task.add_exception(
            CopyNotSuccessfullException(
                Cksum.CKSUM_NOSUCCESS % (task.get_id(), orig_sum, copy_sum)))
        task.status = TaskStatus.EXCEPTION
    return task
def test_file_compress(centos127, archive1, outStub):
    """Task construction for 'johann-testfile-1.txt' must raise NotADirectoryException."""
    relative_path = 'johann-testfile-1.txt'
    with pytest.raises(NotADirectoryException):
        Task(1, centos127, relative_path, archive1, outStub)
def consistency_check(task: Task, **kwargs):
    """Promote a COPIED task to CHECKED; any other status is left untouched.

    :param task: the task whose status is inspected.
    :param kwargs: accepted but not used.
    :return: the same task object.
    """
    was_copied = task.status == TaskStatus.COPIED
    if was_copied:
        task.status = TaskStatus.CHECKED
    return task
def handle_error(self, retcode, cmd, output, task: Task):
    """Evaluate the return code of tar and record the matching error.

    :param retcode: return code of the tar process.
    :param cmd: the command that was executed; re-run on a retry.
    :param output: output of the process, included in error messages.
    :param task: the task the command belongs to.
    :return: the updated task.
    """
    # WARNING this error handling assumes tar version 1.30 or older!
    if retcode == -1:
        # the process was killed intentionally, so only the status changes
        task.status = TaskStatus.TERMINATED
        return task

    if retcode == 1 and self._retries < self._retry_count:
        # some files differ -> the consistency check failed, try once more
        # NOTE(review): the result of the retry is not inspected and the
        # status is still set to ERROR below -- confirm this is intended.
        self._retries += 1
        self.execute_cmd(cmd, task)
    else:
        # Return code 1 with no retries left is a failed check; 2 (fatal,
        # unrecoverable) and every other code are reported as write failures.
        template = Copytool.CHECK_FAIL if retcode == 1 else Copytool.WRITE_FAIL
        details = (task.worker_name, task.get_id(), str(cmd), output)
        task.add_error(CopyFailError(template % details))

    task.status = TaskStatus.ERROR
    return task
def delete(self, task: Task):
    """Set the status of *task* to DELETED and return the task.

    Nothing besides the status attribute is modified here.
    """
    deleted = TaskStatus.DELETED
    task.status = deleted
    return task
def consistency_check(self, task: Task):
    """Set the status of *task* to CHECKED unconditionally and return it."""
    checked = TaskStatus.CHECKED
    task.status = checked
    return task
def _make_tar_task(id, source, rel, target, msg_out=outStub):
    """Build a Task and attach the default tar copytool to it.

    :param id: identifier handed to the Task constructor.
    :param source: source handed to the Task constructor.
    :param rel: third positional argument of the Task constructor.
    :param target: target handed to the Task constructor.
    :param msg_out: message sink forwarded to the Task (defaults to outStub).
    :return: the new Task with ``copytool`` set to ``default_tar``.
    """
    tar_task = Task(id, source, rel, target, msg_out=msg_out)
    tar_task.copytool = default_tar
    return tar_task
def _make_shiftc_task(id, source, rel, target, msg_out=outStub):
    """Build a Task and attach the default shiftc copytool to it.

    :param id: identifier handed to the Task constructor.
    :param source: source handed to the Task constructor.
    :param rel: third positional argument of the Task constructor.
    :param target: target handed to the Task constructor.
    :param msg_out: message sink forwarded to the Task; defaults to outStub,
        which is what this factory always used before the parameter existed.
    :return: the new Task with ``copytool`` set to ``default_shiftc``.
    """
    task = Task(id, source, rel, target, msg_out=msg_out)
    task.copytool = default_shiftc
    return task
from worker.parser import BS4Parser
from worker.spider import Spider
from worker.task import Task
from manager.threadmgr import ThreadManager as Manager
# Alternative manager implementation; swap the import to use it instead.
#from manager.simplemgr import SimpleManager as Manager
from time import sleep


def myworker(url, actions, parser_pattern):
    """Visit *url*, run *actions* on it and emit the parsed result.

    :param url: address handed to the Spider.
    :param actions: passed unchanged to ``Spider.go``.
    :param parser_pattern: keyword arguments for the BS4Parser constructor.
    """
    myparser = BS4Parser(**parser_pattern)
    site = Spider(url, myparser)
    site.go(actions)
    myparser.output()
    # NOTE(review): presumably a pause between two jobs -- confirm the intent.
    sleep(3)


# Script entry: load the stored task file and hand its content to the manager.
if __name__=="__main__":
    t = Task()
    t.load('data/task.json')
    # NOTE(review): the first argument is presumably the number of workers --
    # verify against the Manager implementation.
    m = Manager(3, myworker, t.get())
    m.run()
    m.join()