async def add_file(self, name: str, path: Optional[str], stream: Any) -> str:
    """ Add the byte stream as the file with name 'name' into self.
    :param name: File name.
    :param path: The client-side full path of the file. For replicating a directory structure.
    :param stream: The byte stream with file content.
    """
    base_path: Path
    if self.tag is None:
        # No tag: isolated throw-away directory per upload
        base_path = Path(tempfile.mkdtemp(suffix=self._user_suffix()))
    else:
        # Tagged upload: shared per-user directory, one subdirectory per tag
        base_path = Path(tempfile.gettempdir(), self.USER_DIR_PATTERN % self.user_id)
        TempDirForTasks.ensure_exists(base_path, self.user_dir_cache)
        base_path /= self.tag
    if path is not None:
        assert path.endswith(name)
        # Re-create the client-side directory layout below base_path
        base_path /= path[:-len(name)]
    TempDirForTasks.ensure_exists(base_path, self.user_dir_cache)
    dest_path = base_path.absolute().joinpath(name)
    # TODO: Should be an option
    if dest_path.exists():
        return str(dest_path)
    # Copy the stream content into the destination file, 1KB at a time
    with open(dest_path, "wb") as fout:
        while True:
            chunk = await stream.read(1024)
            if not chunk:
                break
            fout.write(chunk)
    return str(dest_path)
def __init__(self):
    """ Initialize common job-service state. """
    # Run the __init__ chain starting at the next class in the MRO
    super().__init__()
    # Then explicitly run the __init__ of whatever follows LogEmitter in the MRO,
    # i.e. skip over LogEmitter's own predecessors in the chain
    super(LogEmitter, self).__init__()
    # DB id of the job; 0 until a job is created or deserialized into self
    self.job_id: JobIDT = 0
    self.temp_for_jobs = TempDirForTasks(self.link_src)
    # The state for multi-step jobs
    self.saved_state: Dict[str, Any] = {}
    # The last reply for interactive jobs
    self.last_reply: Dict[str, Any] = {}
def delete(self, current_user_id: UserIDT, job_id: JobIDT):
    """ Erase the job. """
    # Security check
    with self._query_for_update(current_user_id, job_id) as job_bo:
        erasable_states = (DBJobStateEnum.Finished,
                           DBJobStateEnum.Error,
                           DBJobStateEnum.Pending)
        if job_bo.state in erasable_states:
            # TODO: Set the job to a state e.g. Trashed and erase in background, better for responsiveness
            TempDirForTasks(self.link_src).erase_for(job_id)
            job_bo.delete()
        elif job_bo.state == DBJobStateEnum.Running:
            # Set the job to Killed
            # TODO: No _real_ kill just presentation
            job_bo.state = DBJobStateEnum.Error
            job_bo.progress_msg = JobBO.KILLED_MESSAGE
def create(self) -> int:
    """ Persist a brand-new Task row and return its DB-allocated id.
    :return: The new task id.
    """
    tsk = Task()
    self.session.add(tsk)
    self.session.commit()
    # Wipe any directory, which belongs to another task with same ID
    temp_for_task = TempDirForTasks(join(self.link_src, 'temptask')).base_dir_for(tsk.id)
    # ignore_errors: for a fresh id there is usually no leftover directory at all,
    # and a plain rmtree would raise FileNotFoundError in that case
    shutil.rmtree(temp_for_task, ignore_errors=True)
    return tsk.id
def get_file_stream(self, current_user_id: UserIDT, task_id: int) -> Tuple[IO, str]:
    """ Return a stream containing the file associated with this task. """
    # Sanity & security checks
    task: Task = self.session.query(Task).get(task_id)
    assert task is not None, NOT_FOUND
    current_user = self.session.query(User).get(current_user_id)
    is_owner = task.owner_id == current_user_id
    assert is_owner or current_user.has_role(Role.APP_ADMINISTRATOR), NOT_AUTHORIZED
    # TODO: 'temptask' constant is repeated in many places
    temp_dir = TempDirForTasks(join(self.link_src, 'temptask')).base_dir_for(task_id)
    assert task.inputparam is not None
    # The produced file name was stored in the task's JSON input params
    out_file_name = json.loads(task.inputparam)["OutFile"]
    out_file_path = temp_dir / out_file_name
    try:
        return open(out_file_path, mode="rb"), out_file_name
    except IOError:
        return StringIO("NOT FOUND"), out_file_name
def __init__(self, task_id: Optional[int] = None, task_type: Optional[str] = None):
    """ Bind to a task row.
    :param task_id: Id of an existing task; None means create a brand-new task.
    :param task_type: Stored as the new task's taskclass when creating.
    """
    super().__init__()
    self.task_id: int
    if task_id is None:
        # Create a new task
        task = Task()
        task.taskclass = task_type
        self.session.add(task)
        # flush (not commit) is enough for task.id to be allocated below
        self.session.flush()
        self.task_id = task.id
    else:
        # Fetch existing task
        task = self.session.query(Task).get(task_id)
        # SimpleImport calls with task_id = 0 during verification
        # assert task is not None
        if task is None:
            assert task_id is not None
            self.task_id = task_id
        else:
            self.task_id = task.id
    # NOTE(review): self.task can be None here (missing row path above)
    self.task = task
    self.temp_for_task = TempDirForTasks(join(self.link_src, 'temptask'))
def get_file_stream(self, current_user_id: UserIDT, job_id: JobIDT) -> Tuple[IO, str, str]:
    """ Return a stream containing the produced file associated with this job. """
    # Sanity & security checks
    with self._query_for_update(current_user_id, job_bo := job_id) as job_bo:
        temp_dir = TempDirForTasks(self.link_src).base_dir_for(job_id)
        # Get the job in its state...
        with JobScheduler.instantiate(job_bo) as sce:
            out_file_name = sce.PRODUCED_FILE_NAME
        # ...and the file in its temp directory
        out_file_path = temp_dir / out_file_name
        # Map known file suffixes onto their media type
        media_type = "unknown"
        for a_suffix, a_type in ((".zip", "application/zip"),
                                 (".tsv", "text/tab-separated-values")):
            if out_file_name.endswith(a_suffix):
                media_type = a_type
                break
        try:
            return open(out_file_path, mode="rb"), out_file_name, media_type
        except IOError:  # pragma:nocover
            return StringIO("NOT FOUND"), out_file_name, media_type
class TaskServiceBase(Service, ABC):
    """
        Common methods and data for asynchronous and long operations.
    """

    def __init__(self, task_id: Optional[int] = None, task_type: Optional[str] = None):
        """ Bind to an existing task row, or create a fresh one when task_id is None. """
        super().__init__()
        self.task_id: int
        if task_id is not None:
            # Fetch existing task
            task = self.session.query(Task).get(task_id)
            # SimpleImport calls with task_id = 0 during verification
            # assert task is not None
            if task is not None:
                self.task_id = task.id
            else:
                assert task_id is not None
                self.task_id = task_id
        else:
            # Create a new task
            task = Task()
            task.taskclass = task_type
            self.session.add(task)
            self.session.flush()
            self.task_id = task.id
        self.task = task
        self.temp_for_task = TempDirForTasks(join(self.link_src, 'temptask'))

    def log_file_path(self):
        """ Ask for redirected logging. """
        log_dir = self.temp_for_task.base_dir_for(self.task_id)
        return (log_dir / 'TaskLogBack.txt').as_posix()

    def update_task(self, taskstate: Optional[str], percent: Optional[int], message: str):
        """ Update various fields in current task. """
        if taskstate is not None:
            self.task.taskstate = taskstate
        if percent is not None:
            self.task.progresspct = percent
        self.task.progressmsg = message
        self.task.lastupdate = datetime.datetime.now()
        self.session.commit()

    def update_progress(self, percent: int, message: str):
        """ Progress-only task update. """
        self.update_task(taskstate=None, percent=percent, message=message)

    def report_progress(self, current, total):
        """ Map file-processing progress into the 20%-100% range. """
        progress_msg = "Processing files %d/%d" % (current, total)
        self.update_progress(20 + 80 * current / total, progress_msg)

    def set_task_params(self, owner_id: UserIDT, file_name: str):
        """ Set export task features for this export. """
        task = self.session.query(Task).get(self.task_id)
        assert task is not None
        task.inputparam = json.dumps({"OutFile": file_name})
        task.owner_id = owner_id
        self.session.commit()
class JobServiceBase(Service, LogEmitter, ABC):
    """
        Common methods and data for asynchronous and long operations.
            This base class is for the short-lived instances which 'just' do some operations.
            For long-lived objects, i.e. processes @see JobScheduler class.
    """
    # Each concrete subclass declares its own type tag; used for dispatch below
    JOB_TYPE: str
    JOB_LOG_FILE_NAME = 'TaskLogBack.txt'

    def __init__(self):
        # Run the __init__ chain starting at the next class in the MRO
        super().__init__()
        # Then run the __init__ of whatever follows LogEmitter in the MRO
        super(LogEmitter, self).__init__()
        # DB id of the job; 0 until create_job() or state injection
        self.job_id: JobIDT = 0
        self.temp_for_jobs = TempDirForTasks(self.link_src)
        # The state for multi-step jobs
        self.saved_state: Dict[str, Any] = {}
        # The last reply for interactive jobs
        self.last_reply: Dict[str, Any] = {}

    @staticmethod
    def find_jobservice_class_by_type(clazz, job_type: str):
        """ Find a subclass with given type.
        Depth-first search of the subclass tree below 'clazz'; returns None when no match. """
        for job_sub_class in clazz.__subclasses__():
            if job_sub_class.JOB_TYPE == job_type:
                return job_sub_class
            else:
                # Recurse into this subclass' own subclasses
                for_subclass = JobServiceBase.find_jobservice_class_by_type(job_sub_class, job_type)
                if for_subclass:
                    return for_subclass

    def log_file_path(self):
        """
        Return redirected logging output path. @see DynamicLogs and LogEmitter.
        """
        log_file = self.temp_for_jobs.base_dir_for(self.job_id) / self.JOB_LOG_FILE_NAME
        return log_file.as_posix()

    @abc.abstractmethod
    def do_background(self):
        """ Launch background processing"""
        pass

    def run_in_background(self):
        """
        Background part of the job, standard behavior is to run the method
        and care for general problems.
        """
        try:
            self.do_background()
        except Exception as e:
            # Any unexpected exception flips the job to Error and records the traceback
            with JobBO.get_for_update(self.session, self.job_id) as job_bo:
                job_bo.state = DBJobStateEnum.Error
                job_bo.progress_msg = str(e)
                job_bo.set_messages(format_exception(e))
            # Also emit the problem into the job's redirected log
            with LogsSwitcher(self):
                logger.error("Unexpected termination of #%d", job_bo.id)
                logger.exception(e)

    @abc.abstractmethod
    def init_args(self, args: Dict) -> Dict:
        """ Serialization of __init__ arguments """
        ...

    @staticmethod
    def deser_args(json_args: Dict) -> None:
        # Default: nothing to deserialize; subclasses override as needed
        pass

    def _save_vars_to_state(self, names: List[str], *values):
        """ Save variables using provided names """
        to_save = {a_name: a_value for a_name, a_value in zip(names, values)}
        with JobBO.get_for_update(self.session, self.job_id) as job_bo:
            job_bo.update_inside(to_save)
            # Keep the in-memory copy in sync with the persisted state
            self.saved_state = job_bo.inside

    def _load_vars_from_state(self, names: List[str]) -> List[Any]:
        """ Load variables using provided names """
        ret = [self.saved_state[a_name] for a_name in names]
        return ret

    def load_state_from(self, job_state: Dict[str, Any]) -> None:
        """ Injection of service state """
        self.saved_state = job_state

    def load_reply_from(self, job_reply: Dict[str, Any]) -> None:
        """ Injection of service reply to last question """
        self.last_reply = job_reply

    def create_job(self, job_type: str, user_id: UserIDT):
        """ Create the DB job row for this service and claim its id. """
        args = self.init_args({})
        new_job = JobBO.create_job(self.session, user_id, job_type, args)
        self.job_id = new_job.id
        # Wipe any leftover temp directory from a previous job with same id
        self.temp_for_jobs.erase_for(new_job.id)

    def _get_job(self) -> Job:
        # Fetch the DB job row; it must exist once job_id is set
        job: Optional[Job] = self.session.query(Job).get(self.job_id)
        assert job is not None
        return job

    def _get_owner_id(self) -> UserIDT:
        job: Optional[Job] = self.session.query(Job).get(self.job_id)
        assert job is not None
        return job.owner_id

    def get_job_for_update(self) -> Job:
        # Touch updated_on so the job shows as recently active
        job = self._get_job()
        job.updated_on = datetime.now()
        return job

    def update_progress(self, percent: int, message: str):
        """ Store progress percentage and message into the job row. """
        with JobBO.get_for_update(self.session, self.job_id) as job_bo:
            job_bo.progress_pct = percent
            job_bo.progress_msg = message

    def report_progress(self, current, total):
        # Map file-processing progress into the 20%-100% range
        self.update_progress(20 + 80 * current / total,
                             "Processing files %d/%d" % (current, total))

    def set_job_result(self, errors: List[str], infos: Dict[str, Any]):
        """
        Set job detailed result and final status.
        """
        with JobBO.get_for_update(self.session, self.job_id) as job_bo:
            job_bo.set_result(infos)
            # Limit storage to 1000 first errors
            job_bo.set_messages(errors[:1000])
            if len(errors) > 0:
                job_bo.state = DBJobStateEnum.Error
                job_bo.progress_msg = "%d error(s) during run" % len(errors)
            else:
                job_bo.state = DBJobStateEnum.Finished
                job_bo.progress_pct = 100
                job_bo.progress_msg = "Done"

    def get_job_result(self) -> Any:
        """
        Get job detailed result.
        """
        job_bo = JobBO.get_one(self.session, self.job_id)
        assert job_bo is not None
        return job_bo.get_result()

    def set_job_to_ask(self, message: str, question_data: Dict[str, Any]):
        """
        Set the job to ask something from user.
        """
        logger.info("Asking for: %s", question_data)
        with JobBO.get_for_update(self.session, self.job_id) as job_bo:
            job_bo.state = DBJobStateEnum.Asking
            job_bo.progress_msg = message
            job_bo.set_question(question_data)
def get_log_path(self, current_user_id: UserIDT, job_id: JobIDT) -> Path:
    """ Return the path to the job's log file, after access check. """
    # Sanity & security checks
    job: JobBO = self.query(current_user_id, job_id)
    job_dir = TempDirForTasks(self.link_src).base_dir_for(job.id)
    return job_dir / JobServiceBase.JOB_LOG_FILE_NAME
def get_temp(self, task_id: int, inside: str) -> str:
    """ Return the path to 'inside', within the task's temporary directory. """
    tasks_tmp = TempDirForTasks(join(self.link_src, 'temptask'))
    return tasks_tmp.in_base_dir_for(task_id, inside)