def business_get_log(id: TaskId, tail: bool) -> Tuple[Content, HttpStatusCode]:
    """Fetches log file created by spawned task (output redirection).

    It relies on reading files located on filesystem, via connection with
    `username@hostname` (the parent job's user on the task's host).
    If file does not exist there's no way to fetch it from database (currently).
    File names must be named in one fashion (standard defined in
    `task_nursery.fetch_log`, currently: `task_<id>.log`).
    Renaming them manually will lead to inconsistency or 'Not Found' errors.

    `tail` argument allows for returning only the last few lines
    (10 is default for `tail` program).
    For more details, see: `task_nursery.fetch_log`.

    Returns a `(content, status)` pair:
        200 - log fetched; content carries `path` and `output_lines`
        404 - task/job record not found, or `ExitCodeError` raised while fetching
        422 - task has no hostname or its parent job has no user
        500 - SSH connection/authentication failure or any other error
    """
    try:
        task = Task.get(id)
        parent_job = Job.get(task.job_id)
        # Explicit raises instead of `assert`: asserts are stripped under `python -O`.
        if not task.hostname:
            raise AssertionError('hostname is empty')
        if not parent_job.user:
            raise AssertionError('user does not exist')
        output_gen, log_path = task_nursery.fetch_log(task.hostname, parent_job.user.username, task.id, tail)
        # Drain the output here so that a failure while reading it is mapped to
        # an error response by the handlers below, instead of escaping from the
        # `else` branch with `content`/`status` still unbound.
        output_lines = list(output_gen)
    except NoResultFound:
        content, status = {'msg': TASK['not_found']}, 404
    except ExitCodeError as e:
        content, status = {'msg': TASK['get_log']['failure']['not_found'].format(location=e)}, 404
    except AssertionError as e:
        content, status = {'msg': TASK['get_log']['failure']['assertions'].format(reason=e)}, 422
    except (ConnectionErrorException, AuthenticationException, UnknownHostException) as e:
        content, status = {'msg': SSH['failure']['connection'].format(reason=e)}, 500
    except Exception as e:
        log.critical(e)
        content, status = {'msg': GENERAL['internal_error']}, 500
    else:
        content, status = {
            'msg': TASK['get_log']['success'],
            'path': log_path,
            'output_lines': output_lines
        }, 200
    finally:
        return content, status
def delete(id: JobId) -> Tuple[Content, HttpStatusCode]:
    """Deletes a Job db record.

    Only an admin or the job's owner may delete it. A running job is
    rejected; it has to be stopped manually in advance.

    Returns a `(content, status)` pair:
        200 - record destroyed
        403 - caller is neither admin nor owner
        404 - job not found
        422 - job is still running
        500 - any other error
    """
    try:
        job = Job.get(id)
        if not is_admin() and not job.user_id == get_jwt_identity():
            raise ForbiddenException('not an owner')
        assert job.status is not JobStatus.running, 'must be stopped first'
        job.destroy()
    except ForbiddenException as forbidden:
        # NOTE(review): this reuses the 'update' message template for a delete
        # failure - confirm whether a dedicated 'delete' template was intended.
        content = {'msg': JOB['update']['failure']['forbidden'].format(reason=forbidden)}
        status = 403
    except AssertionError as failed_check:
        content = {'msg': JOB['delete']['failure']['assertions'].format(reason=failed_check)}
        status = HTTPStatus.UNPROCESSABLE_ENTITY.value
    except NoResultFound:
        content = {'msg': JOB['not_found']}
        status = HTTPStatus.NOT_FOUND.value
    except Exception as unexpected:
        log.critical(unexpected)
        content = {'msg': GENERAL['internal_error']}
        status = HTTPStatus.INTERNAL_SERVER_ERROR.value
    else:
        content = {'msg': JOB['delete']['success']}
        status = HTTPStatus.OK.value
    finally:
        return content, status
def test_create_job(tables, client, new_user):
    """POSTing a job with start/stop dates creates it and returns 201."""
    new_user.save()
    job_name = 'TestJob'

    # Schedule window: starts in 5 hours, stops in 10 hours (UTC).
    start_at = DateUtils.stringify_datetime_to_api_format(datetime.datetime.utcnow() + timedelta(hours=5))
    stop_at = DateUtils.stringify_datetime_to_api_format(datetime.datetime.utcnow() + timedelta(hours=10))
    payload = {
        'name': job_name,
        'description': 'testDescription',
        'userId': 1,
        'startAt': start_at,
        'stopAt': stop_at
    }

    resp = client.post(ENDPOINT, headers=HEADERS, data=json.dumps(payload))
    body = json.loads(resp.data.decode('utf-8'))

    assert resp.status_code == HTTPStatus.CREATED
    assert body['job']['id'] is not None
    assert body['job']['name'] == job_name
    # The job must also be retrievable from the database.
    assert Job.get(int(body['job']['id'])) is not None
def business_spawn(id: TaskId) -> Tuple[Content, HttpStatusCode]:
    """Spawns command stored in Task db record (task.full_command).

    It won't allow for spawning a task which is currently running.
    If spawn operation has succeeded, the returned pid is stored on the
    record and its status is set to `running`.

    Returns a `(content, status)` pair:
        200 - spawned; content carries `pid`
        404 - task/job record not found
        422 - a precondition failed (already running, empty command,
              empty hostname, missing user)
        500 - `SpawnError` from the backend or any other error
    """
    try:
        task = Task.get(id)
        parent_job = Job.get(task.job_id)

        # Preconditions, checked in this exact order.
        assert task.status is not TaskStatus.running, 'task is already running'
        assert task.full_command, 'command is empty'
        assert task.hostname, 'hostname is empty'
        assert parent_job.user, 'user does not exist'

        pid = task_nursery.spawn(
            task.full_command,
            task.hostname,
            parent_job.user.username,
            name_appendix=str(task.id))

        task.pid = pid
        task.status = TaskStatus.running
        task.save()
    except NoResultFound:
        content = {'msg': TASK['not_found']}
        status = 404
    except AssertionError as failed_check:
        content = {'msg': TASK['spawn']['failure']['assertions'].format(reason=failed_check)}
        status = 422
    except SpawnError as spawn_err:
        log.warning(spawn_err)
        content = {'msg': TASK['spawn']['failure']['backend'].format(reason=spawn_err)}
        status = 500
    except Exception as unexpected:
        log.critical(unexpected)
        content = {'msg': GENERAL['internal_error']}
        status = 500
    else:
        log.info('Task {} is now: {}'.format(task.id, task.status.name))
        content = {'msg': TASK['spawn']['success'], 'pid': pid}
        status = 200
    finally:
        return content, status
def update(id: JobId, newValues: Dict[str, Any]) -> Tuple[Content, HttpStatusCode]:
    """Updates certain fields of a Job db record, see `allowed_fields`.

    Only an admin or the job's owner may update it, and the job must not be
    running. Keys in `newValues` are converted with `snakecase` before being
    set on the record; entries whose value is `None` are left untouched.

    Returns a `(content, status)` pair:
        200 - updated; content carries the job as a dict
        403 - caller is neither admin nor owner
        404 - job not found
        422 - unknown field, running job, or attribute missing on the record
        500 - any other error
    """
    allowed_fields = {'name', 'description', 'startAt', 'stopAt'}
    try:
        job = Job.get(id)
        if not is_admin() and not job.user_id == get_jwt_identity():
            raise ForbiddenException('not an owner')
        assert set(newValues.keys()) <= allowed_fields, 'invalid field is present'
        assert job.status is not JobStatus.running, 'must be stopped first'

        for api_name, value in newValues.items():
            attr_name = snakecase(api_name)
            if value is None:
                continue
            assert hasattr(job, attr_name), 'job has no {} field'.format(attr_name)
            setattr(job, attr_name, value)
        job.save()
    except ForbiddenException as forbidden:
        content = {'msg': JOB['update']['failure']['forbidden'].format(reason=forbidden)}
        status = 403
    except NoResultFound:
        content = {'msg': JOB['not_found']}
        status = HTTPStatus.NOT_FOUND.value
    except AssertionError as failed_check:
        content = {'msg': JOB['update']['failure']['assertions'].format(reason=failed_check)}
        status = HTTPStatus.UNPROCESSABLE_ENTITY.value
    except Exception as unexpected:
        log.critical(unexpected)
        content = {'msg': GENERAL['internal_error']}
        status = HTTPStatus.INTERNAL_SERVER_ERROR.value
    else:
        content = {'msg': JOB['update']['success'], 'job': job.as_dict()}
        status = HTTPStatus.OK.value
    finally:
        return content, status
def remove_task(job_id: JobId, task_id: TaskId) -> Tuple[Content, HttpStatusCode]:
    """Removes Task from Job.

    Only the job's owner may remove a task from it.

    Returns a `(content, status)` pair:
        200 - task removed; content carries the job as a dict
        403 - caller is not the owner of the job
        404 - job or task not found, or `InvalidRequestException` raised
              by `job.remove_task`
        500 - any other error
    """
    # Stays None if the job lookup fails, which lets the NoResultFound handler
    # tell a missing job from a missing task.
    job = None
    try:
        job = Job.get(job_id)
        task = Task.get(task_id)
        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would silently disable this ownership check.
        if job.user_id != get_jwt_identity():
            raise AssertionError('Not an owner')
        job.remove_task(task)
    except NoResultFound:
        if job is None:
            content, status = {'msg': JOB['not_found']}, HTTPStatus.NOT_FOUND.value
        else:
            content, status = {'msg': TASK['not_found']}, HTTPStatus.NOT_FOUND.value
    except InvalidRequestException as e:
        content, status = {'msg': JOB['tasks']['remove']['failure']['not_found'].format(reason=e)}, \
            HTTPStatus.NOT_FOUND.value
    except AssertionError as e:
        content, status = {'msg': JOB['tasks']['remove']['failure']['assertions'].format(reason=e)}, \
            HTTPStatus.FORBIDDEN.value
    except Exception as e:
        log.critical(e)
        content, status = {'msg': GENERAL['internal_error']}, HTTPStatus.INTERNAL_SERVER_ERROR.value
    else:
        content, status = {'msg': JOB['tasks']['remove']['success'], 'job': job.as_dict()}, HTTPStatus.OK.value
    finally:
        return content, status
def test_create_job_without_dates(tables, client, new_user):
    """POSTing a job without start/stop dates still creates it and returns 201."""
    new_user.save()
    job_name = 'TestJob'
    payload = {
        'name': job_name,
        'description': 'testDescription',
        'userId': 1
    }

    resp = client.post(ENDPOINT, headers=HEADERS, data=json.dumps(payload))
    body = json.loads(resp.data.decode('utf-8'))

    assert resp.status_code == HTTPStatus.CREATED
    assert body['job']['id'] is not None
    assert body['job']['name'] == job_name
    # The job must also be retrievable from the database.
    assert Job.get(int(body['job']['id'])) is not None
def execute(id: JobId) -> Tuple[Content, HttpStatusCode]:
    """Checks ownership of a Job and then delegates to `business_execute`.

    Returns a `(content, status)` pair:
        404 - job not found
        403 - caller is not the owner of the job
        otherwise whatever `business_execute(id)` returns
    """
    try:
        job = Job.get(id)
        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would silently disable this ownership check.
        if job.user_id != get_jwt_identity():
            raise AssertionError('Not an owner')
    except NoResultFound:
        content, status = {'msg': JOB['not_found']}, HTTPStatus.NOT_FOUND.value
    except AssertionError as e:
        content, status = {'msg': GENERAL['unprivileged'].format(reason=e)}, HTTPStatus.FORBIDDEN.value
    else:
        content, status = business_execute(id)
    finally:
        return content, status
def synchronize(task_id: TaskId) -> None:
    """Updates the state of a Task object stored in database.

    It compares current db record with list of active screen session (their
    pids in general) on node defined by that record (`username@hostname`).

    If task_nursery is unable to fetch active screen sessions then
    the new state is always set to unsynchronized.

    If task.pid is not alive (db record is outdated), then it
    makes transition from last known state to a new state:

    state before sync   => state applied after sync
    -----------------------------------------------
    running             => terminated
    unsynchronized      => not_running

    NOTE(review): the original docstring also states that the job status is
    synchronized on every state transition; nothing in this function does
    that directly - presumably it happens elsewhere (e.g. on save), confirm.
    """
    log.debug('Syncing Task {}...'.format(task_id))
    # Stays None when the lookup itself fails, so the generic handler below
    # does not touch a record that was never loaded.
    task = None
    try:
        task = Task.get(task_id)
        parent_job = Job.get(task.job_id)
        # Explicit raises instead of `assert`: asserts are stripped under `python -O`.
        if not task.hostname:
            raise AssertionError('hostname is empty')
        if not parent_job.user:
            raise AssertionError('user does not exist')
        active_sessions_pids = task_nursery.running(host=task.hostname, user=parent_job.user.username)
    except NoResultFound:
        # This exception must be handled within try/except block when using Task.get()
        # In other words, methods decorated with @synchronize_task_record must handle this case by themselves!
        log.warning(
            ('Task {} could not be found (also synchronized). '
             'Failing without taking any action...').format(task_id))
    except Exception as e:
        # Failed precondition checks and task_nursery.running pssh exceptions are also caught here
        log.error('Unable to synchronize Task {}, reason: {}'.format(task_id, e))
        if task is None:
            # Task.get failed with something other than NoResultFound:
            # there is no record whose status could be updated.
            return
        log.debug('Task {} status was: {}'.format(task_id, task.status.name))
        task.status = TaskStatus.unsynchronized
        task.save()
        log.debug('Task {} is now: {}'.format(task_id, task.status.name))
    else:
        log.debug('[BEFORE SYNC] Task {} status was: {}'.format(task_id, task.status.name))
        change_status_msg = '[AFTER SYNC] Task {id} is now: {curr_status}'
        if task.pid not in active_sessions_pids:
            if task.status is TaskStatus.running:
                task.status = TaskStatus.terminated
                log.debug(change_status_msg.format(id=task_id, curr_status=task.status.name))
            if task.status is TaskStatus.unsynchronized:
                task.status = TaskStatus.not_running
                log.debug(change_status_msg.format(id=task_id, curr_status=task.status.name))
            # The stored pid does not match any active session, so drop it.
            task.pid = None
            task.save()
def update(id: TaskId, newValues: Dict[str, Any]) -> Tuple[Content, HttpStatusCode]:
    """Checks that the caller may modify a Task, then delegates to `business_update`.

    The caller must be an admin or the owner of the task's parent job.

    Returns a `(content, status)` pair:
        404 - task or parent job not found
        403 - caller is neither admin nor owner
        otherwise whatever `business_update(id, newValues)` returns
    """
    try:
        task = Task.get(id)
        parent_job = Job.get(task.job_id)
        if not (is_admin() or parent_job.user_id == get_jwt_identity()):
            raise ForbiddenException('not an owner')
    except NoResultFound:
        content = {'msg': TASK['not_found']}
        status = 404
    except ForbiddenException:
        content = {'msg': GENERAL['unprivileged']}
        status = 403
    else:
        content, status = business_update(id, newValues)
    finally:
        return content, status
def get_log(id: TaskId, tail: bool) -> Tuple[Content, HttpStatusCode]:
    """Checks that the caller may read a Task's log, then delegates to `business_get_log`.

    The caller must be an admin or the owner of the task's parent job.

    Returns a `(content, status)` pair:
        404 - task or parent job not found
        403 - caller is neither admin nor owner
        otherwise whatever `business_get_log(id, tail)` returns
    """
    try:
        task = Task.get(id)
        parent_job = Job.get(task.job_id)
        if not (is_admin() or parent_job.user_id == get_jwt_identity()):
            raise ForbiddenException("not an owner")
    except NoResultFound:
        content = {'msg': TASK['not_found']}
        status = 404
    except ForbiddenException:
        content = {'msg': GENERAL['unprivileged']}
        status = 403
    else:
        content, status = business_get_log(id, tail)
    finally:
        return content, status
def dequeue(id: JobId) -> Tuple[Content, HttpStatusCode]:
    """Calls `job.dequeue()` on behalf of an admin or the job's owner.

    Returns a `(content, status)` pair:
        200 - dequeued; content carries the job as a dict
        403 - caller is neither admin nor owner
        404 - job not found
        409 - an AssertionError was raised (e.g. by `job.dequeue()`)
    """
    try:
        job = Job.get(id)
        if not is_admin() and not job.user_id == get_jwt_identity():
            raise ForbiddenException("not an owner")
        job.dequeue()
    except NoResultFound:
        content = {'msg': JOB['not_found']}
        status = HTTPStatus.NOT_FOUND.value
    except ForbiddenException as forbidden:
        content = {'msg': GENERAL['unprivileged'].format(reason=forbidden)}
        status = HTTPStatus.FORBIDDEN.value
    except AssertionError as failed_check:
        content = {'msg': JOB['dequeue']['failure'].format(reason=failed_check)}
        status = HTTPStatus.CONFLICT.value
    else:
        content = {'msg': JOB['dequeue']['success'], 'job': job.as_dict()}
        status = HTTPStatus.OK.value
    finally:
        return content, status
def stop(id: JobId, gracefully: Optional[bool] = True) -> Tuple[Content, HttpStatusCode]:
    """Checks permissions and job state, then delegates to `business_stop`.

    The caller must be the job's owner or an admin, and the job must be
    running.

    Returns a `(content, status)` pair:
        404 - job not found
        403 - caller is neither owner nor admin
        409 - job is not running
        otherwise whatever `business_stop(id, gracefully)` returns
    """
    not_running_reason = 'Only running jobs can be stopped'
    try:
        job = Job.get(id)
        # Explicit raises instead of `assert`: asserts are stripped under
        # `python -O`, which would silently disable these checks.
        if not (get_jwt_identity() == job.user_id or is_admin()):
            raise AssertionError('Not an owner')
        if job.status is not JobStatus.running:
            raise AssertionError(not_running_reason)
    except NoResultFound:
        content, status = {'msg': JOB['not_found']}, HTTPStatus.NOT_FOUND.value
    except AssertionError as e:
        # `str(e)` is safe even for a message-less AssertionError, whereas
        # `e.args[0]` raises IndexError when `args` is empty.
        if not_running_reason in str(e):
            content, status = {'msg': JOB['stop']['failure']['state'].format(reason=e)}, \
                HTTPStatus.CONFLICT.value
        else:
            content, status = {'msg': GENERAL['unprivileged']}, HTTPStatus.FORBIDDEN.value
    else:
        content, status = business_stop(id, gracefully)
    finally:
        return content, status
def get_by_id(id: JobId) -> Tuple[Content, HttpStatusCode]:
    """Fetches one Job db record.

    The caller must be the job's owner or an admin.

    Returns a `(content, status)` pair:
        200 - content carries the job as a dict
        403 - caller is neither owner nor admin
        404 - job not found
        500 - any other error
    """
    try:
        job = Job.get(id)
        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would silently disable this permission check.
        if not (get_jwt_identity() == job.user_id or is_admin()):
            raise AssertionError('Not an owner')
    except NoResultFound as e:
        log.warning(e)
        content, status = {'msg': JOB['not_found']}, HTTPStatus.NOT_FOUND.value
    except AssertionError:
        content, status = {'msg': GENERAL['unprivileged']}, HTTPStatus.FORBIDDEN.value
    except Exception as e:
        log.critical(e)
        content, status = {'msg': GENERAL['internal_error']}, HTTPStatus.INTERNAL_SERVER_ERROR.value
    else:
        content, status = {'msg': JOB['get']['success'], 'job': job.as_dict()}, HTTPStatus.OK.value
    finally:
        return content, status
def business_terminate(id: TaskId, gracefully: Optional[bool] = True) -> Tuple[Content, HttpStatusCode]:
    """Sends SIGINT (default) or SIGKILL to process with pid that is stored in Task db record.

    In order to send SIGKILL, pass `gracefully=False` to `terminate` function.
    Meaning of `gracefully`, as forwarded to `task_nursery.terminate`:
        True  -> interrupt (allows output to be flushed into log file)
        None  -> terminate (works almost every time, but loses output that
                 could be produced before closing)
        False -> kill (similar to above, but success is almost guaranteed)

    Note that:
    1) `exit_code` is related to executing kill operation, not killed process.
    2) termination signal should be respected by most processes, however this
       function does not guarantee stopping the process!

    Returns a `(content, status)` pair:
        200 - kill operation exited with 0; content carries `exit_code`
        202 - kill operation exited with a non-zero code; content carries `exit_code`
        404 - task/job record not found
        409 - task is not running or has no pid
        500 - connection failure or any other error
    """
    try:
        task = Task.get(id)
        assert task.status is TaskStatus.running, 'only running tasks can be terminated'
        # A running task without a pid means the record is inconsistent.
        assert task.pid, 'task has no pid assigned'
        parent_job = Job.get(task.job_id)

        exit_code = task_nursery.terminate(
            task.pid,
            task.hostname,
            parent_job.user.username,
            gracefully=gracefully)
        if exit_code != 0:
            raise ExitCodeError('operation exit code is not 0')

        # Neither pid nor status is reset here: interrupt and terminate do not
        # guarantee success, so it is better to let synchronization update
        # them (via comparison with screen sessions).
        task.save()
    except NoResultFound:
        content = {'msg': TASK['not_found']}
        status = 404
    except ExitCodeError:
        content = {'msg': TASK['terminate']['failure']['exit_code'], 'exit_code': exit_code}
        status = 202
    except AssertionError as failed_check:
        content = {'msg': TASK['terminate']['failure']['state'].format(reason=failed_check)}
        status = 409
    except ConnectionErrorException as conn_err:
        content = {'msg': TASK['failure']['connection'].format(reason=conn_err)}
        status = 500
    except Exception as unexpected:
        log.critical(unexpected)
        content = {'msg': GENERAL['internal_error']}
        status = 500
    else:
        content = {'msg': TASK['terminate']['success'], 'exit_code': exit_code}
        status = 200
    finally:
        return content, status
def business_stop(id: JobId, gracefully: Optional[bool] = True) -> Tuple[Content, HttpStatusCode]:
    """Tries to terminate all Tasks belonging to Job.

    If some tasks did not terminate correctly, job is not terminated, but all
    of the other (correct) tasks are terminated.
    `gracefully` is passed to every `business_terminate` call to either send
    SIGINT (default) or SIGKILL to processes with pid that are stored in Tasks
    db records. In order to send SIGKILL, pass `gracefully=False`.

    Note that termination signal should be respected by most processes,
    however this function does not guarantee stopping them!

    Returns a `(content, status)` pair:
        200 - all tasks terminated; content carries the job as a dict
        404 - job not found
        422 - at least one task could not be terminated
        500 - any other error
    """
    try:
        job = Job.get(id)

        failed_terminations = 0
        for task in job.tasks:
            content, status = business_terminate(task.id, gracefully)
            if status != HTTPStatus.OK.value:
                failed_terminations = failed_terminations + 1
        assert failed_terminations == 0, 'Not all tasks could be terminated'

        # A job that was scheduled to start automatically but has been stopped
        # manually must not be started by the scheduler anymore.
        if job.start_at:
            job.start_at = None

        job.synchronize_status()
        job.save()
    except NoResultFound:
        content = {'msg': JOB['not_found']}
        status = HTTPStatus.NOT_FOUND.value
    except AssertionError as failed_check:
        content = {'msg': JOB['stop']['failure']['tasks'].format(reason=failed_check)}
        status = HTTPStatus.UNPROCESSABLE_ENTITY.value
    except Exception as unexpected:
        log.critical(unexpected)
        content = {'msg': GENERAL['internal_error']}
        status = HTTPStatus.INTERNAL_SERVER_ERROR.value
    else:
        log.info('Job {} is now: {}'.format(job.id, job.status.name))
        content = {'msg': JOB['stop']['success'], 'job': job.as_dict()}
        status = HTTPStatus.OK.value
    finally:
        return content, status
def business_execute(id: JobId) -> Tuple[Content, HttpStatusCode]:
    """Tries to spawn all commands stored in Tasks belonging to Job (db records - (task.command)).

    It won't allow for executing job which is currently running.
    After all spawn attempts the job status is synchronized and saved.
    If one or more tasks did not spawn correctly, tasks which spawned
    correctly keep running. In that case user can stop the job and try to run
    it again, or just continue.

    Returns a `(content, status)` pair:
        200 - all tasks spawned; content carries the job as a dict
        404 - job not found
        409 - job is already running
        422 - some tasks could not be spawned; content carries their ids
              under `not_spawned_list`
        500 - any other error
    """
    already_running_reason = 'Job is already running'
    try:
        not_spawned_tasks = []
        job = Job.get(id)
        # Explicit raises instead of `assert`: asserts are stripped under `python -O`.
        if job.status is JobStatus.running:
            raise AssertionError(already_running_reason)

        for task in job.tasks:
            _spawn_content, spawn_status = business_spawn(task.id)
            # Compare by value: `is not` on ints only works by accident of
            # CPython's small-integer cache.
            if spawn_status != HTTPStatus.OK.value:
                not_spawned_tasks.append(task.id)

        job.synchronize_status()
        job.save()
        if not_spawned_tasks:
            raise AssertionError('Could not spawn some tasks')

        # If job was scheduled to stop and user just
        # execute that job manually, scheduler should still
        # continue to watch and stop the job automatically.
    except NoResultFound:
        content, status = {'msg': JOB['not_found']}, HTTPStatus.NOT_FOUND.value
    except AssertionError as e:
        if already_running_reason in str(e):
            content, status = {'msg': JOB['execute']['failure']['state'].format(reason=e)}, \
                HTTPStatus.CONFLICT.value
        else:
            content, status = {
                'msg': JOB['execute']['failure']['tasks'].format(reason=e),
                'not_spawned_list': not_spawned_tasks
            }, HTTPStatus.UNPROCESSABLE_ENTITY.value
    except Exception as e:
        log.critical(e)
        content, status = {'msg': GENERAL['internal_error']}, HTTPStatus.INTERNAL_SERVER_ERROR.value
    else:
        log.info('Job {} is now: {}'.format(job.id, job.status.name))
        content, status = {'msg': JOB['execute']['success'], 'job': job.as_dict()}, HTTPStatus.OK.value
    finally:
        return content, status