def _calculate_complexity(workflow):
    """Place workflow in queue and calculate and set its complexity."""
    estimated = estimate_complexity(workflow.type_, workflow.reana_specification)
    workflow.complexity = estimated
    # Persist the computed complexity on the workflow row.
    Session.commit()
    return estimated
def on_message(self, body, message):
    """On new message event handler.

    Acknowledges the incoming event, loads the referenced workflow and, if
    the requested status transition is allowed, applies it together with any
    run/job progress and job-cache updates carried by the message.
    """
    message.ack()
    body_dict = json.loads(body)
    workflow_uuid = body_dict.get("workflow_uuid")
    if workflow_uuid:
        workflow = (
            Session.query(Workflow).filter_by(id_=workflow_uuid).one_or_none()
        )
        if workflow is None:
            # Fix: the original dereferenced ``workflow`` unconditionally and
            # crashed with AttributeError for events about unknown workflows.
            logging.warning(
                "Event for unknown workflow {0} received, ignoring.".format(
                    workflow_uuid
                )
            )
            return
        next_status = body_dict.get("status")
        if next_status:
            next_status = RunStatus(next_status)
            # Consistency: use logging (as the sibling handler does) instead
            # of printing to stdout.
            logging.info(
                " [x] Received workflow_uuid: {0} status: {1}".format(
                    workflow_uuid, next_status
                )
            )
        logs = body_dict.get("logs") or ""
        if workflow.can_transition_to(next_status):
            _update_workflow_status(workflow, next_status, logs)
            if "message" in body_dict and body_dict.get("message"):
                msg = body_dict["message"]
                if "progress" in msg:
                    _update_run_progress(workflow_uuid, msg)
                    _update_job_progress(workflow_uuid, msg)
                # Caching: calculate input hash and store in JobCache
                if "caching_info" in msg:
                    _update_job_cache(msg)
            Session.commit()
        else:
            logging.error(
                f"Cannot transition workflow {workflow.id_}"
                f" from status {workflow.status} to"
                f" {next_status}."
            )
def build_workflow_logs(workflow, steps=None):
    """Return the logs for all jobs of a workflow."""
    # Restrict to the requested steps when given, otherwise take all jobs.
    query = Session.query(Job)
    if steps:
        query = query.filter(
            and_(Job.workflow_uuid == workflow.id_, Job.job_name.in_(steps))
        )
    else:
        query = query.filter_by(workflow_uuid=workflow.id_)
    jobs = query.order_by(Job.created).all()

    all_logs = {}
    for job in jobs:
        all_logs[str(job.id_)] = {
            "workflow_uuid": str(job.workflow_uuid) or "",
            "job_name": job.job_name or "",
            "compute_backend": job.compute_backend or "",
            "backend_job_id": job.backend_job_id or "",
            "docker_img": job.docker_img or "",
            "cmd": job.prettified_cmd or "",
            "status": job.status.name or "",
            "logs": job.logs or "",
        }
    return all_logs
def shutdown_reana_db_session(response_or_exc):
    """Close session on app teardown."""
    from invenio_db import db as invenio_db
    from reana_db.database import Session as reana_db_session

    # Release both the REANA and the Invenio scoped sessions.
    reana_db_session.remove()
    invenio_db.session.remove()
    return response_or_exc
def _update_job_cache(msg):
    """Update caching information for finished job."""
    # Look up the cache entry that was pre-created for this job; bail out
    # if there is none.
    cached_job = Session.query(JobCache).filter_by(
        job_id=msg['caching_info'].get('job_id')).first()
    input_files = []
    if cached_job:
        file_access_times = calculate_file_access_time(
            msg['caching_info'].get('workflow_workspace'))
        # Keep only the files recorded at caching time that are still
        # present in the workspace now.
        for filename in cached_job.access_times:
            if filename in file_access_times:
                input_files.append(filename)
    else:
        return
    cmd = msg['caching_info']['job_spec']['cmd']
    # removes cd to workspace, to be refactored
    clean_cmd = ';'.join(cmd.split(';')[1:])
    msg['caching_info']['job_spec']['cmd'] = clean_cmd
    # Drop the workspace path so it does not influence the job-spec hash.
    if 'workflow_workspace' in msg['caching_info']['job_spec']:
        del msg['caching_info']['job_spec']['workflow_workspace']
    input_hash = calculate_job_input_hash(msg['caching_info']['job_spec'],
                                          msg['caching_info']['workflow_json'])
    workspace_hash = calculate_hash_of_dir(
        msg['caching_info'].get('workflow_workspace'), input_files)
    # -1 is treated as a failure sentinel from calculate_hash_of_dir;
    # skip the cache update in that case.
    if workspace_hash == -1:
        return
    cached_job.parameters = input_hash
    cached_job.result_path = msg['caching_info'].get('result_path')
    cached_job.workspace_hash = workspace_hash
    Session.add(cached_job)
def _update_run_progress(workflow_uuid, msg):
    """Register succeeded Jobs to DB.

    Merge the per-status progress in ``msg['progress']`` into the workflow's
    ``job_progress`` JSON column and stage the workflow for commit.

    :param workflow_uuid: Identifier of the workflow being updated.
    :param msg: Message dict expected to contain a ``progress`` mapping.
    """
    workflow = Session.query(Workflow).filter_by(id_=workflow_uuid).\
        one_or_none()
    # Fix: removed dead local ``cached_jobs`` (assigned, never used).
    job_progress = workflow.job_progress
    for status in PROGRESS_STATUSES:
        if status in msg['progress']:
            previous_status = workflow.job_progress.get(status)
            previous_total = 0
            if previous_status:
                previous_total = previous_status.get('total') or 0
            if status == 'total':
                # 'total' is set once; later messages must not overwrite it.
                if previous_total > 0:
                    continue
                else:
                    job_progress['total'] = \
                        msg['progress']['total']
            else:
                # Union with already-known job ids so repeated notifications
                # about the same job are not double counted.
                if previous_status:
                    new_job_ids = set(previous_status.get('job_ids') or set()) | \
                        set(msg['progress'][status]['job_ids'])
                else:
                    new_job_ids = set(msg['progress'][status]['job_ids'])
                job_progress[status] = {
                    'total': len(new_job_ids),
                    'job_ids': list(new_job_ids)
                }
    workflow.job_progress = job_progress
    # job_progress is a mutable JSON column: flag it so SQLAlchemy persists it.
    flag_modified(workflow, 'job_progress')
    Session.add(workflow)
def users_create_default(email, password, id_):
    """Create default user.

    This user has the administrator role and can retrieve other user
    information as well as create new users.
    """
    reana_user_characteristics = {"id_": id_, "email": email}
    try:
        user = User.query.filter_by(**reana_user_characteristics).first()
        if not user:
            reana_user_characteristics["access_token"] = secrets.token_urlsafe(16)
            user = User(**reana_user_characteristics)
            create_user_workspace(user.get_user_workspace())
            Session.add(user)
            Session.commit()
            # create invenio user, passing `confirmed_at` to mark it as confirmed
            register_user(
                email=email,
                password=password,
                confirmed_at=datetime.datetime.now(),
            )
            click.echo(reana_user_characteristics["access_token"])
    except Exception as e:
        click.echo("Something went wrong: {0}".format(e))
        sys.exit(1)
def _update_job_cache(msg):
    """Update caching information for finished job."""
    # Fetch the cache entry pre-created for this job; nothing to do if absent.
    cached_job = (Session.query(JobCache).filter_by(
        job_id=msg["caching_info"].get("job_id")).first())
    input_files = []
    if cached_job:
        file_access_times = calculate_file_access_time(
            msg["caching_info"].get("workflow_workspace"))
        # Keep only files recorded at caching time that still exist in the
        # workspace now.
        for filename in cached_job.access_times:
            if filename in file_access_times:
                input_files.append(filename)
    else:
        return
    cmd = msg["caching_info"]["job_spec"]["cmd"]
    # removes cd to workspace, to be refactored
    clean_cmd = ";".join(cmd.split(";")[1:])
    msg["caching_info"]["job_spec"]["cmd"] = clean_cmd
    # Strip the workspace path so it does not affect the job-spec hash.
    if "workflow_workspace" in msg["caching_info"]["job_spec"]:
        del msg["caching_info"]["job_spec"]["workflow_workspace"]
    input_hash = calculate_job_input_hash(msg["caching_info"]["job_spec"],
                                          msg["caching_info"]["workflow_json"])
    workspace_hash = calculate_hash_of_dir(
        msg["caching_info"].get("workflow_workspace"), input_files)
    # -1 is treated as a failure sentinel from calculate_hash_of_dir; skip
    # the cache update in that case.
    if workspace_hash == -1:
        return
    cached_job.parameters = input_hash
    cached_job.result_path = msg["caching_info"].get("result_path")
    cached_job.workspace_hash = workspace_hash
    Session.add(cached_job)
def initialise_default_resources():
    """Initialise default Resources."""
    from reana_db.database import Session

    existing_resources = {resource.name for resource in Resource.query.all()}
    unit_by_type = {
        ResourceType.cpu: ResourceUnit.milliseconds,
        ResourceType.disk: ResourceUnit.bytes_,
    }
    # Build Resource rows only for default resources not yet present.
    default_resources = [
        Resource(
            name=name,
            type_=ResourceType[type_],
            unit=unit_by_type[ResourceType[type_]],
            title="Default {} resource.".format(type_),
        )
        for type_, name in DEFAULT_QUOTA_RESOURCES.items()
        if name not in existing_resources
    ]
    if default_resources:
        Session.add_all(default_resources)
        Session.commit()
    return default_resources
def _update_run_progress(workflow_uuid, msg):
    """Register succeeded Jobs to DB.

    Merge the per-status progress in ``msg["progress"]`` into the workflow's
    ``job_progress`` JSON column and stage the workflow for commit.

    :param workflow_uuid: Identifier of the workflow being updated.
    :param msg: Message dict expected to contain a ``progress`` mapping.
    """
    workflow = Session.query(Workflow).filter_by(id_=workflow_uuid).one_or_none()
    # Fix: removed dead local ``cached_jobs`` (assigned, never used).
    job_progress = workflow.job_progress
    for status in PROGRESS_STATUSES:
        if status in msg["progress"]:
            previous_status = workflow.job_progress.get(status)
            previous_total = 0
            if previous_status:
                previous_total = previous_status.get("total") or 0
            if status == "total":
                # 'total' is set once; later messages must not overwrite it.
                if previous_total > 0:
                    continue
                else:
                    job_progress["total"] = msg["progress"]["total"]
            else:
                # Union with already-known job ids so repeated notifications
                # about the same job are not double counted.
                if previous_status:
                    new_job_ids = set(previous_status.get("job_ids") or set()) | set(
                        msg["progress"][status]["job_ids"]
                    )
                else:
                    new_job_ids = set(msg["progress"][status]["job_ids"])
                job_progress[status] = {
                    "total": len(new_job_ids),
                    "job_ids": list(new_job_ids),
                }
    workflow.job_progress = job_progress
    # job_progress is a mutable JSON column: flag it so SQLAlchemy persists it.
    flag_modified(workflow, "job_progress")
    Session.add(workflow)
def app(base_app):
    """Flask application fixture. Scope: function

    This fixture offers a Flask application with already a database
    connection and all the models created. When finished it will delete
    all models.

    .. code-block:: python

        def create_ninja_turtle()
            with app.test_client() as client:
                somedata = 'ninja turtle'
                res = client.post(url_for('api.create_object'),
                                  content_type='application/json',
                                  data=json.dumps(somedata))

                assert res.status_code == 200
    """
    from reana_db.database import Session
    from reana_db.models import Base, Resource

    engine = create_engine(base_app.config["SQLALCHEMY_DATABASE_URI"])
    base_app.session.bind = engine
    with base_app.app_context():
        # Create the "__reana" schema and the database itself when missing,
        # then materialise all model tables and seed default resources.
        if not engine.dialect.has_schema(engine, "__reana"):
            engine.execute(CreateSchema("__reana"))
        if not database_exists(engine.url):
            create_database(engine.url)
        Base.metadata.create_all(bind=engine)
        Resource.initialise_default_resources()
        # Hand the configured app to the test; teardown runs after the test.
        yield base_app
        Session.close()  # close hanging connections
        Base.metadata.drop_all(bind=engine)
def store_logs(logs, job_id):
    """Write logs to DB."""
    try:
        logging.info("Storing job logs: {}".format(job_id))
        # Bulk-update the single matching row in place.
        Session.query(Job).filter_by(id_=job_id).update({"logs": logs})
        Session.commit()
    except Exception as e:
        logging.error("Exception while saving logs: {}".format(str(e)), exc_info=True)
def token_grant(admin_access_token, id_, email):
    """Grant a token to the selected user.

    Validates the admin token, grants a fresh access token to the user
    selected by ``id_`` or ``email``, records an audit-log entry and sends
    an email notification.
    """
    try:
        # Authorise the caller against the stored admin token.
        admin = User.query.filter_by(id_=ADMIN_USER_ID).one_or_none()
        if admin_access_token != admin.access_token:
            raise ValueError("Admin access token invalid.")
        user = _get_user_by_criteria(id_, email)
        error_msg = None
        if not user:
            error_msg = f"User {id_ or email} does not exist."
        elif user.access_token:
            error_msg = (
                f"User {user.id_} ({user.email}) has already an active access token."
            )
        if error_msg:
            click.secho(f"ERROR: {error_msg}", fg="red")
            sys.exit(1)
        # Re-granting after a revocation requires interactive confirmation.
        if user.access_token_status in [UserTokenStatus.revoked.name, None]:
            click.confirm(
                f"User {user.id_} ({user.email}) access token status"
                f" is {user.access_token_status}, do you want to"
                " proceed?",
                abort=True,
            )
        user_granted_token = secrets.token_urlsafe(16)
        user.access_token = user_granted_token
        Session.commit()
        log_msg = (f"Token for user {user.id_} ({user.email}) granted.\n"
                   f"\nToken: {user_granted_token}")
        click.secho(log_msg, fg="green")
        admin.log_action(AuditLogAction.grant_token, {"reana_admin": log_msg})
        # send notification to user by email
        email_subject = "REANA access token granted"
        email_body = JinjaEnv.render_template(
            "emails/token_granted.txt",
            user_full_name=user.full_name,
            reana_hostname=REANA_HOSTNAME,
            ui_config=REANAConfig.load("ui"),
            sender_email=ADMIN_EMAIL,
        )
        send_email(user.email, email_subject, email_body)
    except click.exceptions.Abort:
        # Raised by click.confirm when the operator declines.
        click.echo("Grant token aborted.")
    except REANAEmailNotificationError as e:
        # Token was already granted and committed; only the email failed.
        click.secho(
            "Something went wrong while sending email:\n{}".format(e),
            fg="red",
            err=True,
        )
    except Exception as e:
        click.secho(
            "Something went wrong while granting token:\n{}".format(e),
            fg="red",
            err=True,
        )
def cache_job(self):
    """Cache a job."""
    workflow = (
        Session.query(Workflow).filter_by(id_=self.workflow_uuid).one_or_none()
    )
    # Snapshot file access times of the workspace at caching time.
    access_times = calculate_file_access_time(workflow.workspace_path)
    cache_entry = JobCache()
    cache_entry.job_id = self.job_id
    cache_entry.access_times = access_times
    Session.add(cache_entry)
    Session.commit()
def _load_yadage_spec(workflow, operational_options):
    """Load and save in DB the Yadage workflow specification."""
    operational_options.update({"accept_metadir": True})
    toplevel = operational_options.get("toplevel", "")
    # Re-load the specification from the workspace and persist it.
    specification = yadage_load_from_workspace(
        workflow.workspace_path,
        workflow.reana_specification,
        toplevel,
    )
    workflow.reana_specification = specification
    Session.commit()
def on_message(self, body, message):
    """Process messages on ``jobs-status`` queue for alive workflows.

    This function will ignore events about workflows that have been already
    terminated since a graceful finalisation of the workflow cannot be
    guaranteed if the workflow engine (orchestrator) is not alive.
    """
    try:
        # Acknowledge receipt before processing the event.
        message.ack()
        body_dict = json.loads(body)
        workflow_uuid = body_dict.get("workflow_uuid")
        # Only match workflows that are still in an alive status.
        workflow = (Session.query(Workflow).filter(
            Workflow.id_ == workflow_uuid,
            Workflow.status.in_(ALIVE_STATUSES),
        ).one_or_none())
        if workflow:
            next_status = body_dict.get("status")
            if next_status:
                next_status = RunStatus(next_status)
                logging.info(
                    " [x] Received workflow_uuid: {0} status: {1}".format(
                        workflow_uuid, next_status))
            logs = body_dict.get("logs") or ""
            if workflow.can_transition_to(next_status):
                _update_workflow_status(workflow, next_status, logs)
                if "message" in body_dict and body_dict.get("message"):
                    msg = body_dict["message"]
                    if "progress" in msg:
                        _update_run_progress(workflow_uuid, msg)
                        _update_job_progress(workflow_uuid, msg)
                    # Caching: calculate input hash and store in JobCache
                    if "caching_info" in msg:
                        _update_job_cache(msg)
                Session.commit()
            else:
                logging.error(f"Cannot transition workflow {workflow.id_}"
                              f" from status {workflow.status} to"
                              f" {next_status}.")
        elif workflow_uuid:
            # Event references a workflow that is no longer alive: skip it.
            logging.warning(
                "Event for not alive workflow {workflow_uuid} received:\n"
                "{body}\n"
                "Ignoring ...".format(workflow_uuid=workflow_uuid, body=body))
    except REANAWorkflowControllerError as rwce:
        logging.error(rwce, exc_info=True)
    except SQLAlchemyError as sae:
        # NOTE(review): ``workflow`` may be unbound or None here if the query
        # itself raised — this log line could then fail; confirm and guard.
        logging.error(
            f"Something went wrong while querying the database for workflow: {workflow.id_}"
        )
        logging.error(sae, exc_info=True)
    except Exception as e:
        logging.error(f"Unexpected error while processing workflow: {e}",
                      exc_info=True)
def stop_workflow(workflow):
    """Stop a given workflow."""
    # Guard clause: only running workflows can be stopped.
    if workflow.status != RunStatus.running:
        message = ("Workflow {id_} is not running.").format(id_=workflow.id_)
        raise REANAWorkflowControllerError(message)
    kwrm = KubernetesWorkflowRunManager(workflow)
    kwrm.stop_batch_workflow_run()
    workflow.status = RunStatus.stopped
    Session.add(workflow)
    Session.commit()
def test_create_and_associate_reana_user():
    user_email = '*****@*****.**'
    account_info = {'user': {'email': user_email}}
    # The user must not exist before association.
    assert (
        Session.query(User).filter_by(email=user_email).one_or_none() is None
    )
    _create_and_associate_reana_user(None, account_info=account_info)
    created = Session.query(User).filter_by(email=user_email).one_or_none()
    assert created
    assert created.email == user_email
def update_workflow_logs(workflow_uuid, log_message):
    """Update workflow logs.

    Append ``log_message`` to the stored logs of the workflow identified by
    ``workflow_uuid``. Errors are logged and swallowed (best-effort).
    """
    try:
        logging.info('Storing workflow logs: {}'.format(workflow_uuid))
        workflow = Session.query(Workflow).filter_by(id_=workflow_uuid).\
            one_or_none()
        # Fix: coalesce NULL logs to '' so `+` cannot raise TypeError on a
        # workflow whose logs column was never initialised.
        workflow.logs = (workflow.logs or '') + '\n' + log_message
        Session.commit()
    except Exception as e:
        logging.error('Exception while saving logs: {}'.format(str(e)),
                      exc_info=True)
def remove_workflow_jobs_from_cache(workflow):
    """Remove any cached jobs from given workflow.

    :param workflow: The workflow object that spawned the jobs.
    :return: None.
    """
    workflow_jobs = Session.query(Job).filter_by(workflow_uuid=workflow.id_).all()
    for job in workflow_jobs:
        # Archived job workspace lives next to the workflow workspace.
        archived_path = os.path.join(
            workflow.workspace_path, "..", "archive", str(job.id_)
        )
        Session.query(JobCache).filter_by(job_id=job.id_).delete()
        remove_workflow_workspace(archived_path)
    Session.commit()
def get_top_five_resource_usage_users(self, resource_filter, order, user_resource_filter=True):
    """Query returning top five users according to filter.

    NOTE(review): despite the function name and this summary, the query
    below uses ``.limit(10)`` — confirm whether 5 or 10 is intended.

    :param resource_filter: SQLAlchemy filter applied to the joined resource.
    :param order: Ordering criterion applied to the final query.
    :param user_resource_filter: Filter applied to UserResource rows before
        the join (defaults to True, i.e. no filtering).
    :return: SQLAlchemy query of (User, UserResource-alias) tuples.
    """
    # Pre-filter user-resource rows and their resources in a subquery.
    user_resource_subq = (
        Session.query(UserResource).filter(user_resource_filter).join(
            UserResource.resource).filter(resource_filter).subquery())
    subq_alias = aliased(UserResource, user_resource_subq)
    return (Session.query(User, subq_alias).join(
        subq_alias, UserResource).group_by(User, UserResource, subq_alias).order_by(order).limit(10))
def _update_job_progress(workflow_uuid, msg):
    """Update job progress for jobs in received message."""
    for status in PROGRESS_STATUSES:
        if status not in msg['progress']:
            continue
        for job_id in msg['progress'][status]['job_ids']:
            # Skip identifiers that are not valid UUIDs.
            try:
                uuid.UUID(job_id)
            except Exception:
                continue
            Session.query(Job).filter_by(id_=job_id).update(
                {'workflow_uuid': workflow_uuid, 'status': status})
def set_quota_limit(ctx, admin_access_token, emails, resource_name, limit):
    """Set quota limits to the given users per resource."""
    try:
        for email in emails:
            error_msg = None
            user = _get_user_by_criteria(None, email)
            resource = Resource.query.filter_by(
                name=resource_name).one_or_none()
            if not user:
                error_msg = f"ERROR: Provided user {email} does not exist."
            elif not resource:
                error_msg = (
                    "ERROR: Provided resource name does not exist. Available "
                    f"resources are {[resource.name for resource in Resource.query]}."
                )
            if error_msg:
                # Abort the whole command on the first invalid user/resource.
                click.secho(
                    error_msg,
                    fg="red",
                    err=True,
                )
                sys.exit(1)
            user_resource = UserResource.query.filter_by(
                user=user, resource=resource).one_or_none()
            if user_resource:
                user_resource.quota_limit = limit
                Session.add(user_resource)
            else:
                # Create user resource in case there isn't one. Useful for old users.
                user.resources.append(
                    UserResource(
                        user_id=user.id_,
                        resource_id=resource.id_,
                        quota_limit=limit,
                        quota_used=0,
                    ))
        # Commit all per-user updates in one transaction.
        Session.commit()
        click.secho(
            f"Quota limit {limit} for '{resource.name}' successfully set to users {emails}.",
            fg="green",
        )
    except Exception as e:
        logging.debug(traceback.format_exc())
        logging.debug(str(e))
        click.echo(
            click.style("Quota could not be set: \n{}".format(str(e)), fg="red"),
            err=True,
        )
def _get_users(_id, email, user_access_token, admin_access_token):
    """Return all users matching search criteria."""
    # Authorise the caller against the stored admin token first.
    admin = Session.query(User).filter_by(id_=ADMIN_USER_ID).one_or_none()
    if admin_access_token != admin.access_token:
        raise ValueError('Admin access token invalid.')
    search_criteria = {}
    if _id:
        search_criteria['id_'] = _id
    if email:
        search_criteria['email'] = email
    if user_access_token:
        search_criteria['access_token'] = user_access_token
    return Session.query(User).filter_by(**search_criteria).all()
def get_current_job_progress(workflow_id):
    """Return job."""
    current_job_commands = {}
    workflow_jobs = Session.query(Job).filter_by(workflow_uuid=workflow_id).all()
    for workflow_job in workflow_jobs:
        # Re-fetch the most recent row for this job id.
        latest = (
            Session.query(Job)
            .filter_by(id_=workflow_job.id_)
            .order_by(Job.created.desc())
            .first()
        )
        if latest:
            current_job_commands[str(latest.id_)] = {
                'prettified_cmd': latest.prettified_cmd,
                'current_job_name': latest.job_name,
            }
    return current_job_commands
def remove_workflow_jobs_from_cache(workflow):
    """Remove any cached jobs from given workflow.

    :param workflow: The workflow object that spawned the jobs.
    :return: None.
    """
    for job in Session.query(Job).filter_by(workflow_uuid=workflow.id_).all():
        # Archived job workspace lives next to the workflow workspace.
        archive_path = remove_upper_level_references(
            os.path.join(workflow.get_workspace(), '..', 'archive', str(job.id_)))
        Session.query(JobCache).filter_by(job_id=job.id_).delete()
        remove_workflow_workspace(archive_path)
    Session.commit()
def _get_users(_id, email, user_access_token, admin_access_token):
    """Return all users matching search criteria."""
    # Authorise the caller against the stored admin token first.
    admin = Session.query(User).filter_by(id_=ADMIN_USER_ID).one_or_none()
    if admin_access_token != admin.access_token:
        raise ValueError("Admin access token invalid.")
    criteria = {}
    if _id:
        criteria["id_"] = _id
    if email:
        criteria["email"] = email
    query = Session.query(User).filter_by(**criteria)
    # Token search goes through the tokens relation, restricted to REANA tokens.
    if user_access_token:
        query = query.join(User.tokens).filter_by(
            token=user_access_token, type_=UserTokenType.reana)
    return query.all()
def token_revoke(admin_access_token, id_, email):
    """Revoke selected user's token.

    Validates the admin token, revokes the active access token of the user
    selected by ``id_`` or ``email``, records an audit-log entry and sends
    an email notification.
    """
    try:
        # Authorise the caller against the stored admin token.
        admin = User.query.filter_by(id_=ADMIN_USER_ID).one_or_none()
        if admin_access_token != admin.access_token:
            raise ValueError("Admin access token invalid.")
        user = _get_user_by_criteria(id_, email)
        error_msg = None
        if not user:
            error_msg = f"User {id_ or email} does not exist."
        elif not user.access_token:
            error_msg = (f"User {user.id_} ({user.email}) does not have an"
                         " active access token.")
        if error_msg:
            click.secho(f"ERROR: {error_msg}", fg="red")
            sys.exit(1)
        # Capture the token value before marking it revoked, for the log line.
        revoked_token = user.access_token
        user.active_token.status = UserTokenStatus.revoked
        Session.commit()
        log_msg = (f"User token {revoked_token} ({user.email}) was"
                   " successfully revoked.")
        click.secho(log_msg, fg="green")
        admin.log_action(AuditLogAction.revoke_token, {"reana_admin": log_msg})
        # send notification to user by email
        email_subject = "REANA access token revoked"
        email_body = JinjaEnv.render_template(
            "emails/token_revoked.txt",
            user_full_name=user.full_name,
            reana_hostname=REANA_HOSTNAME,
            ui_config=REANAConfig.load("ui"),
            sender_email=ADMIN_EMAIL,
        )
        send_email(user.email, email_subject, email_body)
    except REANAEmailNotificationError as e:
        # Token was already revoked and committed; only the email failed.
        click.secho(
            "Something went wrong while sending email:\n{}".format(e),
            fg="red",
            err=True,
        )
    except Exception as e:
        click.secho(
            "Something went wrong while revoking token:\n{}".format(e),
            fg="red",
            err=True,
        )
def get_user_from_token(access_token):
    """Validate that the token provided is valid."""
    user = (
        Session.query(User).filter_by(access_token=access_token).one_or_none()
    )
    if user is None:
        raise ValueError('Token not valid.')
    return str(user.id_)
def stop_interactive_session(self, interactive_session_id):
    """Stop an interactive workflow run.

    :param interactive_session_id: Identifier of the interactive session.
    :raises REANAInteractiveSessionError: If the session does not exist or
        deleting its Kubernetes ingress object fails.
    """
    int_session = InteractiveSession.query.filter_by(
        id_=interactive_session_id).first()
    if not int_session:
        raise REANAInteractiveSessionError(
            "Interactive session for workflow {} does not exist.".format(
                self.workflow.name))
    action_completed = True
    try:
        delete_k8s_ingress_object(
            ingress_name=int_session.name,
            namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
        )
    except Exception as e:
        action_completed = False
        # Fix: corrected "Unkown" typo in the user-facing error message.
        raise REANAInteractiveSessionError(
            "Unknown error while stopping interactive session:\n{}".format(
                e))
    finally:
        # Only delete the DB row if the ingress was removed successfully.
        if action_completed:
            # TODO: once multiple sessions will be supported instead of
            # deleting a session, its status should be changed to "stopped"
            # int_session.status = RunStatus.stopped
            current_db_sessions = Session.object_session(self.workflow)
            current_db_sessions.delete(int_session)
            current_db_sessions.commit()