Beispiel #1
0
def build_workflow_logs(workflow, steps=None):
    """Return the logs for all jobs of a workflow.

    :param workflow: Workflow whose job logs are collected.
    :param steps: Optional list of job names; when given, only jobs whose
        name is in this list are included.
    :return: Dict mapping job id (as string) to a dict with the job's
        metadata and logs.
    """
    # Build one query incrementally instead of duplicating the whole
    # query in each branch (matches the paginated variant of this helper).
    query = Session.query(Job).filter_by(workflow_uuid=workflow.id_)
    if steps:
        query = query.filter(Job.job_name.in_(steps))
    jobs = query.order_by(Job.created).all()
    all_logs = {}
    for job in jobs:
        item = {
            "workflow_uuid": str(job.workflow_uuid) or "",
            "job_name": job.job_name or "",
            "compute_backend": job.compute_backend or "",
            "backend_job_id": job.backend_job_id or "",
            "docker_img": job.docker_img or "",
            "cmd": job.prettified_cmd or "",
            "status": job.status.name or "",
            "logs": job.logs or "",
        }
        all_logs[str(job.id_)] = item

    return all_logs
Beispiel #2
0
def store_logs(logs, job_id):
    """Write logs of the given job to the DB.

    Best-effort: any exception is logged and swallowed so the caller is
    never interrupted by a logging failure.

    :param logs: Log text to store.
    :param job_id: Id of the job whose ``logs`` column is updated.
    """
    try:
        logging.info("Storing job logs: %s", job_id)
        Session.query(Job).filter_by(id_=job_id).update(dict(logs=logs))
        Session.commit()
    except Exception as e:
        # Roll back so the session stays usable after a failed commit.
        Session.rollback()
        logging.error("Exception while saving logs: %s", e, exc_info=True)
Beispiel #3
0
def test_create_and_associate_reana_user():
    """Check a REANA user is created and associated from account info."""
    user_email = '*****@*****.**'
    account_info = {'user': {'email': user_email}}

    def _lookup_user():
        return Session.query(User).filter_by(
            email=user_email).one_or_none()

    # No user should exist before the association call.
    assert _lookup_user() is None
    _create_and_associate_reana_user(None, account_info=account_info)
    created = _lookup_user()
    assert created
    assert created.email == user_email
Beispiel #4
0
 def get_top_five_resource_usage_users(self,
                                       resource_filter,
                                       order,
                                       user_resource_filter=True):
     """Query returning top five users according to filter.

     :param resource_filter: SQLAlchemy filter applied to ``Resource``.
     :param order: SQLAlchemy ordering clause (e.g. usage descending).
     :param user_resource_filter: Extra filter applied to ``UserResource``.
     :return: Query yielding at most five ``(User, UserResource)`` rows.
     """
     user_resource_subq = (
         Session.query(UserResource).filter(user_resource_filter).join(
             UserResource.resource).filter(resource_filter).subquery())
     subq_alias = aliased(UserResource, user_resource_subq)
     # ``limit(5)`` matches the function name and docstring; the previous
     # ``limit(10)`` contradicted both.
     return (Session.query(User, subq_alias).join(
         subq_alias,
         UserResource).group_by(User, UserResource,
                                subq_alias).order_by(order).limit(5))
def _update_job_progress(workflow_uuid, msg):
    """Update job progress for jobs in received message.

    Job ids that are not valid UUIDs are skipped silently.

    :param workflow_uuid: UUID of the workflow the jobs belong to.
    :param msg: Message dict with a ``progress`` mapping of statuses to
        ``{'job_ids': [...]}`` entries.
    """
    for status in PROGRESS_STATUSES:
        if status in msg['progress']:
            status_progress = msg['progress'][status]
            for job_id in status_progress['job_ids']:
                try:
                    uuid.UUID(job_id)
                except (ValueError, TypeError, AttributeError):
                    # Narrowed from a bare ``except Exception``: these are
                    # the errors uuid.UUID raises for malformed input.
                    continue
                Session.query(Job).filter_by(id_=job_id).\
                    update({'workflow_uuid': workflow_uuid,
                            'status': status})
Beispiel #6
0
def remove_workflow_jobs_from_cache(workflow):
    """Remove any cached jobs from given workflow.

    :param workflow: The workflow object that spawned the jobs.
    :return: None.
    """
    # Archived job workspaces live next to the workflow workspace.
    archive_dir = os.path.join(workflow.workspace_path, "..", "archive")
    cached_jobs = Session.query(Job).filter_by(
        workflow_uuid=workflow.id_).all()
    for cached_job in cached_jobs:
        Session.query(JobCache).filter_by(job_id=cached_job.id_).delete()
        remove_workflow_workspace(
            os.path.join(archive_dir, str(cached_job.id_)))
    Session.commit()
Beispiel #7
0
def get_current_job_progress(workflow_id):
    """Return command/name info for every job of a workflow.

    :param workflow_id: UUID of the workflow whose jobs are inspected.
    :return: Dict mapping job id (string) to its prettified command and
        job name.
    """
    current_job_commands = {}
    workflow_jobs = Session.query(Job).filter_by(
        workflow_uuid=workflow_id).all()
    # The previous implementation re-queried each job by its primary key
    # inside the loop (an N+1 query pattern); the rows fetched above are
    # already exactly the jobs needed, so use them directly.
    for job in workflow_jobs:
        current_job_commands[str(job.id_)] = {
            'prettified_cmd': job.prettified_cmd,
            'current_job_name': job.job_name
        }
    return current_job_commands
def remove_workflow_jobs_from_cache(workflow):
    """Remove any cached jobs from given workflow.

    :param workflow: The workflow object that spawned the jobs.
    :return: None.
    """
    workflow_jobs = Session.query(Job).filter_by(
        workflow_uuid=workflow.id_).all()
    for workflow_job in workflow_jobs:
        # Resolve the archived job path, normalising away '..' segments.
        archived_path = remove_upper_level_references(
            os.path.join(workflow.get_workspace(), '..', 'archive',
                         str(workflow_job.id_)))
        Session.query(JobCache).filter_by(job_id=workflow_job.id_).delete()
        remove_workflow_workspace(archived_path)
    Session.commit()
Beispiel #9
0
def _get_users(_id, email, user_access_token, admin_access_token):
    """Return all users matching search criteria.

    :param _id: Optional user id filter.
    :param email: Optional email filter.
    :param user_access_token: Optional access-token filter.
    :param admin_access_token: Token that must match the admin's.
    :raises ValueError: If the admin token is missing or invalid.
    :return: List of matching ``User`` rows.
    """
    admin = Session.query(User).filter_by(id_=ADMIN_USER_ID).one_or_none()
    # Guard against a missing admin row: the original dereferenced
    # ``admin.access_token`` unconditionally and could raise AttributeError.
    if admin is None or admin_access_token != admin.access_token:
        raise ValueError('Admin access token invalid.')
    search_criteria = dict()
    if _id:
        search_criteria['id_'] = _id
    if email:
        search_criteria['email'] = email
    if user_access_token:
        search_criteria['access_token'] = user_access_token
    users = Session.query(User).filter_by(**search_criteria).all()
    return users
Beispiel #10
0
def _get_users(_id, email, user_access_token, admin_access_token):
    """Return all users matching search criteria.

    :param _id: Optional user id filter.
    :param email: Optional email filter.
    :param user_access_token: Optional REANA token filter (joined via
        ``User.tokens``).
    :param admin_access_token: Token that must match the admin's.
    :raises ValueError: If the admin token is missing or invalid.
    :return: List of matching ``User`` rows.
    """
    admin = Session.query(User).filter_by(id_=ADMIN_USER_ID).one_or_none()
    # Guard against a missing admin row: the original dereferenced
    # ``admin.access_token`` unconditionally and could raise AttributeError.
    if admin is None or admin_access_token != admin.access_token:
        raise ValueError("Admin access token invalid.")
    search_criteria = dict()
    if _id:
        search_criteria["id_"] = _id
    if email:
        search_criteria["email"] = email
    query = Session.query(User).filter_by(**search_criteria)
    if user_access_token:
        query = query.join(User.tokens).filter_by(token=user_access_token,
                                                  type_=UserTokenType.reana)
    return query.all()
Beispiel #11
0
def _create_and_associate_reana_user(sender, token=None,
                                     response=None, account_info=None):
    """Fetch or create the REANA user matching ``account_info``.

    :param account_info: OAuth account info dict carrying the user's
        email, full name and username.
    :raises ValueError: If the user cannot be created.
    :return: The existing or newly created ``User``.
    """
    try:
        user_email = account_info['user']['email']
        user_fullname = account_info['user']['profile']['full_name']
        username = account_info['user']['profile']['username']
        users = Session.query(User).filter_by(email=user_email).all()
        if users:
            user = users[0]
        else:
            user_access_token = secrets.token_urlsafe(16)
            user = User(access_token=user_access_token,
                        email=user_email,
                        full_name=user_fullname,
                        username=username)
            Session.add(user)
            Session.commit()
    except (InvalidRequestError, IntegrityError) as e:
        Session.rollback()
        # Chain the DB error so the root cause is not lost.
        raise ValueError('Could not create user, '
                         'possible constraint violation') from e
    except Exception as e:
        raise ValueError('Could not create user') from e
    return user
Beispiel #12
0
def build_workflow_logs(workflow, steps=None, paginate=None):
    """Return the logs for all jobs of a workflow."""
    def _format_ts(timestamp):
        # Render a datetime with the workflow time format, or pass None.
        return (timestamp.strftime(WORKFLOW_TIME_FORMAT)
                if timestamp else None)

    query = Session.query(Job).filter_by(workflow_uuid=workflow.id_)
    if steps:
        query = query.filter(Job.job_name.in_(steps))
    query = query.order_by(Job.created)
    jobs = paginate(query).get("items") if paginate else query
    all_logs = OrderedDict()
    for job in jobs:
        all_logs[str(job.id_)] = {
            "workflow_uuid": str(job.workflow_uuid) or "",
            "job_name": job.job_name or "",
            "compute_backend": job.compute_backend or "",
            "backend_job_id": job.backend_job_id or "",
            "docker_img": job.docker_img or "",
            "cmd": job.prettified_cmd or "",
            "status": job.status.name or "",
            "logs": job.logs or "",
            "started_at": _format_ts(job.started_at),
            "finished_at": _format_ts(job.finished_at),
        }
    return all_logs
def _update_job_cache(msg):
    """Update caching information for finished job.

    Looks up the ``JobCache`` row for the job referenced in ``msg`` and,
    if found, stores the hashes of the job's inputs and workspace so the
    result can be reused later.  NOTE: mutates ``msg`` in place (strips
    the first ``;``-separated segment of the command and removes the
    ``workflow_workspace`` key from the job spec).
    """
    cached_job = (Session.query(JobCache).filter_by(
        job_id=msg["caching_info"].get("job_id")).first())

    input_files = []
    if cached_job:
        file_access_times = calculate_file_access_time(
            msg["caching_info"].get("workflow_workspace"))
        # Keep only files recorded at caching time that still exist now.
        for filename in cached_job.access_times:
            if filename in file_access_times:
                input_files.append(filename)
    else:
        # No cache entry for this job: nothing to update.
        return
    cmd = msg["caching_info"]["job_spec"]["cmd"]
    # removes cd to workspace, to be refactored
    clean_cmd = ";".join(cmd.split(";")[1:])
    msg["caching_info"]["job_spec"]["cmd"] = clean_cmd

    # Presumably dropped because the workspace path is environment-specific
    # and would make cache keys non-portable — TODO confirm.
    if "workflow_workspace" in msg["caching_info"]["job_spec"]:
        del msg["caching_info"]["job_spec"]["workflow_workspace"]
    input_hash = calculate_job_input_hash(msg["caching_info"]["job_spec"],
                                          msg["caching_info"]["workflow_json"])
    workspace_hash = calculate_hash_of_dir(
        msg["caching_info"].get("workflow_workspace"), input_files)
    # -1 signals the directory could not be hashed; skip the update.
    if workspace_hash == -1:
        return

    cached_job.parameters = input_hash
    cached_job.result_path = msg["caching_info"].get("result_path")
    cached_job.workspace_hash = workspace_hash
    Session.add(cached_job)
Beispiel #14
0
    def get_jobs_by_status_and_compute_backend(self, status, compute_backend=None):
        """Get the number of jobs in status ``status`` from ``compute_backend``."""
        # Collect the filters first, then apply them in a single call.
        conditions = [Job.status == status]
        if compute_backend:
            conditions.append(Job.compute_backend == compute_backend)
        return Session.query(Job).filter(*conditions).count()
Beispiel #15
0
def _create_and_associate_reana_user(sender,
                                     token=None,
                                     response=None,
                                     account_info=None):
    """Fetch or create the REANA user matching ``account_info``.

    :param account_info: OAuth account info dict carrying the user's
        email, full name and username.
    :raises ValueError: If the user cannot be created.
    :return: The existing or newly created ``User``.
    """
    try:
        user_email = account_info["user"]["email"]
        user_fullname = account_info["user"]["profile"]["full_name"]
        username = account_info["user"]["profile"]["username"]
        users = Session.query(User).filter_by(email=user_email).all()
        if users:
            user = users[0]
        else:
            user = User(email=user_email,
                        full_name=user_fullname,
                        username=username)
            Session.add(user)
            Session.commit()
    except (InvalidRequestError, IntegrityError) as e:
        Session.rollback()
        # Chain the DB error so the root cause is not lost.
        raise ValueError("Could not create user, "
                         "possible constraint violation") from e
    except Exception as e:
        raise ValueError("Could not create user") from e
    return user
Beispiel #16
0
def _get_user_from_invenio_user(id):
    """Return the REANA user whose email matches the Invenio ``id``."""
    matched = Session.query(User).filter_by(email=id).one_or_none()
    if matched is None:
        raise ValueError("No users registered with this id")
    if matched.access_token_status == UserTokenStatus.revoked.name:
        raise ValueError("User access token revoked.")
    return matched
def _update_workflow_status(workflow_uuid, status, logs):
    """Update workflow status in DB.

    :param workflow_uuid: UUID of the workflow to update.
    :param status: New status value.
    :param logs: Log text passed to ``Workflow.update_workflow_status``.
    """
    Workflow.update_workflow_status(Session, workflow_uuid, status, logs, None)
    workflow = Session.query(Workflow).filter_by(id_=workflow_uuid)\
        .one_or_none()
    # ``one_or_none`` may return None; guard before dereferencing to
    # avoid an AttributeError if the workflow row is gone.
    if workflow is not None and workflow.git_ref:
        _update_commit_status(workflow, status)
def _update_run_progress(workflow_uuid, msg):
    """Register succeeded Jobs to DB.

    Merges the job ids reported in ``msg['progress']`` into the
    workflow's stored ``job_progress`` and flags the JSON column as
    modified so SQLAlchemy persists the in-place change.
    """
    workflow = Session.query(Workflow).filter_by(id_=workflow_uuid).\
        one_or_none()
    # Removed dead code: ``cached_jobs`` was assigned from
    # msg['progress']['cached'] but never used.
    job_progress = workflow.job_progress
    for status in PROGRESS_STATUSES:
        if status in msg['progress']:
            previous_status = workflow.job_progress.get(status)
            previous_total = 0
            if previous_status:
                previous_total = previous_status.get('total') or 0
            if status == 'total':
                # 'total' is only recorded once.
                if previous_total > 0:
                    continue
                else:
                    job_progress['total'] = \
                        msg['progress']['total']
            else:
                # Union the previously seen job ids with the new ones.
                if previous_status:
                    new_job_ids = set(previous_status.get('job_ids') or
                                      set()) | \
                        set(msg['progress'][status]['job_ids'])
                else:
                    new_job_ids = set(msg['progress'][status]['job_ids'])
                job_progress[status] = {
                    'total': len(new_job_ids),
                    'job_ids': list(new_job_ids)
                }
    workflow.job_progress = job_progress
    flag_modified(workflow, 'job_progress')
    Session.add(workflow)
def _update_job_cache(msg):
    """Update caching information for finished job.

    Looks up the ``JobCache`` row for the job referenced in ``msg`` and,
    if found, stores the hashes of the job's inputs and workspace so the
    result can be reused later.  NOTE: mutates ``msg`` in place (strips
    the first ``;``-separated segment of the command and removes the
    ``workflow_workspace`` key from the job spec).
    """
    cached_job = Session.query(JobCache).filter_by(
        job_id=msg['caching_info'].get('job_id')).first()

    input_files = []
    if cached_job:
        file_access_times = calculate_file_access_time(
            msg['caching_info'].get('workflow_workspace'))
        # Keep only files recorded at caching time that still exist now.
        for filename in cached_job.access_times:
            if filename in file_access_times:
                input_files.append(filename)
    else:
        # No cache entry for this job: nothing to update.
        return
    cmd = msg['caching_info']['job_spec']['cmd']
    # removes cd to workspace, to be refactored
    clean_cmd = ';'.join(cmd.split(';')[1:])
    msg['caching_info']['job_spec']['cmd'] = clean_cmd

    # Presumably dropped because the workspace path is environment-specific
    # and would make cache keys non-portable — TODO confirm.
    if 'workflow_workspace' in msg['caching_info']['job_spec']:
        del msg['caching_info']['job_spec']['workflow_workspace']
    input_hash = calculate_job_input_hash(msg['caching_info']['job_spec'],
                                          msg['caching_info']['workflow_json'])
    workspace_hash = calculate_hash_of_dir(
        msg['caching_info'].get('workflow_workspace'), input_files)
    # -1 signals the directory could not be hashed; skip the update.
    if workspace_hash == -1:
        return

    cached_job.parameters = input_hash
    cached_job.result_path = msg['caching_info'].get('result_path')
    cached_job.workspace_hash = workspace_hash
    Session.add(cached_job)
Beispiel #20
0
def get_user_from_token(access_token):
    """Validate that the token provided is valid."""
    matched = (Session.query(User)
               .filter_by(access_token=access_token)
               .one_or_none())
    if matched is None:
        raise ValueError('Token not valid.')
    return str(matched.id_)
 def on_message(self, body, message):
     """On new message event handler.

     Reads a workflow status update from the message body and, when the
     transition is allowed, updates the workflow status, run/job progress
     and the job cache in the DB.
     """
     # Ack immediately: the message is not redelivered if processing fails.
     message.ack()
     body_dict = json.loads(body)
     workflow_uuid = body_dict.get("workflow_uuid")
     if workflow_uuid:
         workflow = (
             Session.query(Workflow).filter_by(id_=workflow_uuid).one_or_none()
         )
         # NOTE(review): ``workflow`` may be None if the uuid is unknown;
         # ``workflow.can_transition_to`` below would then raise
         # AttributeError — confirm upstream guarantees existence.
         next_status = body_dict.get("status")
         if next_status:
             next_status = RunStatus(next_status)
             print(
                 " [x] Received workflow_uuid: {0} status: {1}".format(
                     workflow_uuid, next_status
                 )
             )
         logs = body_dict.get("logs") or ""
         # NOTE(review): ``next_status`` may still be None if the message
         # carried no "status" key — confirm can_transition_to accepts it.
         if workflow.can_transition_to(next_status):
             _update_workflow_status(workflow, next_status, logs)
             if "message" in body_dict and body_dict.get("message"):
                 msg = body_dict["message"]
                 if "progress" in msg:
                     _update_run_progress(workflow_uuid, msg)
                     _update_job_progress(workflow_uuid, msg)
                 # Caching: calculate input hash and store in JobCache
                 if "caching_info" in msg:
                     _update_job_cache(msg)
             Session.commit()
         else:
             logging.error(
                 f"Cannot transition workflow {workflow.id_}"
                 f" from status {workflow.status} to"
                 f" {next_status}."
             )
def _update_run_progress(workflow_uuid, msg):
    """Register succeeded Jobs to DB.

    Merges the job ids reported in ``msg['progress']`` into the
    workflow's stored ``job_progress`` and flags the JSON column as
    modified so SQLAlchemy persists the in-place change.
    """
    workflow = Session.query(Workflow).filter_by(id_=workflow_uuid).one_or_none()
    # Removed dead code: ``cached_jobs`` was assigned from
    # msg["progress"]["cached"] but never used.
    job_progress = workflow.job_progress
    for status in PROGRESS_STATUSES:
        if status in msg["progress"]:
            previous_status = workflow.job_progress.get(status)
            previous_total = 0
            if previous_status:
                previous_total = previous_status.get("total") or 0
            if status == "total":
                # 'total' is only recorded once.
                if previous_total > 0:
                    continue
                else:
                    job_progress["total"] = msg["progress"]["total"]
            else:
                # Union the previously seen job ids with the new ones.
                if previous_status:
                    new_job_ids = set(previous_status.get("job_ids") or set()) | set(
                        msg["progress"][status]["job_ids"]
                    )
                else:
                    new_job_ids = set(msg["progress"][status]["job_ids"])
                job_progress[status] = {
                    "total": len(new_job_ids),
                    "job_ids": list(new_job_ids),
                }
    workflow.job_progress = job_progress
    flag_modified(workflow, "job_progress")
    Session.add(workflow)
Beispiel #23
0
 def _get_workflow(self):
     """Get workflow from db.

     Returns the ``Workflow`` row for ``self.workflow_uuid``, or None
     when no such workflow exists.
     """
     # ``one_or_none`` already yields None on no match, so the original
     # if/else (whose else branch just fell through) is redundant.
     return Session.query(Workflow).filter_by(
         id_=self.workflow_uuid).one_or_none()
Beispiel #24
0
    def stuck_workflows(self):
        """Get the number of stuck workflows."""
        # A workflow is considered stuck when it has been running but
        # untouched for more than 12 hours.
        cutoff = datetime.now() - timedelta(hours=12)
        return (
            Session.query(Workflow)
            .filter(Workflow.status == RunStatus.running)
            .filter(Workflow.run_started_at <= cutoff)
            .filter(Workflow.updated <= cutoff)
            .count()
        )
Beispiel #25
0
def get_user_from_token(access_token):
    """Validate that the token provided is valid."""
    matched = (Session.query(User)
               .join(User.tokens)
               .filter_by(token=access_token, type_=UserTokenType.reana)
               .one_or_none())
    if matched is None:
        raise ValueError("Token not valid.")
    if matched.access_token_status == UserTokenStatus.revoked.name:
        raise ValueError("User access token revoked.")
    return matched
 def cache_job(self):
     """Cache a job.

     Records current file access times of the owning workflow's
     workspace under this job's id.
     """
     owning_workflow = Session.query(Workflow).filter_by(
         id_=self.workflow_uuid).one_or_none()
     cache_entry = JobCache()
     cache_entry.job_id = self.job_id
     cache_entry.access_times = calculate_file_access_time(
         owning_workflow.workspace_path)
     Session.add(cache_entry)
     Session.commit()
Beispiel #27
0
 def get_top_five(self, resource_type):
     """Get the top five users according to quota usage."""
     heaviest_users = (Session.query(UserResource)
                       .join(UserResource.resource)
                       .filter(Resource.type_ == resource_type)
                       .order_by(UserResource.quota_used.desc())
                       .limit(5))
     return self.format_user_data(heaviest_users)
    def on_message(self, body, message):
        """Process messages on ``jobs-status`` queue for alive workflows.

        This function will ignore events about workflows that have been already
        terminated since a graceful finalisation of the workflow cannot be
        guaranteed if the workflow engine (orchestrator) is not alive.
        """
        # Initialised before the try-block so the error handlers below can
        # always reference it safely.
        workflow_uuid = None
        try:
            message.ack()
            body_dict = json.loads(body)
            workflow_uuid = body_dict.get("workflow_uuid")
            workflow = (Session.query(Workflow).filter(
                Workflow.id_ == workflow_uuid,
                Workflow.status.in_(ALIVE_STATUSES),
            ).one_or_none())
            if workflow:
                next_status = body_dict.get("status")
                if next_status:
                    next_status = RunStatus(next_status)
                    logging.info(
                        " [x] Received workflow_uuid: {0} status: {1}".format(
                            workflow_uuid, next_status))

                logs = body_dict.get("logs") or ""
                if workflow.can_transition_to(next_status):
                    _update_workflow_status(workflow, next_status, logs)
                    if "message" in body_dict and body_dict.get("message"):
                        msg = body_dict["message"]
                        if "progress" in msg:
                            _update_run_progress(workflow_uuid, msg)
                            _update_job_progress(workflow_uuid, msg)
                        # Caching: calculate input hash and store in JobCache
                        if "caching_info" in msg:
                            _update_job_cache(msg)
                    Session.commit()
                else:
                    logging.error(f"Cannot transition workflow {workflow.id_}"
                                  f" from status {workflow.status} to"
                                  f" {next_status}.")
            elif workflow_uuid:
                logging.warning(
                    "Event for not alive workflow {workflow_uuid} received:\n"
                    "{body}\n"
                    "Ignoring ...".format(workflow_uuid=workflow_uuid,
                                          body=body))
        except REANAWorkflowControllerError as rwce:
            logging.error(rwce, exc_info=True)
        except SQLAlchemyError as sae:
            # Log ``workflow_uuid`` instead of ``workflow.id_``: ``workflow``
            # could be unbound (the query itself failed) or None, which
            # previously raised inside this handler and masked the real
            # database error.
            logging.error(
                f"Something went wrong while querying the database for workflow: {workflow_uuid}"
            )
            logging.error(sae, exc_info=True)
        except Exception as e:
            logging.error(f"Unexpected error while processing workflow: {e}",
                          exc_info=True)
Beispiel #29
0
def update_workflow_logs(workflow_uuid, log_message):
    """Update workflow logs."""
    try:
        logging.info('Storing workflow logs: {}'.format(workflow_uuid))
        workflow = Session.query(Workflow).filter_by(id_=workflow_uuid).\
            one_or_none()
        workflow.logs += '\n' + log_message
        Session.commit()
    except Exception as e:
        logging.error('Exception while saving logs: {}'.format(str(e)),
                      exc_info=True)
Beispiel #30
0
 def get_active(self):
     """Get the number of active interactive sessions."""
     non_active_statuses = [
         RunStatus.stopped,
         RunStatus.deleted,
         RunStatus.failed,
     ]
     active_interactive_sessions = (
         Session.query(InteractiveSession).filter(
             InteractiveSession.status.notin_(non_active_statuses)).count())
     return active_interactive_sessions