Example #1
0
def update_query_execution_by_id(query_execution_id, session=None):
    """Sync a single query execution into the query-executions index.

    The execution is looked up by id. If it does not exist, or its status is
    anything other than ``QueryExecutionStatus.DONE``, the index entry with
    the same id is deleted. Otherwise the execution (with its data cell) is
    formatted and upserted into the index.

    Failures of the delete/upsert call are logged and swallowed, so this
    function never propagates an indexing error to its caller.
    """
    index = ES_CONFIG["query_executions"]["index_name"]
    execution = get_query_execution_by_id(query_execution_id, session=session)

    # Nothing worth indexing: make sure no stale entry is left behind.
    if execution is None or execution.status != QueryExecutionStatus.DONE:
        try:
            _delete(index, id=query_execution_id)
        except Exception:
            LOG.error(
                "failed to delete {}. Will pass.".format(query_execution_id))
        return

    cell = get_data_cell_by_query_execution_id(query_execution_id,
                                               session=session)
    document = query_execution_to_es(execution,
                                     data_cell=cell,
                                     session=session)
    try:
        # ES requires this envelope for updates; doc_as_upsert creates the
        # entry when it is not present yet.
        upsert_body = {"doc": document, "doc_as_upsert": True}
        _update(index, query_execution_id, upsert_body)
    except Exception:
        LOG.error(
            "failed to upsert {}. Will pass.".format(query_execution_id))
Example #2
0
def create_data_doc_from_execution(
    environment_id,
    execution_id,
    engine_id,
    query_string,
    title=None,
):
    """Create a data doc for the current user out of one of their executions.

    Args:
        environment_id: Environment the new doc belongs to; permission is
            verified before anything else is done.
        execution_id: Id of the query execution the doc is created from.
        engine_id: Query engine id forwarded to the doc creation logic.
        query_string: Query text forwarded to the doc creation logic.
        title: Optional doc title.

    Returns:
        Whatever ``logic.create_data_doc_from_execution`` returns.

    The request is rejected through ``api_assert`` when the execution does
    not exist or is not owned by the current user.
    """
    with DBSession() as session:
        verify_environment_permission([environment_id])
        environment = Environment.get(id=environment_id, session=session)
        execution = get_query_execution_by_id(execution_id, session=session)
        uid = current_user.id
        # The lookup can return None for an unknown id; fold that case into
        # the ownership check instead of failing with an AttributeError.
        api_assert(
            execution is not None and execution.uid == uid,
            "You can only create from your own executions.",
        )

        return logic.create_data_doc_from_execution(
            environment_id=environment_id,
            owner_uid=uid,
            engine_id=engine_id,
            query_string=query_string,
            execution_id=execution_id,
            # The doc's visibility follows the environment's shareable flag.
            public=environment.shareable,
            archived=False,
            title=title,
            meta={},
            session=session,
        )
Example #3
0
def verify_query_execution_owner(execution_id, session=None):
    """Assert (via ``api_assert``) that the current user owns the execution.

    A missing execution has no ``uid`` attribute, so it is compared as
    ``None`` rather than raising an AttributeError.
    """
    execution = query_execution_logic.get_query_execution_by_id(
        execution_id, session=session)
    requestor_uid = current_user.id
    owner_uid = getattr(execution, "uid", None)
    api_assert(
        requestor_uid == owner_uid,
        "Action can only be preformed by execution owner",
    )
def notifiy_on_execution_completion(query_execution_id, session=None):
    """Send a completion notification to every user subscribed to an execution.

    Nothing is sent when the execution has no notifications, or when no
    default environment can be resolved for the execution's owner. The
    first cell attached to the execution (if any) supplies the doc id,
    cell id and query title used in the notification template.
    """
    execution = qe_logic.get_query_execution_by_id(query_execution_id,
                                                   session=session)

    subscriptions = execution.notifications
    if not len(subscriptions):
        return

    first_cell = next(iter(execution.cells), None)

    # TODO: this should be determined by the notification.user?
    # Come up with a more efficient way to determine env per user
    environment = qe_perm_logic.get_default_user_environment_by_execution_id(
        execution_id=query_execution_id,
        uid=execution.uid,
        session=session,
    )
    env_name = getattr(environment, "name", None)

    # If the query execution is not associated with any environment
    # then no notification can be done
    if not env_name:
        return

    for subscription in subscriptions:
        recipient = user_logic.get_user_by_id(subscription.user,
                                              session=session)

        if first_cell is None:
            # Execution was not launched from a doc cell.
            doc_id, cell_id, query_title = None, None, "Untitled"
        else:
            cell_id = first_cell.id
            doc_id = first_cell.doc.id
            query_title = first_cell.meta.get("title", "Untitled")

        notify_user(
            user=recipient,
            template_name="query_completion_notification",
            template_params={
                "query_execution": execution,
                "doc_id": doc_id,
                "cell_id": cell_id,
                "query_title": query_title,
                "public_url": QuerybookSettings.PUBLIC_URL,
                "env_name": env_name,
            },
            session=session,
        )
Example #5
0
def cancel_query_execution(query_execution_id):
    """Abort the running task of a query execution owned by the current user.

    The execution must exist, the caller must have permission on its query
    engine, and the caller must be the execution's owner; each condition is
    enforced before the task is touched. If the execution's dict form
    carries a ``task_id``, the matching ``run_query_task`` result is aborted.

    Args:
        query_execution_id: Id of the query execution to cancel.
    """
    with DBSession() as session:
        execution = logic.get_query_execution_by_id(query_execution_id, session=session)
        # Validate existence first: the lookup can return None, and every
        # following step dereferences the execution.
        api_assert(execution is not None, "Invalid execution")
        verify_query_engine_permission(execution.engine_id, session=session)
        execution_dict = execution.to_dict(True)

        requestor = current_user.id
        api_assert(
            requestor == execution_dict["uid"], "You can only cancel your own queries"
        )

        if "task_id" in execution_dict:
            task = run_query_task.AsyncResult(execution_dict["task_id"])
            if task is not None:
                task.abort()
Example #6
0
def on_join_room(query_execution_id):
    """Join the room of a query execution and emit its current state.

    After checking that the execution exists and that the caller may use
    its query engine, the caller joins the room named after the execution
    id and a ``"query"`` event carrying the execution's dict form is
    emitted to that room. The last statement execution in the dict is
    enriched with its stream logs (when it has any) and, while the
    execution is running, with the progress reported by the task.

    Args:
        query_execution_id: Id of the query execution; also used as the
            room name.

    Raises:
        AssertionError: If the execution does not exist.
    """
    with DBSession() as session:
        execution = qe_logic.get_query_execution_by_id(
            query_execution_id, session=session
        )
        assert execution, "Invalid execution"
        verify_query_engine_permission(execution.engine_id, session=session)

        execution_dict = execution.to_dict(True) if execution is not None else None
        join_room(query_execution_id)

        if execution_dict and len(execution_dict.get("statement_executions", [])):
            statement_execution = execution_dict["statement_executions"][-1]
            # Format statement execution's logs
            if statement_execution["has_log"]:
                logs = qe_logic.get_statement_execution_stream_logs(
                    statement_execution["id"], from_end=True, session=session
                )
                statement_execution["log"] = [log.log for log in logs]

            # Getting task's running data
            if (
                "task_id" in execution_dict
                and execution_dict.get("status", None)
                == QueryExecutionStatus.RUNNING.value
            ):
                task = tasks.run_query_task.AsyncResult(execution_dict["task_id"])
                try:
                    # Read the task info once; it may be None when the task
                    # has not reported anything yet.
                    progress = task.info if task is not None else None
                    if progress is not None:
                        statement_key = str(statement_execution["id"])
                        if statement_key in progress:
                            statement_execution["percent_complete"] = progress[
                                statement_key
                            ].get("percent_complete")
                        # Only read "total" when progress is available;
                        # previously this line ran with `progress` unbound
                        # and relied on the except below to hide the error.
                        execution_dict["total"] = progress.get("total", 0)

                except Exception as e:
                    # Progress is best-effort: an unexpected info payload
                    # must not prevent the state from being emitted.
                    LOG.info(e)

        emit(
            "query",
            execution_dict,
            namespace=QUERY_EXECUTION_NAMESPACE,
            room=query_execution_id,
        )
def user_can_access_query_execution(uid, execution_id, session=None):
    """Return whether user ``uid`` may access the given query execution.

    Access is granted when any of the following holds, checked in order:
      1. the user owns the execution;
      2. the user can read a data doc that the execution is attached to;
      3. a ``QueryExecutionViewer`` record exists for this user/execution.

    Args:
        uid: Id of the user requesting access.
        execution_id: Id of the query execution.
        session: Optional DB session forwarded to the lookups.

    Returns:
        bool: True when access is allowed. A non-existent execution yields
        False.
    """
    execution = query_execution_logic.get_query_execution_by_id(
        execution_id, session=session
    )
    # The lookup can return None for an unknown id; nobody can access an
    # execution that does not exist.
    if execution is None:
        return False
    if execution.uid == uid:
        return True

    doc_cell_pairs = query_execution_logic.get_datadoc_id_from_query_execution_id(
        execution_id, session=session
    )
    # Each pair is (doc_id, cell_id); only the doc decides readability.
    if doc_cell_pairs and any(
        user_can_read(doc_id=doc_id, uid=uid, session=session)
        for doc_id, _cell_id in doc_cell_pairs
    ):
        return True

    return (
        QueryExecutionViewer.get(uid=uid, query_execution_id=execution_id) is not None
    )
Example #8
0
def send_query_execution_access_request_notification(execution_id, uid, session=None):
    """Notify the owner of an execution that user ``uid`` requests access.

    The notification uses the ``query_execution_access_request`` template
    and carries the requestor's name, the execution id and a URL to the
    execution built from the requestor's default environment.

    Args:
        execution_id: Id of the query execution access is requested for.
        uid: Id of the requesting user.
        session: Optional DB session, forwarded to every lookup and to
            ``notify_user``.
    """
    requestor = user_logic.get_user_by_id(uid, session=session)
    query_execution = logic.get_query_execution_by_id(execution_id, session=session)
    # NOTE(review): assumes an environment is always resolved for the
    # requestor here; a None result would fail on `.name` below. Confirm.
    environment = get_default_user_environment_by_execution_id(
        execution_id=execution_id, uid=uid, session=session
    )
    execution_url = f"{QuerybookSettings.PUBLIC_URL}/{environment.name}/query_execution/{execution_id}/"

    owner = user_logic.get_user_by_id(query_execution.uid, session=session)
    requestor_username = requestor.get_name()
    notify_user(
        user=owner,
        template_name="query_execution_access_request",
        template_params=dict(
            username=requestor_username,
            execution_id=execution_id,
            execution_url=execution_url,
        ),
        # Reuse the caller's session like every other call in this function.
        session=session,
    )
Example #9
0
def _get_query_execution_info(query_execution_id, session=None):
    """Load the data needed to run a not-yet-started query execution.

    Returns:
        tuple: ``(query, statement_ranges, uid, engine_id)``.

    Raises:
        InvalidQueryExecution: The execution does not exist.
        AlreadyExecutedException: The execution's status is no longer
            ``QueryExecutionStatus.INITIALIZED``.

    ``_assert_safe_query`` is also run on the query before returning.
    """
    execution = qe_logic.get_query_execution_by_id(query_execution_id,
                                                   session=session)
    if not execution:
        raise InvalidQueryExecution(
            f"Query {query_execution_id} does not exist")

    # Double check to see query has been executed since
    # it could be re-inserted after celery worker failure
    if execution.status != QueryExecutionStatus.INITIALIZED:
        raise AlreadyExecutedException(
            f"Query {query_execution_id} is already executed. This is likely caused by a worker crash."
        )

    query_text = execution.query
    ranges = get_statement_ranges(query_text)
    owner_uid, engine_id = execution.uid, execution.engine_id

    _assert_safe_query(query_text, engine_id, session=session)
    return query_text, ranges, owner_uid, engine_id
Example #10
0
def get_query_execution_final_status(query_execution_id,
                                     executor,
                                     error_message,
                                     session=None):
    """Determine the final status of a query execution and log it if needed.

    The executor's status wins when an executor is available. Without one
    (the error happened before the executor was initialized) the status
    stored for the execution is used, falling back to
    ``QueryExecutionStatus.INITIALIZED`` when the execution cannot be found.

    The resolved status is passed to ``log_if_incomplete_query_status``
    together with ``error_message`` and then returned.
    """
    if executor:
        resolved_status = executor.status
    else:
        stored_execution = qe_logic.get_query_execution_by_id(
            query_execution_id, session=session)
        if stored_execution is None:
            resolved_status = QueryExecutionStatus.INITIALIZED
        else:
            resolved_status = stored_execution.status

    log_if_incomplete_query_status(resolved_status,
                                   query_execution_id,
                                   error_message,
                                   session=session)
    return resolved_status
def log_query_per_table_task(self, query_execution_id):
    """Record which tables a finished query execution touched.

    Steps, in order:
      1. Sync the tables found in the query to the engine's metastore.
      2. Create lineage when the query has a CREATE or INSERT statement.
      3. Log the per-statement tables, but only when the execution belongs
         to a cell whose doc is public.

    Returns early when the query engine has no metastore configured.
    The execution is asserted to be in ``QueryExecutionStatus.DONE``.
    """
    with DBSession() as session:
        execution = qe_logic.get_query_execution_by_id(
            query_execution_id, session=session)
        assert execution.status == QueryExecutionStatus.DONE

        engine = execution.engine
        metastore_id = engine.metastore_id
        if metastore_id is None:
            # This query engine has no metastore configured
            return

        query_text = execution.query
        statement_types = get_table_statement_type(query_text)
        tables_by_statement, _ = process_query(query_text, engine.language)

        sync_table_to_metastore(tables_by_statement,
                                statement_types,
                                metastore_id,
                                session=session)

        first_cell = next(iter(execution.cells), None)
        if "CREATE" in statement_types or "INSERT" in statement_types:
            create_lineage_from_query(execution,
                                      metastore_id,
                                      first_cell,
                                      session=session)

        # Only executions attached to a public doc are logged per table.
        if first_cell is not None and first_cell.doc.public:
            log_table_per_statement(
                tables_by_statement,
                statement_types,
                query_execution_id,
                metastore_id,
                first_cell.id,
                session=session,
            )
Example #12
0
def get_query_execution(query_execution_id):
    """Return the dict form of a query execution, or None if it is missing.

    Permission on the execution is verified before it is loaded.
    """
    verify_query_execution_permission(query_execution_id)
    execution = logic.get_query_execution_by_id(query_execution_id)
    if execution is None:
        return None
    return execution.to_dict(True)