def get_statement_execution_log(statement_execution_id):
    with DBSession() as session:
        statement_execution = logic.get_statement_execution_by_id(
            statement_execution_id, session=session)
        api_assert(statement_execution is not None,
                   message="Invalid statement execution")
        verify_query_execution_permission(
            statement_execution.query_execution_id, session=session)

        log_path = statement_execution.log_path
        try:
            if log_path.startswith("stream"):
                logs = logic.get_statement_execution_stream_logs(
                    statement_execution_id)
                return list(map(lambda log: log.log, logs))
            else:
                with DBSession() as session:
                    MAX_LOG_RETURN_LINES = 2000
                    result = ""

                    statement_execution = logic.get_statement_execution_by_id(
                        statement_execution_id, session=session)
                    if statement_execution is not None and statement_execution.has_log:
                        with GenericReader(
                                statement_execution.log_path) as reader:
                            result = reader.read_lines(
                                number_of_lines=MAX_LOG_RETURN_LINES)
                            if len(result) == MAX_LOG_RETURN_LINES:
                                result += [
                                    "---------------------------------------------------------------------------",
                                    f"We are truncating results since it reached limit of {MAX_LOG_RETURN_LINES} lines.",
                                ]
                            return result
        except FileDoesNotExist as e:
            abort(RESOURCE_NOT_FOUND_STATUS_CODE, str(e))
    def write_csv_to_sheet(
        self,
        sheet,
        statement_execution_id: int,
        worksheet_title: str,
        start_cell: str,
    ):
        with DBSession() as session:
            max_rows = self._get_max_rows(
                statement_execution_id, start_cell=start_cell, session=session
            )

            num_rows = self._get_statement_execution_num_rows(
                statement_execution_id, session=session
            )
            num_cols = self._get_statement_execution_num_cols(
                statement_execution_id, session=session
            )

            start_cell_coord = worksheet_coord_to_coord(start_cell)
            end_cell_coord = (
                start_cell_coord[0] + num_cols - 1,
                start_cell_coord[1] + num_rows - 1,
            )
            end_cell = coord_to_worksheet_coord(end_cell_coord[0], end_cell_coord[1])

            with gspread_worksheet(
                sheet, worksheet_title, end_cell_coord[0], end_cell_coord[1]
            ) as worksheet:
                csv = self._get_statement_execution_result_iter(
                    statement_execution_id, number_of_lines=max_rows, session=session
                )
                self._update_worksheet(worksheet, start_cell, end_cell, csv)
def export_statement_execution_result(statement_execution_id,
                                      exporter_name,
                                      exporter_params=None):
    with DBSession() as session:
        statement_execution = logic.get_statement_execution_by_id(
            statement_execution_id, session=session)
        api_assert(statement_execution is not None,
                   message="Invalid statement execution")
        verify_query_execution_permission(
            statement_execution.query_execution_id, session=session)

    exporter = get_exporter(exporter_name)
    api_assert(exporter is not None, f"Invalid export name {exporter_name}")

    if exporter_params:
        valid, reason = validate_form(exporter.export_form, exporter_params)
        api_assert(valid, "Invalid exporter params, reason: " + reason)

    task = export_query_execution_task.apply_async(args=[
        exporter.exporter_name,
        statement_execution_id,
        current_user.id,
        exporter_params or {},
    ], )

    return task.task_id
def download_statement_execution_result(statement_execution_id):
    with DBSession() as session:
        statement_execution = logic.get_statement_execution_by_id(
            statement_execution_id, session=session)
        api_assert(statement_execution is not None,
                   message="Invalid statement execution")
        verify_query_execution_permission(
            statement_execution.query_execution_id, session=session)

        download_file_name = f"result_{statement_execution.query_execution_id}_{statement_execution_id}.csv"

        reader = GenericReader(statement_execution.result_path)
        response = None
        if reader.has_download_url:
            # If the Reader can generate a download,
            # we let user download the file by redirection
            download_url = reader.get_download_url(
                custom_name=download_file_name)
            response = redirect(download_url)
        else:
            # We read the raw file and download it for the user
            reader.start()
            raw = reader.read_raw()
            response = Response(raw)
            response.headers["Content-Type"] = "text/csv"
            response.headers[
                "Content-Disposition"] = f'attachment; filename="{download_file_name}"'
        return response
Exemple #5
0
def update_datadoc_editor(
        id,
        write=None,
        read=None,
        originator=None,  # Used for websocket to identify sender, optional
):
    with DBSession() as session:
        editor = logic.get_data_doc_editor_by_id(id, session=session)
        if editor:
            assert_can_write(editor.data_doc_id, session=session)

        editor = logic.update_data_doc_editor(id, read, write, session=session)
        if editor:
            editor_dict = editor.to_dict()
            socketio.emit(
                "data_doc_editor",
                (
                    originator,
                    editor_dict["data_doc_id"],
                    editor_dict["uid"],
                    editor_dict,
                ),
                namespace="/datadoc",
                room=editor_dict["data_doc_id"],
                broadcast=True,
            )
            return editor_dict
Exemple #6
0
def update_datadoc_schedule(id, cron=None, enabled=None, kwargs=None):
    if kwargs is not None:
        kwargs_valid, kwargs_valid_reason = validate_datadoc_schedule_config(
            kwargs)
        api_assert(kwargs_valid, kwargs_valid_reason)
    if cron is not None:
        api_assert(validate_cron(cron), "Invalid cron expression")

    schedule_name = schedule_logic.get_data_doc_schedule_name(id)
    with DBSession() as session:
        assert_can_write(id, session=session)

        schedule = schedule_logic.get_task_schedule_by_name(schedule_name,
                                                            session=session)
        api_assert(schedule, "Schedule does not exist")
        verify_data_doc_permission(id, session=session)

        updated_fields = {}
        if cron is not None:
            updated_fields["cron"] = cron
        if enabled is not None:
            updated_fields["enabled"] = enabled
        if kwargs is not None:
            updated_fields["kwargs"] = {
                **kwargs,
                "user_id": current_user.id,
                "doc_id": id,
            }

        return schedule_logic.update_task_schedule(
            schedule.id,
            session=session,
            **updated_fields,
        )
Exemple #7
0
def create_data_doc_from_execution(
    environment_id,
    execution_id,
    engine_id,
    query_string,
    title=None,
):
    with DBSession() as session:
        verify_environment_permission([environment_id])
        environment = Environment.get(id=environment_id, session=session)
        execution = get_query_execution_by_id(execution_id, session=session)
        uid = current_user.id
        api_assert(execution.uid == uid,
                   "You can only create from your own executions.")

        return logic.create_data_doc_from_execution(
            environment_id=environment_id,
            owner_uid=uid,
            engine_id=engine_id,
            query_string=query_string,
            execution_id=execution_id,
            public=environment.shareable,
            archived=False,
            title=title,
            meta={},
            session=session,
        )
Exemple #8
0
def create_metastore_schedule(
    id, cron,
):
    with DBSession() as session:
        return logic.create_query_metastore_update_schedule(
            metastore_id=id, cron=cron, session=session
        )
Exemple #9
0
def get_table_column_stats(column_id):
    """Get all table stats column by id"""
    with DBSession() as session:
        column = logic.get_column_by_id(column_id, session=session)
        verify_data_table_permission(column.table_id, session=session)
        return DataTableColumnStatistics.get_all(column_id=column_id,
                                                 session=session)
Exemple #10
0
def get_table_query_examples(
    table_id,
    environment_id,
    uid=None,
    engine_id=None,
    with_table_id=None,
    limit=10,
    offset=0,
):
    api_assert(limit < 100)

    with DBSession() as session:
        verify_environment_permission([environment_id])
        verify_data_table_permission(table_id, session=session)
        engines = admin_logic.get_query_engines_by_environment(environment_id,
                                                               session=session)
        engine_ids = [engine.id for engine in engines]
        api_assert(engine_id is None or engine_id in engine_ids,
                   "Invalid engine id")
        query_logs = logic.get_table_query_examples(
            table_id,
            engine_ids,
            uid=uid,
            engine_id=engine_id,
            with_table_id=with_table_id,
            limit=limit,
            offset=offset,
            session=session,
        )
        query_ids = [log.query_execution_id for log in query_logs]

        return query_ids
Exemple #11
0
def get_column(column_id, with_table=False):
    with DBSession() as session:
        column = logic.get_column_by_id(column_id, session=session)
        verify_data_table_permission(column.table_id, session=session)
        column_dict = column.to_dict(with_table)

        return column_dict
Exemple #12
0
def create_table_samples(
    table_id,
    environment_id,
    engine_id,
    partition=None,
    where=None,
    order_by=None,
    order_by_asc=True,
    limit=100,
):
    with DBSession() as session:
        api_assert(limit <= 100, "Too many rows requested")
        verify_environment_permission([environment_id])
        verify_data_table_permission(table_id, session=session)
        verify_query_engine_environment_permission(engine_id,
                                                   environment_id,
                                                   session=session)

        task = run_sample_query.apply_async(args=[
            table_id,
            engine_id,
            current_user.id,
            limit,
            partition,
            where,
            order_by,
            order_by_asc,
        ])
        return task.task_id
Exemple #13
0
def remove_table_ownership(table_id):
    """Remove a table ownership"""
    with DBSession() as session:
        verify_data_table_permission(table_id, session=session)
        return logic.delete_table_ownership(table_id=table_id,
                                            uid=current_user.id,
                                            session=session)
Exemple #14
0
def get_all_table_ownerships_by_table_id(table_id):
    """Add all table ownerships"""
    with DBSession() as session:
        verify_data_table_permission(table_id, session=session)

        return logic.get_all_table_ownerships_by_table_id(table_id=table_id,
                                                          session=session)
Exemple #15
0
def create_query_execution_notification(query_id,):
    with DBSession() as session:
        verify_query_execution_permission(query_id, session=session)

        return logic.create_query_execution_notification(
            query_execution_id=query_id, uid=current_user.id, session=session
        )
Exemple #16
0
    def __init__(self, query_execution_id, celery_task, query, statement_ranges):
        self._query_execution_id = query_execution_id

        self._celery_task = celery_task
        self._task_id = celery_task.request.id

        self.statement_execution_ids = []

        self._query = query
        self._statement_ranges = statement_ranges

        # logging variable
        self._has_log = False
        self._log_cache = ""  # [statement_logs]
        self._meta_info = None  # statement_urls
        self._percent_complete = 0  # percent_complete
        self._statement_progress = {}

        # Connect to mysql db
        with DBSession() as session:
            query_execution = qe_logic.update_query_execution(
                self._query_execution_id, task_id=self._task_id, session=session
            ).to_dict()

        # Emit a event from socketio
        socketio.emit(
            "query_received",
            query_execution,
            namespace=QUERY_EXECUTION_NAMESPACE,
            room=self._query_execution_id,
        )
Exemple #17
0
def create_query_engine(
    name,
    language,
    executor,
    executor_params,
    description=None,
    status_checker=None,
    metastore_id=None,
):
    with DBSession() as session:
        query_engine = QueryEngine.create(
            {
                "name": name,
                "description": description,
                "language": language,
                "executor": executor,
                "executor_params": executor_params,
                "metastore_id": metastore_id,
                "status_checker": status_checker,
            },
            session=session,
        )
        query_engine_dict = query_engine.to_dict_admin()

    return query_engine_dict
Exemple #18
0
def create_table_column_stats_by_name(metastore_name, data):
    """Batch add/update table column stats"""
    # TODO: verify user is a service account
    with DBSession() as session:
        metastore = admin_logic.get_query_metastore_by_name(metastore_name,
                                                            session=session)
        api_assert(metastore, "Invalid metastore")
        verify_metastore_permission(metastore.id, session=session)

        with DataTableFinder(metastore.id) as t_finder:
            for d in data:
                column = t_finder.get_table_column_by_name(
                    schema_name=d["schema_name"],
                    table_name=d["table_name"],
                    column_name=d["column_name"],
                    session=session,
                )

                if column is not None:
                    for s in d["stats"]:
                        logic.upsert_table_column_stat(
                            column_id=column.id,
                            key=s["key"],
                            value=s["value"],
                            uid=current_user.id,
                            session=session,
                        )
    return
Exemple #19
0
def get_users_in_environment(
    id, limit, offset,
):
    with DBSession() as session:
        return environment_logic.get_users_in_environment(
            id, offset, limit, session=session
        )
Exemple #20
0
    def get_metastore():
        """Lazily initialize metastore_loader from DB.
           Use outer-scope variable to memoized initialization

        Raises:
            LatestPartitionException: If the metastore does not exist for engine_id, throw error

        Returns:
           BaseMetastoreLoader: metastore loader to fetch table/schema info
        """
        nonlocal _metastore_loader
        if _metastore_loader is not None:
            return _metastore_loader

        with DBSession() as session:
            engine = admin_logic.get_query_engine_by_id(engine_id,
                                                        session=session)
            metastore_id = engine.metastore_id if engine else None
            _metastore_loader = (metastore.get_metastore_loader(
                metastore_id, session=session)
                                 if metastore_id is not None else None)

            if _metastore_loader is None:
                raise LatestPartitionException(
                    f"Unable to load metastore for engine id {engine_id}")

        return _metastore_loader
Exemple #21
0
def create_datadoc_schedule(
    id,
    cron,
    kwargs,
):
    kwargs_valid, kwargs_valid_reason = validate_datadoc_schedule_config(
        kwargs)
    api_assert(kwargs_valid, kwargs_valid_reason)
    api_assert(validate_cron(cron), "Invalid cron expression")

    schedule_name = schedule_logic.get_data_doc_schedule_name(id)
    with DBSession() as session:
        assert_can_write(id, session=session)
        data_doc = logic.get_data_doc_by_id(id, session=session)
        verify_environment_permission([data_doc.environment_id])

        return schedule_logic.create_task_schedule(
            schedule_name,
            "tasks.run_datadoc.run_datadoc",
            cron=cron,
            kwargs={
                **kwargs, "user_id": current_user.id,
                "doc_id": id
            },
            task_type="user",
            session=session,
        )
Exemple #22
0
    def oauth_callback(self):
        LOG.debug("Handling Oauth callback...")

        if request.args.get("error"):
            return f"<h1>Error: {request.args.get('error')}</h1>"

        code = request.args.get("code")
        try:
            access_token = self._fetch_access_token(code)
            username, email, fullname = self._get_user_profile(access_token)
            with DBSession() as session:
                flask_login.login_user(
                    AuthUser(
                        self.login_user(username, email, fullname, session=session)
                    )
                )
        except AuthenticationError as e:
            LOG.error("Failed authenticate oauth user", e)
            abort_unauthorized()

        next_url = QuerybookSettings.PUBLIC_URL
        if "next" in flask_session:
            next_url = flask_session["next"]
            del flask_session["next"]

        return redirect(next_url)
Exemple #23
0
def get_data_docs(environment_id,
                  filter_mode=None,
                  offset=0,
                  limit=500,
                  archived=False):
    with DBSession() as session:
        verify_environment_permission([environment_id])

        docs = []

        if filter_mode == "mine":
            docs = logic.get_data_doc_by_user(
                current_user.id,
                environment_id=environment_id,
                offset=offset,
                limit=limit,
                session=session,
            )
        elif filter_mode == "favorite":
            docs = logic.get_user_favorite_data_docs(
                current_user.id,
                environment_id=environment_id,
                session=session)
        elif filter_mode == "recent":
            docs = logic.get_user_recent_data_docs(
                current_user.id,
                environment_id=environment_id,
                session=session)
        return docs
Exemple #24
0
def run_query_task(self, query_execution_id):
    executor = None
    error_message = None
    query_execution_status = QueryExecutionStatus.INITIALIZED

    try:
        executor = create_executor_from_execution(query_execution_id,
                                                  celery_task=self)
        run_executor_until_finish(self, executor)
    except SoftTimeLimitExceeded:
        # SoftTimeLimitExceeded
        # This exception happens when query has been running for more than
        # the limited time (default 2 days)
        error_message = format_error_message(
            7408, "The execution has exceeded the maximum allowed time.")
    except QueryExecutorException as e:
        error_message = format_error_message(7403, str(e))
    except Exception as e:
        error_message = format_error_message(
            7406, "{}\n{}".format(e, traceback.format_exc()))
    finally:
        # When the finally block is reached, it is expected
        # that the executor should be in one of the end state
        with DBSession() as session:
            query_execution_status = get_query_execution_final_status(
                query_execution_id, executor, error_message, session=session)
            notifiy_on_execution_completion(query_execution_id,
                                            session=session)

            # Executor exists means the query actually executed
            # This prevents cases when query_execution got executed twice
            if executor and query_execution_status == QueryExecutionStatus.DONE:
                log_query_per_table_task.delay(query_execution_id)

    return query_execution_status.value if executor is not None else None
def search_query_execution(environment_id,
                           filters={},
                           orderBy=None,
                           limit=100,
                           offset=0):
    verify_environment_permission([environment_id])
    with DBSession() as session:
        if "user" in filters:
            api_assert(
                current_user.id == filters["user"],
                "You can only search your own queries",
            )
        else:
            filters["user"] = current_user.id
        query_executions = logic.search_query_execution(
            environment_id=environment_id,
            filters=filters,
            orderBy=orderBy,
            limit=limit,
            offset=offset,
            session=session,
        )

        result = [
            query_execution.to_dict(with_statement=False)
            for query_execution in query_executions
        ]

        return result
def search_query_snippet(environment_id, engine_id=None, is_public=False, golden=None):
    search_by = current_user.id

    with DBSession() as session:
        # TODO: Check if engine_id is under environment_id
        verify_environment_permission([environment_id])
        engine_ids = (
            [engine_id]
            if engine_id is not None
            else list(
                map(
                    lambda e: e.id,
                    admin_logic.get_query_engines_by_environment(environment_id),
                )
            )
        )

        query_snippets = logic.search_snippet(
            search_by=search_by,
            engine_ids=engine_ids,
            is_public=is_public,
            golden=golden,
            session=session,
        )

        query_snippet_dicts = [
            dict(id=snippet[0], title=snippet[1]) for snippet in query_snippets
        ]

        return query_snippet_dicts
def create_query_snippet(
    context=None,
    title=None,
    engine_id=None,
    description=None,
    is_public=None,
    golden=None,
):
    created_by = current_user.id

    api_assert(len(context) > 0, "No empty context")
    api_assert(len(title) > 0, "No empty title")
    api_assert(engine_id is not None, "Must specify engine")
    api_assert(
        not golden or current_user.is_admin, "Only data gurus can create golden snippet"
    )

    with DBSession() as session:
        verify_query_engine_permission(engine_id, session=session)

        return logic.create_snippet(
            created_by,
            context=context,
            title=title,
            engine_id=engine_id,
            description=description,
            is_public=is_public,
            golden=golden,
            session=session,
        )
Exemple #28
0
    def on_cancel(self):
        utcnow = datetime.datetime.utcnow()
        if len(self.statement_execution_ids) > 0:
            statement_execution_id = self.statement_execution_ids[-1]
            upload_path, has_log = self._upload_log(statement_execution_id)
            qe_logic.update_statement_execution(
                statement_execution_id,
                status=StatementExecutionStatus.CANCEL,
                completed_at=utcnow,
                has_log=self._has_log,
                log_path=upload_path if has_log else None,
            )

        with DBSession() as session:
            query_execution = qe_logic.update_query_execution(
                self._query_execution_id,
                status=QueryExecutionStatus.CANCEL,
                completed_at=utcnow,
                session=session,
            ).to_dict()

        socketio.emit(
            "query_cancel",
            query_execution,
            namespace=QUERY_EXECUTION_NAMESPACE,
            room=self._query_execution_id,
        )
Exemple #29
0
 def _create_tables(self, schema_tables):
     with DBSession() as session:
         for (schema_id, schema_name, table) in schema_tables:
             self._create_table_table(schema_id,
                                      schema_name,
                                      table,
                                      session=session)
Exemple #30
0
def create_tag_item(table_id, tag):
    with DBSession() as session:
        verify_data_table_permission(table_id, session=session)
        return logic.create_tag_item(table_id=table_id,
                                     tag_name=tag,
                                     uid=current_user.id,
                                     session=session)