def _read_table(*, connection: tab_api.Connection, table: TableType) -> pd.DataFrame:
    """Read an entire Hyper table into a pandas DataFrame.

    Parameters
    ----------
    connection : tab_api.Connection
        Open connection to the Hyper database.
    table : TableType
        Table to read; a plain string is promoted to a ``TableName``.

    Returns
    -------
    pd.DataFrame
        The table contents with dtypes mapped from the Hyper schema.
    """
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)
    # Map each column name to the pandas dtype its Hyper type translates to.
    dtypes: Dict[str, str] = {
        col.name.unescaped: _tableau_to_pandas_type(
            pantab_types._ColumnType(col.type, col.nullability)
        )
        for col in table_def.columns
    }

    with connection.execute_query(f"SELECT * from {table}") as result:
        df = pd.DataFrame(result)
        df.columns = dtypes.keys()

    # The tableauhyperapi.Timestamp class is not implicitly convertible to a
    # datetime, so convert the affected columns cell-by-cell before the
    # astype() below.
    for name, dtype in dtypes.items():
        if dtype == "datetime64[ns]":
            df[name] = df[name].apply(lambda x: x._to_datetime())
        elif dtype == "datetime64[ns, UTC]":
            df[name] = df[name].apply(lambda x: x._to_datetime()).dt.tz_localize(
                "UTC")
        elif dtype == "timedelta64[ns]":
            df[name] = df[name].apply(_interval_to_timedelta)

    df = df.astype(dtypes)
    return df.fillna(value=np.nan)  # Replace any appearances of None
class Hyper(System):
    """Thin wrapper around a Tableau Hyper process and a single connection."""

    def __init__(self, filename):
        """Start a Hyper process and create/open the database `filename`."""
        self.db = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
        self.conn = Connection(self.db.endpoint, filename, CreateMode.CREATE)

    def create(self, ddl):
        """Execute a DDL statement; returns the affected-row count in a list."""
        count = self.conn.execute_command(ddl)
        return [count]

    def load(self, filename):
        """Bulk-load a CSV file (with header row) into the `logs` table."""
        # Double embedded single quotes so a path containing ' cannot break
        # out of the SQL string literal (the previous naive concatenation
        # produced invalid/injectable SQL for such paths).
        escaped = filename.replace("'", "''")
        count = self.conn.execute_command(
            "COPY logs FROM '" + escaped + "' WITH (FORMAT csv, HEADER)")
        return [count]

    def query(self, sql):
        """Run a query and return the open result object; caller must close it."""
        #schema = result.schema()
        return self.conn.execute_query(sql)
def test_to_dss_date(self):
    """Read a sample date from the bundled Hyper file and convert it to a DSS datetime.

    Also checks that the table's columns can be converted to DSS columns.
    """
    schema_converter = SchemaConversion()
    path_to_hyper = "data/superstore_sample.hyper"
    # Context managers guarantee the Hyper process, connection and result are
    # torn down even when a conversion below raises (the previous version
    # leaked all of them on failure).
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, path_to_hyper) as connection:
            hyper_table = TableName('public', 'Orders')
            hyper_table_def = connection.catalog.get_table_definition(hyper_table)
            row = None
            with connection.execute_query(f'SELECT * FROM {hyper_table}') as result:
                for row in result:
                    pass  # keep only the last row as the sample
            # Guard: `row` was previously unbound when the table was empty.
            assert row is not None, "expected at least one row in public.Orders"
            sample_date = row[2].to_date()
            dss_date = datetime.datetime(sample_date.year, sample_date.month,
                                         sample_date.day)
    dss_columns = schema_converter.hyper_columns_to_dss_columns(
        hyper_table_def.columns)
    return True
def _read_table(*, connection: tab_api.Connection, table: TableType) -> pd.DataFrame:
    """Read the named Hyper table into a pandas DataFrame.

    Parameters
    ----------
    connection : tab_api.Connection
        Open connection to the Hyper database.
    table : TableType
        Table to read; a plain string is promoted to a ``TableName``.

    Raises
    ------
    TypeError
        When a column uses a Hyper type/nullability pair with no pandas
        equivalent.
    """
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)

    # Resolve the pandas dtype for every column up front so unsupported
    # columns fail before any data is fetched.
    dtypes: Dict[str, str] = {}
    for col in table_def.columns:
        col_type = pantab_types._ColumnType(col.type, col.nullability)
        try:
            dtypes[col.name.unescaped] = pantab_types._pandas_types[col_type]
        except KeyError as e:
            raise TypeError(
                f"Column {col.name} has unsupported datatype {col.type} "
                f"with nullability {col.nullability}") from e

    query = f"SELECT * from {table}"
    with connection.execute_query(query) as result:
        return _read_query_result(result, dtypes)
class TableauTableReader(object):
    """Read one table from a Tableau Hyper file that arrives as a byte stream.

    The stream is spooled to a temporary .hyper file, opened with a local
    Hyper process, and read back in batches of `self.limit` rows.
    """

    def __init__(self, schema_name, table_name):
        """
        Wrapper for the Tableau Hyper formatter

        :param schema_name : name of the schema as stored in the Tableau Hyper file
        :param table_name : name of the table as stored in the Tableau Hyper file
        """
        self.table_name = table_name
        self.schema_name = schema_name

        self.hyper_table = None
        self.hyper_columns = None
        self.hyper_storage_types = None
        self.dss_columns = None
        self.dss_storage_types = None

        # Buffer of not-yet-delivered rows for the current batch; filled by
        # fetch_rows (stored last-first, see there) and drained by read_row.
        self.rows = []
        self.row_index = 0

        self.path_to_hyper = None
        self.hyper = None
        self.connection = None

        self.schema_converter = SchemaConversion()

        # Handle batch querying
        self.offset = 0
        self.limit = 10000
        self.end_read = False

    def create_tmp_hyper_file(self):
        """
        Create a temporary file to store the streaming buffer

        :return: self.path_to_hyper: path to the temporary file
        """
        cache_dir = get_cache_location_from_user_config()
        # Set the delete parameter to False imperatively to avoid early deletion
        self.path_to_hyper = tempfile.NamedTemporaryFile(
            suffix=".hyper", prefix="tmp_hyper_file_", delete=False,
            dir=cache_dir).name
        logger.info(
            "Creating temporary file to store future buffer stream from Hyper: {} "
            .format(self.path_to_hyper))

    def read_buffer(self, stream):
        """
        Read and store the full stream

        :param stream: stream coming from the Tableau Hyper file
        :return:
        """
        line = True
        with open(self.path_to_hyper, "ab") as f:
            while line:
                # 1 KiB chunks; the loop ends on the empty read at EOF.
                line = stream.read(1024)
                f.write(line)
        logger.info("Stored the full stream as bytes")

    def open_connection(self):
        """
        Open the connection to the Tableau Hyper file and the database
        """
        self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
        self.connection = Connection(self.hyper.endpoint, self.path_to_hyper)
        logger.info("Opened the connection to Tableau Hyper file")

    def read_hyper_columns(self):
        """
        Read from the Tableau Hyper file the columns and schema of the table

        :return: self.hyper_storage_types
        """
        logger.info("Trying to read Tableau Hyper table {}.{} ...".format(
            self.schema_name, self.table_name))
        hyper_table = TableName(self.schema_name, self.table_name)
        self.hyper_table = hyper_table
        try:
            table_def = self.connection.catalog.get_table_definition(
                hyper_table)
        except HyperException as e:
            # Previously the warning swapped table and schema; log schema.table
            # consistently with the exception below.
            logger.warning(
                "The target table does not exist in this hyper file. Requested table: {}.{}"
                .format(self.schema_name, self.table_name))
            # Chain the HyperException so the root cause stays visible.
            raise Exception("Table does not exist: {}.{}".format(
                self.schema_name, self.table_name)) from e

        self.hyper_columns = table_def.columns
        self.hyper_storage_types = [
            column.type.tag for column in self.hyper_columns
        ]
        self.dss_columns = self.schema_converter.hyper_columns_to_dss_columns(
            self.hyper_columns)
        self.dss_storage_types = [
            column['type'] for column in self.dss_columns
        ]

        self.schema_converter.set_dss_storage_types(self.dss_storage_types)
        self.schema_converter.set_hyper_storage_types(self.hyper_storage_types)

    def fetch_rows(self, offset, limit):
        """
        Retrieve the next batch of rows from the Tableau Hyper file
        """
        sql_hyper_query = f'SELECT {build_query(self.hyper_columns)} FROM {self.hyper_table} OFFSET {offset} LIMIT {limit}'
        logger.warning("SQL query: {} ".format(sql_hyper_query))
        try:
            result = self.connection.execute_query(sql_hyper_query)
        except Exception as err:
            logger.fatal("Tried to execute query but was unsuccessful.")
            raise err
        for row in result:
            self.rows.append(row)
        # BUGFIX: read_row drains the buffer with pop() from the END, which
        # used to hand every batch back in reversed order. Store the batch
        # last-first so pop() is O(1) AND preserves query order.
        self.rows.reverse()

    def close_connection(self):
        """
        Close the connection to the Tableau Hyper file
        """
        self.connection.close()
        self.hyper.close()
        if os.path.exists(self.path_to_hyper):
            os.remove(self.path_to_hyper)

    def read_schema(self):
        """
        Access schema
        """
        logger.info("Send to dss during read_schema: {}".format(
            self.dss_columns))
        return self.dss_columns

    def read_row(self):
        """
        Read one row from the stored data

        :return: a {column_name: value} dict, or None when exhausted.
        """
        if self.end_read:
            return None
        if len(self.rows) == 0:
            self.fetch_rows(self.offset, self.limit)
            self.offset += self.limit
        if len(self.rows) == 0:
            # Empty batch means the table is exhausted; release resources.
            self.close_connection()
            self.end_read = True
            logger.info("Finished reading rows from hyper file...")
            return None
        else:
            # fetch_rows stored the batch reversed, so pop() yields rows in
            # query order.
            hyper_row = self.rows.pop()
            dss_row = self.schema_converter.prepare_row_to_dss(hyper_row)
            row = {}
            for column, value in zip(self.dss_columns, dss_row):
                row[column["name"]] = value
            self.row_index += 1
            return row
class HyperKernel(Kernel):
    """Jupyter kernel that executes SQL against an embedded Tableau Hyper database.

    Regular cells are sent to Hyper as SQL; cells starting with a backslash
    are client-side commands (\\i, \\o, \\attach, \\detach, \\?).
    """

    implementation = 'Hyper'
    implementation_version = '0.0'
    language = 'sql'
    language_version = '0.0'
    language_info = {
        'name': 'sql',
        'mimetype': 'text/sql',
        'file_extension': '.sql',
    }
    banner = "Hyper 🚀 - Your friendly neighborhood SQL database.\n" +\
        "Type '\\?' for help."

    def __init__(self, *args, **kwargs):
        super(HyperKernel, self).__init__(*args, **kwargs)
        # One Hyper process and one connection for the kernel's lifetime.
        self._hyper_process = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, 'jupyter_sql_kernel')
        self._connection = Connection(self._hyper_process.endpoint)
        self._output_func = self._display_output

    def do_shutdown(self, restart):
        self._connection.close()
        self._hyper_process.close()
        return {'status': 'ok', 'restart': restart}

    def _success_response(self, payloads=None):
        """Build an 'ok' execute_reply; `payloads` defaults to no payloads."""
        # BUGFIX: `payloads=[]` was a mutable default argument shared across
        # calls; use the None sentinel instead.
        return {
            'status': 'ok',
            # The base class increments the execution count for us already
            'execution_count': self.execution_count,
            'payload': payloads if payloads is not None else [],
            'user_expressions': {},
        }

    def _error_response(self, ename, evalue, traceback):
        # Format & send the error message
        error_response = {
            'ename': ename,
            'evalue': evalue,
            'traceback': traceback
        }
        self.send_response(self.iopub_socket, 'error', error_response)
        error_response['status'] = 'error'
        error_response['execution_count'] = self.execution_count
        return error_response

    def _send_text(self, txt):
        self.send_response(self.iopub_socket, 'display_data', {'data': {'text/plain': txt}, 'metadata': {}})

    def _format_hyper_error(self, e):
        formatted = f"Error:\n{e.main_message}"
        if e.hint:
            # Newline so the hint is not glued onto the main message.
            formatted += f"\nHINT: {e.hint}"
        return formatted

    def _display_output(self, sql_result, silent):
        if not silent:
            column_names = [c.name for c in sql_result.schema.columns]
            result = list(sql_result)
            if column_names or result:
                response_data = {
                    'text/plain': tabulate(result, headers=column_names),
                    'text/html': tabulate(result, headers=column_names, tablefmt='html'),
                }
                # Integration with the "@tableau/query-graphs-jupyterlab-extension" extension for plan rendering in JupyterLab
                if column_names == ["plan"]:
                    try:
                        response_data['application/vnd.tableau.hyper-queryplan'] = json.loads("".join(row[0] for row in result))
                    except json.JSONDecodeError:
                        pass
                # Support for "Vega output" form Hyper.
                # In case the user is skilled enough to write a SQL query which outputs a Vega visualizations, go ahead and display the visualization in JupyterLab.
                if len(column_names) == 1 and len(result) == 1 and isinstance(result[0][0], str):
                    try:
                        parsed = json.loads(result[0][0])
                        if isinstance(parsed, dict):
                            if parsed.get("$schema", "").startswith('https://vega.github.io/schema/vega/'):
                                response_data['application/vnd.vega.v5+json'] = parsed
                                del response_data['text/html']
                            if parsed.get("$schema", "").startswith('https://vega.github.io/schema/vega-lite/'):
                                response_data['application/vnd.vegalite.v3+json'] = parsed
                                del response_data['text/html']
                    except json.JSONDecodeError:
                        pass
                self.send_response(self.iopub_socket, 'display_data', {'source': 'sql', 'data': response_data, 'metadata': {}})

    def _create_file_output_func(self, filename):
        def _file_output(self, sql_result, silent):
            with open(filename, "a") as f:
                column_names = [c.name for c in sql_result.schema.columns]
                result = list(sql_result)
                f.write(tabulate(result, headers=column_names))
                f.write("\n")
        # Bind as a method so the signature matches _display_output.
        return _file_output.__get__(self, HyperKernel)

    def _discard_output(self, sql_result, silent):
        if sql_result is not None and sql_result.schema is not None:
            # We still want to fetch the whole result (to not screw up timing measurements)
            for i in sql_result:
                pass

    def execute_sql(self, code, silent):
        "Execute a SQL query and display the results to the user"
        start_time = time.perf_counter()
        try:
            with self._connection.execute_query(code) as sql_result:
                self._output_func(sql_result, silent)
        except HyperException as e:
            # Format & send the error message
            return self._error_response(str("HyperException"), str(e.args[0]), [self._format_hyper_error(e)])
        end_time = time.perf_counter()
        elapsed = end_time - start_time
        self._send_text('{:.3f}s elapsed'.format(elapsed))
        return self._success_response()

    def _command_input_sql(self, args):
        """ Read SQL query from a file and execute it """
        if len(args) != 1:
            return self._error_response("InvalidClientCommandArguments", repr(args), ["Unexpected number of arguments"])
        filename = args[0]
        try:
            with open(filename) as f:
                file_content = f.read()
        except OSError:
            # Catch only I/O failures (the old bare `except:` swallowed
            # everything) and name the actual file in the message.
            return self._error_response("IOError", repr(args), [f"Unable to read file '{filename}'"])
        self.execute_sql(file_content, silent=False)

    def _command_redirect_output(self, args):
        """ Redirect output into a file """
        if len(args) > 1:
            return self._error_response("InvalidClientCommandArguments", repr(args), ["Unexpected number of arguments"])
        if len(args) == 0:
            self._output_func = self._display_output
        elif args[0] == "-":
            self._output_func = self._discard_output
        else:
            filename = args[0]
            # Truncate the file & create if it does not exist
            try:
                with open(filename, "w"):
                    pass
            except OSError:
                # This is the write path: report an accurate message with the
                # actual filename (was a dead "Unable to read file '(unknown)'").
                return self._error_response("IOError", repr(args), [f"Unable to write file '{filename}'"])
            self._output_func = self._create_file_output_func(filename)

    def _command_attach(self, args):
        """ Open a Hyper file """
        if len(args) != 2:
            return self._error_response("InvalidClientCommandArguments", repr(args), ["Unexpected number of arguments"])
        database_path = args[0]
        alias = args[1]
        try:
            self._connection.catalog.attach_database(database_path, alias)
        except HyperException as e:
            # Format & send the error message
            return self._error_response(str("HyperException"), str(e.args[0]), [self._format_hyper_error(e)])

    def _command_detach(self, args):
        """ Close a Hyper file """
        if len(args) != 1:
            return self._error_response("InvalidClientCommandArguments", repr(args), ["Unexpected number of arguments"])
        alias = args[0]
        try:
            self._connection.catalog.detach_database(alias)
        except HyperException as e:
            # Format & send the error message
            return self._error_response(str("HyperException"), str(e.args[0]), [self._format_hyper_error(e)])

    def _process_client_command(self, code, silent):
        "Execute a client command"
        commands = {
            "i": self._command_input_sql,
            "o": self._command_redirect_output,
            "attach": self._command_attach,
            "detach": self._command_detach,
        }
        # Tokenize command line
        code = code.lstrip()
        assert code[0] == '\\'
        code = code[1:]
        args = list(shlex.split(code, posix=True))
        cmd = args.pop(0)
        if cmd == "?" or cmd == "help":
            help_text = 'SQL command reference: https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/sql-commands.html\n'
            help_text += 'Additional client-side commands:\n'
            help_text += tabulate((["\\" + c[0], c[1].__doc__] for c in commands.items()), tablefmt='plain')
            help_text += '\n'
            help_text += 'Parameters are parsed in POSIX shell manner.\n'
            self._send_text(help_text)
            return self._success_response()
        if cmd not in commands:
            # `\\{cmd}` escapes the backslash explicitly; the old `\{cmd}`
            # relied on an invalid escape sequence (SyntaxWarning on modern
            # Python). Output is unchanged: a literal backslash + command.
            return self._error_response("UnknownClientCommand", cmd, [f"Unknown client command \\{cmd}"])
        response = commands[cmd](args)
        return response if response is not None else self._success_response()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        # startswith() instead of [0]: an empty/whitespace-only cell used to
        # raise IndexError here.
        if code.lstrip().startswith('\\'):
            return self._process_client_command(code, silent)
        else:
            return self.execute_sql(code, silent)