Example no. 1
from pathlib import Path

from tableauhyperapi import (Connection, HyperProcess, Inserter, TableName,
                             Telemetry, escape_string_literal)


class Extract:
    def __init__(self, path="superstore.hyper"):
        self._path = Path(__file__).parent / path
        self._table_name = TableName("Extract", "Extract")
        self._hyper = HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU)
        self._connection = Connection(endpoint=self._hyper.endpoint,
                                      database=self._path)

    def __del__(self):
        self._connection.close()
        self._hyper.close()

    def delete_data(self, date):
        row_count = self._connection.execute_command(
            # escape_string_literal quotes the value, guarding against SQL injection
            command=f"DELETE FROM {self._table_name} "
                    f"WHERE order_date >= {escape_string_literal(date)}"
        )
        print(f"The number of deleted rows in table {self._table_name} "
              f"is {row_count}.\n")

    def read_extract(self):
        print(f"These are all rows in the table {self._table_name}:")
        rows_in_table = self._connection.execute_list_query(query=f"SELECT * FROM {self._table_name}")
        print(rows_in_table)

    def insert_data(self, data):
        with Inserter(self._connection, self._table_name) as inserter:
            inserter.add_rows(rows=data)
            inserter.execute()
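A minimal usage sketch for the class above (hypothetical driver code; it assumes superstore.hyper sits next to the script and that its "Extract"."Extract" table has an order_date column):

extract = Extract()                # starts Hyper and opens the connection
extract.read_extract()             # print every row in the table
extract.delete_data("2018-06-01")  # delete orders on or after this toy date
extract.read_extract()
del extract                        # __del__ closes the connection and process
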
Example no. 2
# Excerpted test, made standalone; SchemaConversion comes from the plugin's
# own modules and is assumed importable here.
from tableauhyperapi import Connection, HyperProcess, TableName, Telemetry


def test_hyper_columns_to_dss_columns():
    schema_converter = SchemaConversion()
    path_to_hyper = "data/superstore_sample.hyper"
    hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
    connection = Connection(hyper.endpoint, path_to_hyper)
    hyper_table = connection.catalog.get_table_definition(
        TableName('public', 'Customer'))
    connection.close()
    hyper.close()
    dss_columns = schema_converter.hyper_columns_to_dss_columns(
        hyper_table.columns)
    return True
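For context, the same catalog calls in a standalone sketch using only tableauhyperapi (no plugin helpers; the file path is an assumption):

from tableauhyperapi import Connection, HyperProcess, TableName, Telemetry

with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with Connection(hyper.endpoint, "data/superstore_sample.hyper") as connection:
        table_def = connection.catalog.get_table_definition(
            TableName('public', 'Customer'))
        for column in table_def.columns:
            # column.type.tag is a TypeTag value such as TypeTag.TEXT or TypeTag.DATE
            print(column.name, column.type.tag)
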
Example no. 3
import datetime


def test_to_dss_date():
    schema_converter = SchemaConversion()
    path_to_hyper = "data/superstore_sample.hyper"
    hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
    connection = Connection(hyper.endpoint, path_to_hyper)
    hyper_table = TableName('public', 'Orders')
    hyper_table_def = connection.catalog.get_table_definition(hyper_table)
    result = connection.execute_query(f'SELECT * FROM {hyper_table}')
    for row in result:
        pass  # drain the result set; `row` ends up holding the last row
    sample_date = row[2].to_date()
    dss_date = datetime.datetime(sample_date.year, sample_date.month,
                                 sample_date.day)
    connection.close()
    hyper.close()
    dss_columns = schema_converter.hyper_columns_to_dss_columns(
        hyper_table_def.columns)
    return True
Example no. 4
# Assumed imports: os, tempfile, logging; tableauhyperapi (Connection,
# HyperException, HyperProcess, TableName, Telemetry); plus the plugin's own
# SchemaConversion, build_query and get_cache_location_from_user_config helpers.
class TableauTableReader(object):
    def __init__(self, schema_name, table_name):
        """
        Wrapper for the Tableau Hyper formatter

        :param schema_name : name of the schema as stored in the Tableau Hyper file
        :param table_name : name of the table as stored in the Tableau Hyper file
        """

        self.table_name = table_name
        self.schema_name = schema_name

        self.hyper_table = None
        self.hyper_columns = None
        self.hyper_storage_types = None
        self.dss_columns = None
        self.dss_storage_types = None

        self.rows = []
        self.row_index = 0

        self.path_to_hyper = None

        self.hyper = None
        self.connection = None

        self.schema_converter = SchemaConversion()

        # Handle batch querying
        self.offset = 0
        self.limit = 10000
        self.end_read = False

    def create_tmp_hyper_file(self):
        """
        Create a temporary file to store the streaming buffer
        :return: self.path_to_hyper: path to the temporary file
        """
        cache_dir = get_cache_location_from_user_config()
        # Set delete=False explicitly so the file is not removed on close
        self.path_to_hyper = tempfile.NamedTemporaryFile(
            suffix=".hyper",
            prefix="tmp_hyper_file_",
            delete=False,
            dir=cache_dir).name
        logger.info(
            "Creating temporary file to store future buffer stream from Hyper: {} "
            .format(self.path_to_hyper))

    def read_buffer(self, stream):
        """
        Read and store the full stream
        :param stream: stream coming from the Tableau Hyper file
        :return:
        """
        with open(self.path_to_hyper, "ab") as f:
            # Copy the stream in 1 KiB chunks until it is exhausted
            while chunk := stream.read(1024):
                f.write(chunk)
        logger.info("Stored the full stream as bytes")

    def open_connection(self):
        """
        Open the connection to the Tableau Hyper file and the database
        """
        self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
        self.connection = Connection(self.hyper.endpoint, self.path_to_hyper)
        logger.info("Opened the connection to Tableau Hyper file")

    def read_hyper_columns(self):
        """
        Read from the Tableau Hyper file the columns and schema of the table

        :return: self.hyper_storage_types
        """
        logger.info("Trying to read Tableau Hyper table {}.{} ...".format(
            self.schema_name, self.table_name))
        hyper_table = TableName(self.schema_name, self.table_name)
        self.hyper_table = hyper_table

        try:
            table_def = self.connection.catalog.get_table_definition(
                hyper_table)
        except HyperException as e:
            logger.warning(
                "The target table does not exist in this hyper file. Requested table: {}.{}"
                .format(self.schema_name, self.table_name))
            raise Exception("Table does not exist: {}.{}".format(
                self.schema_name, self.table_name)) from e

        self.hyper_columns = table_def.columns
        self.hyper_storage_types = [
            column.type.tag for column in self.hyper_columns
        ]

        self.dss_columns = self.schema_converter.hyper_columns_to_dss_columns(
            self.hyper_columns)
        self.dss_storage_types = [
            column['type'] for column in self.dss_columns
        ]

        self.schema_converter.set_dss_storage_types(self.dss_storage_types)
        self.schema_converter.set_hyper_storage_types(self.hyper_storage_types)

    def fetch_rows(self, offset, limit):
        """
        Retrieve one batch of rows from the Tableau Hyper file, converting values on the fly
        """
        sql_hyper_query = f'SELECT {build_query(self.hyper_columns)} FROM {self.hyper_table} OFFSET {offset} LIMIT {limit}'
        logger.info("SQL query: {}".format(sql_hyper_query))
        try:
            result = self.connection.execute_query(sql_hyper_query)
        except Exception as err:
            logger.fatal("Tried to execute query but was unsuccessful.")
            raise err
        for row in result:
            self.rows.append(row)

    def close_connection(self):
        """
        Close the connection to the Tableau Hyper file
        """
        self.connection.close()
        self.hyper.close()
        if os.path.exists(self.path_to_hyper):
            os.remove(self.path_to_hyper)

    def read_schema(self):
        """
        Access schema
        """
        logger.info("Send to dss during read_schema: {}".format(
            self.dss_columns))
        return self.dss_columns

    def read_row(self):
        """
        Read one row from the stored data
        """
        if self.end_read:
            return None
        if len(self.rows) == 0:
            self.fetch_rows(self.offset, self.limit)
            self.offset += self.limit
        if len(self.rows) == 0:
            self.close_connection()
            self.end_read = True
            logger.info("Finished reading rows from hyper file...")
            return None
        else:
            # pop(0) keeps rows in their original order (pop() would reverse each batch)
            hyper_row = self.rows.pop(0)
            dss_row = self.schema_converter.prepare_row_to_dss(hyper_row)
            row = {}
            for column, value in zip(self.dss_columns, dss_row):
                row[column["name"]] = value
            self.row_index += 1
            return row
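A hypothetical driver for the reader above, with a local sample file standing in for the DSS stream:

reader = TableauTableReader("public", "Orders")
reader.create_tmp_hyper_file()
with open("data/superstore_sample.hyper", "rb") as stream:
    reader.read_buffer(stream)
reader.open_connection()
reader.read_hyper_columns()
print(reader.read_schema())
while (row := reader.read_row()) is not None:
    pass  # each row is a dict keyed by DSS column name
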
Example no. 5
import json
import shlex
import time

from ipykernel.kernelbase import Kernel
from tabulate import tabulate
from tableauhyperapi import Connection, HyperException, HyperProcess, Telemetry


class HyperKernel(Kernel):
    implementation = 'Hyper'
    implementation_version = '0.0'
    language = 'sql'
    language_version = '0.0'
    language_info = {
        'name': 'sql',
        'mimetype': 'text/sql',
        'file_extension': '.sql',
    }
    banner = "Hyper 🚀 - Your friendly neighborhood SQL database.\n" +\
             "Type '\\?' for help."

    def __init__(self, *args, **kwargs):
        super(HyperKernel, self).__init__(*args, **kwargs)

        self._hyper_process = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, 'jupyter_sql_kernel')
        self._connection = Connection(self._hyper_process.endpoint)
        self._output_func = self._display_output

    def do_shutdown(self, restart):
        self._connection.close()
        self._hyper_process.close()
        return {'status': 'ok', 'restart': restart}

    def _success_response(self, payloads=None):
        # Use None as default to avoid a shared mutable default argument
        payloads = payloads if payloads is not None else []
        return {
                'status': 'ok',
                # The base class increments the execution count for us already
                'execution_count': self.execution_count,
                'payload': payloads,
                'user_expressions': {},
               }

    def _error_response(self, ename, evalue, traceback):
        # Format & send the error message
        error_response = {
           'ename': ename,
           'evalue': evalue,
           'traceback': traceback
        }
        self.send_response(self.iopub_socket, 'error', error_response)
        error_response['status'] = 'error'
        error_response['execution_count'] = self.execution_count
        return error_response

    def _send_text(self, txt):
        self.send_response(self.iopub_socket, 'display_data', {'data': {'text/plain': txt}, 'metadata': {}})

    def _format_hyper_error(self, e):
        formatted = f"Error:\n{e.main_message}"
        if e.hint:
            formatted += f"\nHINT: {e.hint}"
        return formatted

    def _display_output(self, sql_result, silent):
        if not silent:
            column_names = [c.name for c in sql_result.schema.columns]
            result = list(sql_result)
            if column_names or result:
                response_data = {
                    'text/plain': tabulate(result, headers=column_names),
                    'text/html': tabulate(result, headers=column_names, tablefmt='html'),
                }
                # Integration with the "@tableau/query-graphs-jupyterlab-extension" extension for plan rendering in JupyterLab
                if column_names == ["plan"]:
                    try:
                        response_data['application/vnd.tableau.hyper-queryplan'] = json.loads("".join(row[0] for row in result))
                    except json.JSONDecodeError as e:
                        pass
                # Support for "Vega output" from Hyper.
                # If the user writes a SQL query whose single result value is a Vega visualization, display the visualization in JupyterLab.
                if len(column_names) == 1 and len(result) == 1 and isinstance(result[0][0], str):
                    try:
                        parsed = json.loads(result[0][0])
                        if isinstance(parsed, dict):
                            if parsed.get("$schema", "").startswith('https://vega.github.io/schema/vega/'):
                                response_data['application/vnd.vega.v5+json'] = parsed
                                del response_data['text/html']
                            if parsed.get("$schema", "").startswith('https://vega.github.io/schema/vega-lite/'):
                                response_data['application/vnd.vegalite.v3+json'] = parsed
                                del response_data['text/html']
                    except json.JSONDecodeError as e:
                        pass
                self.send_response(self.iopub_socket, 'display_data', {'source': 'sql', 'data': response_data, 'metadata': {}})

    def _create_file_output_func(self, filename):
        # The closure matches the (sql_result, silent) signature of the other
        # output functions, so no method binding via __get__ is needed.
        def _file_output(sql_result, silent):
            with open(filename, "a") as f:
                column_names = [c.name for c in sql_result.schema.columns]
                result = list(sql_result)
                f.write(tabulate(result, headers=column_names))
                f.write("\n")
        return _file_output

    def _discard_output(self, sql_result, silent):
        if sql_result is not None and sql_result.schema is not None:
            # We still want to fetch the whole result (to not screw up timing measurements)
            for i in sql_result:
                pass

    def execute_sql(self, code, silent):
        "Execute a SQL query and display the results to the user"
        start_time = time.perf_counter()
        try:
            with self._connection.execute_query(code) as sql_result:
                self._output_func(sql_result, silent)
        except HyperException as e:
            # Format & send the error message
            return self._error_response(str("HyperException"), str(e.args[0]), [self._format_hyper_error(e)])

        end_time = time.perf_counter()
        elapsed = end_time - start_time
        self._send_text('{:.3f}s elapsed'.format(elapsed))

        return self._success_response()

    def _command_input_sql(self, args):
        """
        Read SQL query from a file and execute it
        """
        if len(args) != 1:
            return self._error_response("InvalidClientCommandArguments", repr(args), ["Unexpected number of arguments"])
        filename = args[0]
        try:
            with open(filename) as f:
                file_content = f.read()
        except OSError:
            return self._error_response("IOError", repr(args), [f"Unable to read file '{filename}'"])
        self.execute_sql(file_content, silent=False)

    def _command_redirect_output(self, args):
        """
        Redirect output into a file
        """
        if len(args) > 1:
            return self._error_response("InvalidClientCommandArguments", repr(args), ["Unexpected number of arguments"])
        if len(args) == 0:
            self._output_func = self._display_output
        elif args[0] == "-":
            self._output_func = self._discard_output
        else:
            filename = args[0]
            # Truncate the file & create if it does not exist
            try:
                with open(filename, "w"):
                    pass
            except OSError:
                return self._error_response("IOError", repr(args), [f"Unable to write file '{filename}'"])
            self._output_func = self._create_file_output_func(filename)

    def _command_attach(self, args):
        """
        Open a Hyper file
        """
        if len(args) != 2:
            return self._error_response("InvalidClientCommandArguments", repr(args), ["Unexpected number of arguments"])
        database_path = args[0]
        alias = args[1]
        try:
            self._connection.catalog.attach_database(database_path, alias)
        except HyperException as e:
            # Format & send the error message
            return self._error_response(str("HyperException"), str(e.args[0]), [self._format_hyper_error(e)])

    def _command_detach(self, args):
        """
        Close a Hyper file
        """
        if len(args) != 1:
            return self._error_response("InvalidClientCommandArguments", repr(args), ["Unexpected number of arguments"])
        alias = args[0]
        try:
            self._connection.catalog.detach_database(alias)
        except HyperException as e:
            # Format & send the error message
            return self._error_response(str("HyperException"), str(e.args[0]), [self._format_hyper_error(e)])

    def _process_client_command(self, code, silent):
        "Execute a client command"

        commands = {
            "i": self._command_input_sql,
            "o": self._command_redirect_output,
            "attach": self._command_attach,
            "detach": self._command_detach,
        }

        # Tokenize command line
        code = code.lstrip()
        assert code[0] == '\\'
        code = code[1:]
        args = list(shlex.split(code, posix=True))
        cmd = args.pop(0)

        if cmd == "?" or cmd == "help":
            help_text = 'SQL command reference: https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/sql-commands.html\n'
            help_text += 'Additional client-side commands:\n'
            help_text += tabulate((["\\" + c[0], c[1].__doc__] for c in commands.items()), tablefmt='plain')
            help_text += '\n'
            help_text += 'Parameters are parsed in POSIX shell manner.\n'
            self._send_text(help_text)
            return self._success_response()

        if cmd not in commands:
            return self._error_response("UnknownClientCommand", cmd, [f"Unknown client command \{cmd}"])

        response = commands[cmd](args)

        return response if response is not None else self._success_response()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        if code.lstrip().startswith('\\'):
            return self._process_client_command(code, silent)
        else:
            return self.execute_sql(code, silent)
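To try the kernel above, the standard ipykernel entry point applies; a minimal sketch (the launcher call is the documented pattern for Kernel subclasses, while packaging it as a module and writing the kernel spec are left as assumptions):

if __name__ == '__main__':
    from ipykernel.kernelapp import IPKernelApp
    # Register with Jupyter via a kernel spec whose argv runs this module
    # with "-f {connection_file}".
    IPKernelApp.launch_instance(kernel_class=HyperKernel)
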
Example no. 6
# Assumed imports: tableauhyperapi (Connection, CreateMode, HyperProcess,
# Inserter, TableDefinition, TableName, Telemetry) plus the plugin's own
# SchemaConversion, dss_is_geo and geo_to_text helpers and a module-level logger.
class TableauTableWriter(object):
    """
    Wrapper class for writing a Tableau Hyper file from a DSS dataset.
    """
    def __init__(self, schema_name, table_name):
        """
        :param schema_name: name of the target schema
        :param table_name: name of the target table
        """
        self.row_index = 0
        self.data = []
        self.batch_size = 2000

        self.schema_name = schema_name
        self.table_name = table_name

        self.output_file = None
        self.is_geo_table = False

        self.schema_converter = SchemaConversion()

        # Tableau Hyper related objects
        self.hyper = None
        self.connection = None
        self.tmp_table_definition = None
        self.output_table_definition = None
        self.tmp_table_inserter = None
        self.output_table_inserter = None

    def create_schema(self, schema_dss, destination_file_path):
        """
        Create the Tableau Hyper schema from the DSS dataset schema and open
        the connection to the output file.

        :param schema_dss: DSS schema of the dataset to export,
            example: {"columns": [{"name": "customer_id", "type": "bigint"}, ...]}

        :param destination_file_path: path of the Hyper file to write
        """
        # Store the destination path for the export
        self.output_file = destination_file_path
        logger.info(
            "Writing the Tableau Hyper file to the following location: {}".
            format(destination_file_path))
        logger.info(
            "The dataset to export has the following schema: {}".format(
                schema_dss))

        dss_columns = schema_dss['columns']
        dss_storage_types = [
            column_descriptor['type'] for column_descriptor in dss_columns
        ]
        self.schema_converter.set_dss_storage_types(dss_storage_types)

        self.is_geo_table = dss_is_geo(schema_dss)
        logger.info("The input dataset contains a geo column: {}".format(
            self.is_geo_table))

        if not self.schema_name or not self.table_name:
            logger.warning("Did not received the table or schema name.")
            raise ValueError("No valid schema or table name received.")

        logger.info("Received target schema {} and table {}".format(
            self.schema_name, self.table_name))

        # Create the Tableau Hyper schema from the DSS schema
        self.output_table_definition = TableDefinition(
            TableName(self.schema_name, self.table_name),
            self.schema_converter.dss_columns_to_hyper_columns(dss_columns))

        # Open connection to file
        self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
        self.connection = Connection(self.hyper.endpoint, self.output_file,
                                     CreateMode.CREATE_AND_REPLACE)
        assert self.connection is not None
        self.connection.catalog.create_schema(self.schema_name)
        self.connection.catalog.create_table(self.output_table_definition)

        # Handle the geo case
        if self.is_geo_table:
            logger.info("Detected geo column. Creating a temporary table...")
            dss_tmp_schema = geo_to_text(schema_dss)
            dss_tmp_columns = dss_tmp_schema['columns']
            self.tmp_table_definition = TableDefinition(
                TableName(self.schema_name, "tmp_" + self.table_name),
                self.schema_converter.dss_columns_to_hyper_columns(
                    dss_tmp_columns))
            self.connection.catalog.create_table(self.tmp_table_definition)
            logger.info("Created temporary table")

    def write_row(self, row):
        """
        Handle one row of data to export
        :param row: a tuple with N strings matching the schema passed to open method
        """
        try:
            hyper_compliant_row = self.schema_converter.prepare_row_to_hyper(
                row)
            self.data.append(hyper_compliant_row)
            self.row_index += 1

            if self.row_index % self.batch_size == 0:
                logger.info("Writing {} lines to hyper file".format(
                    len(self.data)))
                self.update_table()  # send data to hyper file, flush buffer
                self.data = []
        except Exception as err:
            logger.warning(
                "Failed to perform writing on following row:\n{}".format(row))
            raise err
        return True

    def update_table(self):
        """
        Perform an update of the Tableau Hyper file with new data
        """
        # For geo tables, route rows through the temporary text table first
        if self.is_geo_table:

            self.tmp_table_inserter = Inserter(self.connection,
                                               self.tmp_table_definition)
            self.tmp_table_inserter.add_rows(self.data)
            self.tmp_table_inserter.execute()
            self.tmp_table_inserter.close()

            self.connection.execute_command(
                command=
                f"INSERT INTO {self.output_table_definition.table_name} SELECT * FROM {self.tmp_table_definition.table_name};"
            )
            self.connection.execute_command(
                command=
                f"TRUNCATE TABLE {self.tmp_table_definition.table_name};")

        else:

            if self.connection is None:
                logger.warning("Connection to Tableau Hyper file is undefined")
            self.output_table_inserter = Inserter(self.connection,
                                                  self.output_table_definition)
            self.output_table_inserter.add_rows(self.data)
            self.output_table_inserter.execute()
            self.output_table_inserter.close()

        return True

    def close(self):
        """
        Release the Tableau Hyper connections
        """
        logger.info("Closing export ...")
        if self.data:
            logger.info("Performing final data update...")
            self.update_table()
            self.data = []
        logger.info("Closing Tableau Hyper connections...")
        if self.is_geo_table:
            self.connection.execute_command(
                command=f"DROP TABLE {self.tmp_table_definition.table_name};")
        # Close the connection before shutting down the Hyper process
        self.connection.close()
        self.hyper.close()
        logger.info("Closed export")
        return True
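A hypothetical export driver for the writer above; the schema and rows are toy values shaped the way DSS hands them to the exporter:

writer = TableauTableWriter(schema_name="public", table_name="orders")
writer.create_schema(
    {"columns": [{"name": "customer_id", "type": "bigint"},
                 {"name": "order_date", "type": "date"}]},
    "/tmp/orders.hyper")
writer.write_row(("42", "2018-06-01"))
writer.close()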