예제 #1
0
class Extract():
    def __init__(self, path="superstore.hyper"):
        super().__init__()
        self._path = Path(__file__).parent / path
        self._table_name = TableName("Extract", "Extract")
        self._hyper =  HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU)
        self._connection = Connection(  endpoint=self._hyper.endpoint, 
                                        database=self._path)

    def __del__(self):
        self._connection.close()
        self._hyper.close()

    def delete_data(self, date):
        row_count = self._connection.execute_command(
            command= f"DELETE FROM {self._table_name} WHERE order_date>='{date}'"
        )
        print(f"The number of deleted rows in table {self._table_name} "
            f"is {row_count}.\n")

    def read_extract(self):
        print(f"These are all rows in the table {self._table_name}:")
        rows_in_table = self._connection.execute_list_query(query=f"SELECT * FROM {self._table_name}")
        print(rows_in_table)

    def insert_data(self, data):
        with Inserter(self._connection, self._table_name) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
예제 #2
0
 def __init__(self, path="superstore.hyper"):
     super().__init__()
     self._path = Path(__file__).parent / path
     self._table_name = TableName("Extract", "Extract")
     self._hyper =  HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU)
     self._connection = Connection(  endpoint=self._hyper.endpoint, 
                                     database=self._path)
 def open_connection(self):
     """
     Open the connection to the Tableau Hyper file and the database
     """
     self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
     self.connection = Connection(self.hyper.endpoint, self.path_to_hyper)
     logger.info("Opened the connection to Tableau Hyper file")
예제 #4
0
    def create_schema(self, schema_dss, destination_file_path):
        """
        Read the Tableau Hyper file an.

        :param schema_dss: DSS schema from the DSS dataset to export
            example: [{"columns": [{"name": "customer_id", "type": "bigint"}, ...]}, ...]

        :param destination_file_path:
        :return:
        """
        # Read the destination file of the dss
        self.output_file = destination_file_path
        logger.info(
            "Writing the Tableau Hyper file to the following location: {}".
            format(destination_file_path))
        logger.info(
            "The dataset to export has the following schema: {}".format(
                schema_dss))

        dss_columns = schema_dss['columns']
        dss_storage_types = [
            column_descriptor['type'] for column_descriptor in dss_columns
        ]
        self.schema_converter.set_dss_storage_types(dss_storage_types)

        self.is_geo_table = dss_is_geo(schema_dss)
        logger.info("The input dataset contains a geo column: {}".format(
            self.is_geo_table))

        if not self.schema_name or not self.table_name:
            logger.warning("Did not received the table or schema name.")
            raise ValueError("No valid schema or table name received.")

        logger.info("Received target schema {} and table {}".format(
            self.schema_name, self.table_name))

        # Create the Tableau Hyper schema from the DSS schema
        self.output_table_definition = TableDefinition(
            TableName(self.schema_name, self.table_name),
            self.schema_converter.dss_columns_to_hyper_columns(dss_columns))

        # Open connection to file
        self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
        self.connection = Connection(self.hyper.endpoint, self.output_file,
                                     CreateMode.CREATE_AND_REPLACE)
        assert self.connection is not None
        self.connection.catalog.create_schema(self.schema_name)
        self.connection.catalog.create_table(self.output_table_definition)

        # Handle the geo case
        if self.is_geo_table:
            logger.info("Detected geo column. Creating a temporary table...")
            dss_tmp_schema = geo_to_text(schema_dss)
            dss_tmp_columns = dss_tmp_schema['columns']
            self.tmp_table_definition = TableDefinition(
                TableName(self.schema_name, "tmp_" + self.table_name),
                self.schema_converter.dss_columns_to_hyper_columns(
                    dss_tmp_columns))
            self.connection.catalog.create_table(self.tmp_table_definition)
            logger.info("Created temporary table")
예제 #5
0
 def test_hyper_columns_to_dss_columns(self):
     schema_converter = SchemaConversion()
     path_to_hyper = "data/superstore_sample.hyper"
     hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
     connection = Connection(hyper.endpoint, path_to_hyper)
     hyper_table = connection.catalog.get_table_definition(
         TableName('public', 'Customer'))
     connection.close()
     hyper.close()
     dss_columns = schema_converter.hyper_columns_to_dss_columns(
         hyper_table.columns)
     return True
예제 #6
0
def insert_data():
    """
    Creates a simple .hyper file. For more on this, see the below example:
    https://github.com/tableau/hyper-api-samples/blob/main/Tableau-Supported/Python/insert_data_into_single_table.py
    """
    print("Creating single table for conversion.")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Creates new Hyper file
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

            # Creates schema and table with table_definition.
            connection.catalog.create_schema(
                schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # The rows to insert into the "Extract"."Extract" table.
            data_to_insert = [["DK-13375", "Dennis Kane", 685, "Consumer"],
                              ["EB-13705", "Ed Braxton", 815, "Corporate"]]

            # Insert the data.
            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
def run_create_hyper_file_from_csv():

    path_to_database = Path("Workbook_Info.hyper")

    process_parameters = {
        "log_file_max_count": "2",
        "log_file_size_limit": "100M"
    }

    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, parameters=process_parameters) as hyper:

        connection_parameters = {"lc_time": "en_US"}

        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE,
                        parameters=connection_parameters) as connection:

            connection.catalog.create_table(table_definition=Workbook_Info_table)

            path_to_csv = str(Path(__file__).parent / "workbook_info.csv")

            count_in_Workbook_Info_table = connection.execute_command(
                command=f"COPY {Workbook_Info_table.table_name} from {escape_string_literal(path_to_csv)} with "
                f"(format csv, NULL 'NULL', delimiter ',', header)")
예제 #8
0
def to_hyper(df,
             hyper_file_name,
             custom_schema="Extract",
             custom_table_name="Extract"):
    """
    Write a Tableau Hyper file from a Pandas DataFrame.

    Currently can only write single table extracts, which is Tableau's
    default way of creating an extract.

    Args:
        df: Specify which DataFrame you want to output
        hyper_file_name: Specify the file name such as "Example.hyper"
        custom_schema: If you need to change the schema name. Defaults to "Extract"
        custom_table_name: If you need to change the schema name. Defaults to "Extract"

    Returns:
        Tableau Hyper file
    """

    # Starts the Hyper Process
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
            parameters={"log_config": ""},
    ) as hyper:

        # Creates a .hyper file to put the data into
        with Connection(hyper.endpoint, hyper_file_name,
                        CreateMode.CREATE_AND_REPLACE) as connection:

            connection.catalog.create_schema(custom_schema)

            # create a .hyper compatible column definition
            # from pd DataFrame column names and dtypes
            # using 3 list comprehensions to loop through
            # all the columns in the DataFrame

            column_names = [column for column in df.columns]

            column_dtype = [dtype for dtype in df.dtypes]

            hyper_table = TableDefinition(
                TableName(custom_schema, custom_table_name),
                [
                    TableDefinition.Column(
                        column_names[column], dtype_mapper[str(
                            column_dtype[column])])
                    for column in range(len(column_names))
                ],
            )
            connection.catalog.create_table(hyper_table)

            # Repace NaN with None, otherwise it will not be Null in Tableau
            df.replace({np.nan: None}, inplace=True)

            # Insert the data values into the hyper file
            data_to_insert = df.to_numpy()
            with Inserter(connection, hyper_table) as inserter:
                inserter.add_rows(tqdm((row for row in data_to_insert)))
                inserter.execute()
예제 #9
0
def run_create_hyper_file_from_csv():
    """
    Loading data from a csv into a new Hyper file
    """
    print("Load data from CSV into table in new Hyper file")
    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        with Connection(
                endpoint=hyper.endpoint,
                database=cf.hyper_file_path,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

            connection.catalog.create_table(table_definition=customer_table)
            # `execute_command` executes a SQL statement and returns the impacted row count.
            count_in_customer_table = connection.execute_command(
                command=
                f"COPY {customer_table.table_name} from {escape_string_literal(cf.input_file_path)} with "
                f"(format csv, NULL 'NULL', delimiter ',', header)")

            print(
                f"The number of rows in table {customer_table.table_name} is {count_in_customer_table}."
            )

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
예제 #10
0
def get_table_columns(table):
    """
    Returns the list of the columns in the table.
    This is a list of dictionaries having the following keys:
    * name: the Tableau caption of the column (when found)
    * type: the data type contained in that column
    * nullability: a flag indicating if the column may contain null values
    """
    return_value = []

    columns_mappings = get_columns_mappings()

    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=settings.HYPER_FILE_PATH) as connection:
            table_definition = connection.catalog.get_table_definition(
                name=table)
            for column in table_definition.columns:
                name = str(column.name).replace('"', "")
                if name in columns_mappings.keys():
                    name = columns_mappings[name]
                return_value.append({
                    "name": name,
                    "type": str(column.type),
                    "nullability": str(column.nullability),
                })
    return return_value
예제 #11
0
    def append(self, sql: str = None, df: DataFrame = None):
        """Appends new data to a Hyper File"""
        # First, must materialize the new data back to the driver node
        if sql is not None and df is None:
            self.sql = sql
            self.df = get_spark_dataframe(sql)
        elif sql is None and df is not None:
            self.df = df
        else:
            raise ValueError(
                'Missing either SQL statement or Spark DataFrame as argument.')
        data = HyperUtils.get_rows(self.df)

        # Convert the Spark DataFrame schema to a Tableau Table Def
        table_def = HyperUtils.get_table_def(self.df, "Extract", "Extract")

        # Insert, the new data into Hyper File
        hyper_database_path = self.path
        logging.info(
            f'Inserting new data into Hyper database: {hyper_database_path}')
        with HyperProcess(
                telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
            with Connection(endpoint=hp.endpoint,
                            database=hyper_database_path,
                            create_mode=CreateMode.NONE) as connection:
                with Inserter(connection, table_def) as inserter:
                    inserter.add_rows(rows=data)
                    inserter.execute()
예제 #12
0
def _read_table(*, connection: tab_api.Connection,
                table: TableType) -> pd.DataFrame:
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)
    columns = table_def.columns

    dtypes: Dict[str, str] = {}
    for column in columns:
        column_type = pantab_types._ColumnType(column.type, column.nullability)
        dtypes[column.name.unescaped] = _tableau_to_pandas_type(column_type)

    with connection.execute_query(f"SELECT * from {table}") as result:
        df = pd.DataFrame(result)

    df.columns = dtypes.keys()
    # The tableauhyperapi.Timestamp class is not implicitly convertible to a datetime
    # so we need to run an apply against applicable types
    for key, val in dtypes.items():
        if val == "datetime64[ns]":
            df[key] = df[key].apply(lambda x: x._to_datetime())
        elif val == "datetime64[ns, UTC]":
            df[key] = df[key].apply(lambda x: x._to_datetime()).dt.tz_localize(
                "UTC")
        elif val == "timedelta64[ns]":
            df[key] = df[key].apply(_interval_to_timedelta)

    df = df.astype(dtypes)
    df = df.fillna(value=np.nan)  # Replace any appearances of None

    return df
예제 #13
0
def run_create_hyper_file_from_csv():

    print("Inside Fucntion to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(
                endpoint=hyper.endpoint,
                database='C:/Users/admin/Desktop/extrp1.hyper',
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('extract')
            connection.catalog.create_table(table_definition=extract_table)
            # create a path that locates CSV file to be used
            path_to_csv = 'C:/Users/admin/Desktop/testing1.csv'

            # `execute_command` executes a SQL statement and returns the impacted row count.
            count_in_table = connection.execute_command(
                command=
                f"COPY {extract_table.table_name} from {escape_string_literal(path_to_csv)} with "
                f"(format csv, NULL 'NULL', delimiter ',', header)")
            print(count_in_table)
            with Inserter(connection, TableName('extract',
                                                'extract')) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()
 def fn_hyper_handle(self, in_logger, timer, in_dict):
     timer.start()
     out_data_frame = None
     try:
         # Starts Hyper Process with telemetry enabled/disabled to send data to Tableau or not
         # To opt in, simply set telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU.
         # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
         telemetry_chosen = Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
         with HyperProcess(telemetry=telemetry_chosen) as hyper_process:
             in_logger.debug(
                 self.locale.gettext('Hyper engine process initialized'))
             in_logger.debug(
                 self.locale.gettext(
                     'Chosen Telemetry is {telemetry_value}').replace(
                         '{telemetry_value}', str(telemetry_chosen)))
             timer.stop()
             timer.start()
             hyper_create_mode = {
                 'append': CreateMode.NONE,
                 'overwrite': CreateMode.CREATE_AND_REPLACE,
                 'read': CreateMode.NONE,
                 'delete': CreateMode.NONE,
                 'update': CreateMode.NONE,
             }
             #  Connect to an existing .hyper file
             with Connection(endpoint=hyper_process.endpoint,
                             database=in_dict['hyper file'],
                             create_mode=hyper_create_mode.get(
                                 in_dict['action'])) as hyper_connection:
                 in_logger.debug(
                     self.locale.gettext(
                         'Connection to the Hyper engine using file name "{file_name}" '
                         + 'has been established').replace(
                             '{file_name}', in_dict['hyper file']))
                 timer.stop()
                 in_dict['connection'] = hyper_connection
                 if in_dict['action'] == 'read':
                     out_data_frame = self.fn_hyper_read(
                         in_logger, timer, in_dict)
                 elif in_dict['action'] in ('append', 'overwrite'):
                     self.fn_write_data_into_hyper_file(
                         in_logger, timer, in_dict)
                 elif in_dict['action'] in ('delete', 'update'):
                     self.fn_delete_data_from_hyper(in_logger, timer,
                                                    in_dict)
                     self.fn_get_records_count_from_table(
                         in_logger, timer, in_dict)
         timer.start()
         hyper_connection.close()
         in_logger.info(
             self.locale.gettext(
                 'Connection to the Hyper engine file has been closed'))
         in_logger.info(
             self.locale.gettext('Hyper engine process has been shut down'))
         timer.stop()
     except HyperException as ex:
         in_logger.error(str(ex).replace(chr(10), ' '))
         timer.stop()
         exit(1)
     return out_data_frame
def HyperCreate():
    print(">>> Creating Hyper File <<<")
    path_to_database = Path(hyperfile)
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,database=path_to_database,create_mode=CreateMode.CREATE_IF_NOT_EXISTS) as connection:
            connection.execute_command(command=
                f'''create table if not exists http  (
                    serving_host             text,
                    client_host              text,
                    username                 text,
                    ts                       text,
                    timezone                 text,
                    port                     text,
                    request_body             text,
                    xforward_for             text,
                    status_code              text,
                    response_size            text,
                    content_length           text,
                    request_time_ms          text,
                    request_id               text
                );''')
            connection.execute_command(command=
                f'''create table if not exists dump_table (
                    dump text
                );''')
예제 #16
0
 def _create_schema(self):
     with HyperProcess(
             telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
         with Connection(hyper.endpoint, self.hyper,
                         CreateMode.CREATE_AND_REPLACE) as connection:
             for table in self.tables:
                 connection.catalog.create_table(table)
예제 #17
0
 def get_tables(schema: str, hyper_file_path: str):
     with HyperProcess(
             telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
         with Connection(endpoint=hp.endpoint,
                         database=hyper_file_path) as connection:
             catalog = connection.catalog
             # Query the Catalog API for all tables under the given schema
             return catalog.get_table_names(SchemaName(schema))
예제 #18
0
 def get_row_count(schema: str, table: str, hyper_file_path: str):
     with HyperProcess(
             telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
         with Connection(endpoint=hp.endpoint,
                         database=hyper_file_path) as connection:
             # Query the Hyper File for the number of rows in the table
             return connection.execute_scalar_query(
                 f"SELECT COUNT(*) FROM {TableName(schema, table)}")
예제 #19
0
class Hyper(System):
    def __init__(self, filename):
        self.db = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
        self.conn = Connection(self.db.endpoint, filename, CreateMode.CREATE)

    def create(self, ddl):
        count = self.conn.execute_command(ddl)
        return [count]

    def load(self, filename):
        count = self.conn.execute_command("COPY logs FROM '" + filename +
                                          "' WITH (FORMAT csv, HEADER)")
        return [count]

    def query(self, sql):
        #schema = result.schema()
        return self.conn.execute_query(sql)
예제 #20
0
    def test_export_date_values(self):
        """
        Test the export of geo values (DSS geopoint storage type)
        :return:
        """
        nan = float("nan")

        # ===> Define parameters input from DSS for exporter
        config = {}
        plugin_config = {}
        schema = {
            'columns': [{
                'name': 'price',
                'type': 'double'
            }, {
                'name': 'last_review_parsed',
                'type': 'date'
            }],
            'userModified':
            True
        }
        rows = [(149.0, Timestamp('2018-10-19 13:20:50.349000')),
                (225.0, Timestamp('2019-05-21 00:00:00')), (150.0, NaT),
                (80.0, NaT)]
        # <===

        # ===> Create a DSS-like exporter
        exporter = TableauHyperExporter(config, plugin_config)
        output_file_name = get_random_alphanumeric_string(10) + '.hyper'
        destination_file_path = os.path.join(self.output_path,
                                             output_file_name)
        exporter.open_to_file(schema, destination_file_path)
        for row in rows:
            exporter.write_row(row)
        exporter.close()

        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
                          ) as hyper:
            with Connection(endpoint=hyper.endpoint,
                            database=destination_file_path) as connection:
                with connection.execute_query(
                        query=f"SELECT * FROM {TableName('Extract', 'Extract')}"
                ) as result:
                    rows_from_hyper = list(result)

        assert len(rows) == len(rows_from_hyper)

        count = 0
        valid = 0
        for i in range(len(rows)):
            row_dss, row_hyper = rows[i], rows_from_hyper[i]
            for j in range(len(row_dss)):
                a, b = row_dss[j], row_hyper[j]
                if (pd.isna(a) and pd.isna(b)) or (a == b):
                    valid += 1
                count += 1

        os.remove(destination_file_path)
예제 #21
0
 def test_to_dss_date(self):
     schema_converter = SchemaConversion()
     path_to_hyper = "data/superstore_sample.hyper"
     hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
     connection = Connection(hyper.endpoint, path_to_hyper)
     hyper_table = TableName('public', 'Orders')
     hyper_table_def = connection.catalog.get_table_definition(hyper_table)
     result = connection.execute_query(f'SELECT * FROM {hyper_table}')
     for row in result:
         pass
     sample_date = row[2].to_date()
     dss_date = datetime.datetime(sample_date.year, sample_date.month,
                                  sample_date.day)
     connection.close()
     hyper.close()
     dss_columns = schema_converter.hyper_columns_to_dss_columns(
         hyper_table_def.columns)
     return True
예제 #22
0
def hyperfile_to_df(hyper_path):
    print(hyper_path)
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,database=hyper_path) as connection:
            print(connection.catalog.get_table_names(schema="Extract"))
            tablename = connection.catalog.get_table_names(schema="Extract")[0]
            table_definition = connection.catalog.get_table_definition(name=QualifiedName("Extract",tablename))
            rows_in_table = connection.execute_result_list(query=f"SELECT * FROM {QualifiedName('Extract',tablename)}")#f"SELECT * FROM {escape_name(tablename)}")
            df = pd.DataFrame(columns=[c.name for c in table_definition.columns],data=rows_in_table)
    return df
예제 #23
0
    def test_export_int_values(self):
        # ===> Define parameters input from DSS for exporter
        config = {}
        plugin_config = {}
        schema = {
            'columns': [{
                'name': 'tiny_int',
                'type': 'tinyint'
            }, {
                'name': 'small_int',
                'type': 'smallint'
            }, {
                'name': 'big_int',
                'type': 'bigint'
            }],
            'userModified':
            True
        }
        rows = [(12, 370, -9223372036854775808), (2, 1000, 2147483647),
                (15, 15000, 21474836477777777), (126, 32766, 2147483648),
                (127, 32767, 2147483648)]
        # <===

        # ===> Create a DSS-like exporter
        exporter = TableauHyperExporter(config, plugin_config)
        output_file_name = get_random_alphanumeric_string(10) + '.hyper'
        destination_file_path = os.path.join(self.output_path,
                                             output_file_name)
        exporter.open_to_file(schema, destination_file_path)
        for row in rows:
            exporter.write_row(row)
        exporter.close()

        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
                          ) as hyper:
            with Connection(endpoint=hyper.endpoint,
                            database=destination_file_path) as connection:
                with connection.execute_query(
                        query=f"SELECT * FROM {TableName('Extract', 'Extract')}"
                ) as result:
                    rows_from_hyper = list(result)

        assert len(rows) == len(rows_from_hyper)

        count = 0
        valid = 0
        for i in range(len(rows)):
            row_dss, row_hyper = rows[i], rows_from_hyper[i]
            for j in range(len(row_dss)):
                a, b = row_dss[j], row_hyper[j]
                if (pd.isna(a) and pd.isna(b)) or (a == b):
                    valid += 1
                count += 1

        os.remove(destination_file_path)
def run_update_data_in_existing_hyper_file():
    """
    An example of how to update data in an existing Hyper file.
    """
    print("EXAMPLE - Update existing data in an Hyper file")

    # Path to a Hyper file containing all data inserted into Customer, Product, Orders and LineItems table.
    # See "insert_data_into_multiple_tables.py" for an example that works with the complete schema.
    path_to_source_database = Path(
        __file__).parent / "data" / "superstore_sample.hyper"

    # Make a copy of the superstore sample Hyper file.
    path_to_database = Path(
        shutil.copy(path_to_source_database,
                    "superstore_sample_update.hyper")).resolve()

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Connect to existing Hyper file "superstore_sample_update.hyper".
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:

            rows_pre_update = connection.execute_list_query(
                query=
                f"SELECT {escape_name('Loyalty Reward Points')}, {escape_name('Segment')}"
                f"FROM {escape_name('Customer')}")
            print(
                f"Pre-Update: Individual rows showing 'Loyalty Reward Points' and 'Segment' "
                f"columns: {rows_pre_update}\n")

            print(
                "Update 'Customers' table by adding 50 Loyalty Reward Points to all Corporate Customers."
            )
            row_count = connection.execute_command(
                command=f"UPDATE {escape_name('Customer')} "
                f"SET {escape_name('Loyalty Reward Points')} = {escape_name('Loyalty Reward Points')} + 50 "
                f"WHERE {escape_name('Segment')} = {escape_string_literal('Corporate')}"
            )

            print(
                f"The number of updated rows in table {escape_name('Customer')} is {row_count}"
            )

            rows_post_update = connection.execute_list_query(
                query=
                f"SELECT {escape_name('Loyalty Reward Points')}, {escape_name('Segment')} "
                f"FROM {escape_name('Customer')}")
            print(
                f"Post-Update: Individual rows showing 'Loyalty Reward Points' and 'Segment' "
                f"columns: {rows_post_update}")

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
예제 #25
0
 def delete(self, where_condition):
     # Start Hyper
     with HyperProcess(
             telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
         #  Connect to an existing .hyper file (CreateMode.NONE)
         with Connection(endpoint=hyper.endpoint,
                         database=self.path) as connection:
             delete_command = 'DELETE FROM "Extract"."Extract" WHERE {}'.format(
                 where_condition)
             row_count = connection.execute_command(command=delete_command)
             logging.info(f'{delete_command}, row_count={row_count}')
예제 #26
0
def get_table_rows(table):
    """
    Returns a list of list representing all the rows in the table
    """
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=settings.HYPER_FILE_PATH) as connection:
            rows_in_table = connection.execute_list_query(
                query=f"SELECT * FROM {table}")
            return rows_in_table
예제 #27
0
def run_delete_data_in_existing_hyper_file():
    """
    An example of how to delete data in an existing Hyper file.
    """
    print("EXAMPLE - Delete data from an existing Hyper file")

    # Path to a Hyper file containing all data inserted into Customer, Product, Orders and LineItems table.
    # See "insert_data_into_multiple_tables.py" for an example that works with the complete schema.
    path_to_source_database = Path(
        __file__).parent / "data" / "superstore_sample.hyper"

    # Make a copy of the superstore example Hyper file.
    path_to_database = Path(
        shutil.copy(path_to_source_database,
                    "superstore_sample_delete.hyper")).resolve()

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Connect to existing Hyper file "superstore_sample_delete.hyper".
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:

            print(
                f"Delete all rows from customer with the name 'Dennis Kane' from table {escape_name('Orders')}."
            )
            # `execute_command` executes a SQL statement and returns the impacted row count.
            row_count = connection.execute_command(
                command=f"DELETE FROM {escape_name('Orders')} "
                f"WHERE {escape_name('Customer ID')} = ANY("
                f"SELECT {escape_name('Customer ID')} FROM {escape_name('Customer')} "
                f"WHERE {escape_name('Customer Name')} = {escape_string_literal('Dennis Kane')})"
            )

            print(
                f"The number of deleted rows in table {escape_name('Orders')} "
                f"is {row_count}.\n")

            print(
                f"Delete all rows from customer with the name 'Dennis Kane' from table {escape_name('Customer')}."
            )
            row_count = connection.execute_command(
                command=f"DELETE FROM {escape_name('Customer')} "
                f"WHERE {escape_name('Customer Name')} = {escape_string_literal('Dennis Kane')}"
            )

            print(
                f"The number of deleted rows in table Customer is {row_count}."
            )

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
예제 #28
0
 def print_rows(self):
     """Prints the first 1,000 rows of a Hyper file"""
     with HyperProcess(
             telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
         with Connection(endpoint=hp.endpoint,
                         database=self.path) as connection:
             rows = connection.execute_list_query(
                 f"SELECT * FROM {TableName('Extract', 'Extract')} LIMIT 1000"
             )
             print("Showing first 1,000 rows")
             for row in rows:
                 print(row)
예제 #29
0
def write_df_to_hyper(df,hyper_path):
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,database=hyper_path) as connection:
            tablename = connection.catalog.get_table_names(schema="Extract")[0]

            # Delete the old data
            connection.execute_command(command=f"DELETE FROM {escape_name(tablename)}")

            with Inserter(connection, tablename) as inserter:
                inserter.add_rows(rows=df.values.tolist())
                inserter.execute()
    return hyper_file_path
예제 #30
0
def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")

    path_to_database = Path("customer.hyper")

    # Optional process parameters.
    # They are documented in the Tableau Hyper documentation, chapter "Process Settings"
    # (https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/processsettings.html).
    process_parameters = {
        # Limits the number of Hyper event log files to two.
        "log_file_max_count": "2",
        # Limits the size of Hyper event log files to 100 megabytes.
        "log_file_size_limit": "100M"
    }

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU,
                      parameters=process_parameters) as hyper:

        # Optional connection parameters.
        # They are documented in the Tableau Hyper documentation, chapter "Connection Settings"
        # (https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/connectionsettings.html).
        connection_parameters = {"lc_time": "en_US"}

        # Creates new Hyper file "customer.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE,
                        parameters=connection_parameters) as connection:

            connection.catalog.create_table(table_definition=customer_table)

            # Using path to current file, create a path that locates CSV file packaged with these examples.
            path_to_csv = str(Path(__file__).parent / "data" / "customers.csv")

            # Load all rows into "Customers" table from the CSV file.
            # `execute_command` executes a SQL statement and returns the impacted row count.
            count_in_customer_table = connection.execute_command(
                command=
                f"COPY {customer_table.table_name} from {escape_string_literal(path_to_csv)} with "
                f"(format csv, NULL 'NULL', delimiter ',', header)")

            print(
                f"The number of rows in table {customer_table.table_name} is {count_in_customer_table}."
            )

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")