Esempio n. 1
0
def get_table_columns(table):
    """
    Returns the list of the columns in the table.
    This is a list of dictionaries having the following keys:
    * name: the Tableau caption of the column (when found)
    * type: the data type contained in that column
    * nullability: a flag indicating if the column may contain null values
    """
    return_value = []

    columns_mappings = get_columns_mappings()

    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=settings.HYPER_FILE_PATH) as connection:
            table_definition = connection.catalog.get_table_definition(
                name=table)
            for column in table_definition.columns:
                name = str(column.name).replace('"', "")
                if name in columns_mappings.keys():
                    name = columns_mappings[name]
                return_value.append({
                    "name": name,
                    "type": str(column.type),
                    "nullability": str(column.nullability),
                })
    return return_value
 def fn_hyper_handle(self, in_logger, timer, in_dict):
     timer.start()
     out_data_frame = None
     try:
         # Starts Hyper Process with telemetry enabled/disabled to send data to Tableau or not
         # To opt in, simply set telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU.
         # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
         telemetry_chosen = Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
         with HyperProcess(telemetry=telemetry_chosen) as hyper_process:
             in_logger.debug(
                 self.locale.gettext('Hyper engine process initialized'))
             in_logger.debug(
                 self.locale.gettext(
                     'Chosen Telemetry is {telemetry_value}').replace(
                         '{telemetry_value}', str(telemetry_chosen)))
             timer.stop()
             timer.start()
             hyper_create_mode = {
                 'append': CreateMode.NONE,
                 'overwrite': CreateMode.CREATE_AND_REPLACE,
                 'read': CreateMode.NONE,
                 'delete': CreateMode.NONE,
                 'update': CreateMode.NONE,
             }
             #  Connect to an existing .hyper file
             with Connection(endpoint=hyper_process.endpoint,
                             database=in_dict['hyper file'],
                             create_mode=hyper_create_mode.get(
                                 in_dict['action'])) as hyper_connection:
                 in_logger.debug(
                     self.locale.gettext(
                         'Connection to the Hyper engine using file name "{file_name}" '
                         + 'has been established').replace(
                             '{file_name}', in_dict['hyper file']))
                 timer.stop()
                 in_dict['connection'] = hyper_connection
                 if in_dict['action'] == 'read':
                     out_data_frame = self.fn_hyper_read(
                         in_logger, timer, in_dict)
                 elif in_dict['action'] in ('append', 'overwrite'):
                     self.fn_write_data_into_hyper_file(
                         in_logger, timer, in_dict)
                 elif in_dict['action'] in ('delete', 'update'):
                     self.fn_delete_data_from_hyper(in_logger, timer,
                                                    in_dict)
                     self.fn_get_records_count_from_table(
                         in_logger, timer, in_dict)
         timer.start()
         hyper_connection.close()
         in_logger.info(
             self.locale.gettext(
                 'Connection to the Hyper engine file has been closed'))
         in_logger.info(
             self.locale.gettext('Hyper engine process has been shut down'))
         timer.stop()
     except HyperException as ex:
         in_logger.error(str(ex).replace(chr(10), ' '))
         timer.stop()
         exit(1)
     return out_data_frame
def run_create_hyper_file_from_csv():

    path_to_database = Path("Workbook_Info.hyper")

    process_parameters = {
        "log_file_max_count": "2",
        "log_file_size_limit": "100M"
    }

    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, parameters=process_parameters) as hyper:

        connection_parameters = {"lc_time": "en_US"}

        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE,
                        parameters=connection_parameters) as connection:

            connection.catalog.create_table(table_definition=Workbook_Info_table)

            path_to_csv = str(Path(__file__).parent / "workbook_info.csv")

            count_in_Workbook_Info_table = connection.execute_command(
                command=f"COPY {Workbook_Info_table.table_name} from {escape_string_literal(path_to_csv)} with "
                f"(format csv, NULL 'NULL', delimiter ',', header)")
Esempio n. 4
0
 def _create_schema(self):
     with HyperProcess(
             telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
         with Connection(hyper.endpoint, self.hyper,
                         CreateMode.CREATE_AND_REPLACE) as connection:
             for table in self.tables:
                 connection.catalog.create_table(table)
def HyperCreate():
    print(">>> Creating Hyper File <<<")
    path_to_database = Path(hyperfile)
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,database=path_to_database,create_mode=CreateMode.CREATE_IF_NOT_EXISTS) as connection:
            connection.execute_command(command=
                f'''create table if not exists http  (
                    serving_host             text,
                    client_host              text,
                    username                 text,
                    ts                       text,
                    timezone                 text,
                    port                     text,
                    request_body             text,
                    xforward_for             text,
                    status_code              text,
                    response_size            text,
                    content_length           text,
                    request_time_ms          text,
                    request_id               text
                );''')
            connection.execute_command(command=
                f'''create table if not exists dump_table (
                    dump text
                );''')
Esempio n. 6
0
def createHyperExtract(hyperLocation, extract_table, records, tableName):
    path_to_database = Path(hyperLocation)

    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection_tab(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(
                schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=records)
                inserter.execute()

            table_names = connection.catalog.get_table_names(
                extract_table.table_name.schema_name)
            logger.debug(
                f"  Tables available in {path_to_database} are: {table_names}")

            row_count = connection.execute_scalar_query(
                query=f"SELECT COUNT(*) FROM {extract_table.table_name}")
            logger.info(
                f"  The number of rows in table {extract_table.table_name} is {row_count}."
            )

        logger.debug("  The connection to the Hyper file has been closed.")
    logger.debug("  The Hyper process has been shut down.")
Esempio n. 7
0
    def append(self, sql: str = None, df: DataFrame = None):
        """Appends new data to a Hyper File"""
        # First, must materialize the new data back to the driver node
        if sql is not None and df is None:
            self.sql = sql
            self.df = get_spark_dataframe(sql)
        elif sql is None and df is not None:
            self.df = df
        else:
            raise ValueError(
                'Missing either SQL statement or Spark DataFrame as argument.')
        data = HyperUtils.get_rows(self.df)

        # Convert the Spark DataFrame schema to a Tableau Table Def
        table_def = HyperUtils.get_table_def(self.df, "Extract", "Extract")

        # Insert, the new data into Hyper File
        hyper_database_path = self.path
        logging.info(
            f'Inserting new data into Hyper database: {hyper_database_path}')
        with HyperProcess(
                telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
            with Connection(endpoint=hp.endpoint,
                            database=hyper_database_path,
                            create_mode=CreateMode.NONE) as connection:
                with Inserter(connection, table_def) as inserter:
                    inserter.add_rows(rows=data)
                    inserter.execute()
Esempio n. 8
0
 def get_row_count(schema: str, table: str, hyper_file_path: str):
     with HyperProcess(
             telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
         with Connection(endpoint=hp.endpoint,
                         database=hyper_file_path) as connection:
             # Query the Hyper File for the number of rows in the table
             return connection.execute_scalar_query(
                 f"SELECT COUNT(*) FROM {TableName(schema, table)}")
Esempio n. 9
0
 def get_tables(schema: str, hyper_file_path: str):
     with HyperProcess(
             telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
         with Connection(endpoint=hp.endpoint,
                         database=hyper_file_path) as connection:
             catalog = connection.catalog
             # Query the Catalog API for all tables under the given schema
             return catalog.get_table_names(SchemaName(schema))
Esempio n. 10
0
    def test_export_date_values(self):
        """
        Test the export of geo values (DSS geopoint storage type)
        :return:
        """
        nan = float("nan")

        # ===> Define parameters input from DSS for exporter
        config = {}
        plugin_config = {}
        schema = {
            'columns': [{
                'name': 'price',
                'type': 'double'
            }, {
                'name': 'last_review_parsed',
                'type': 'date'
            }],
            'userModified':
            True
        }
        rows = [(149.0, Timestamp('2018-10-19 13:20:50.349000')),
                (225.0, Timestamp('2019-05-21 00:00:00')), (150.0, NaT),
                (80.0, NaT)]
        # <===

        # ===> Create a DSS-like exporter
        exporter = TableauHyperExporter(config, plugin_config)
        output_file_name = get_random_alphanumeric_string(10) + '.hyper'
        destination_file_path = os.path.join(self.output_path,
                                             output_file_name)
        exporter.open_to_file(schema, destination_file_path)
        for row in rows:
            exporter.write_row(row)
        exporter.close()

        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
                          ) as hyper:
            with Connection(endpoint=hyper.endpoint,
                            database=destination_file_path) as connection:
                with connection.execute_query(
                        query=f"SELECT * FROM {TableName('Extract', 'Extract')}"
                ) as result:
                    rows_from_hyper = list(result)

        assert len(rows) == len(rows_from_hyper)

        count = 0
        valid = 0
        for i in range(len(rows)):
            row_dss, row_hyper = rows[i], rows_from_hyper[i]
            for j in range(len(row_dss)):
                a, b = row_dss[j], row_hyper[j]
                if (pd.isna(a) and pd.isna(b)) or (a == b):
                    valid += 1
                count += 1

        os.remove(destination_file_path)
Esempio n. 11
0
 def test_to_dss_date(self):
     schema_converter = SchemaConversion()
     path_to_hyper = "data/superstore_sample.hyper"
     hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
     connection = Connection(hyper.endpoint, path_to_hyper)
     hyper_table = TableName('public', 'Orders')
     hyper_table_def = connection.catalog.get_table_definition(hyper_table)
     result = connection.execute_query(f'SELECT * FROM {hyper_table}')
     for row in result:
         pass
     sample_date = row[2].to_date()
     dss_date = datetime.datetime(sample_date.year, sample_date.month,
                                  sample_date.day)
     connection.close()
     hyper.close()
     dss_columns = schema_converter.hyper_columns_to_dss_columns(
         hyper_table_def.columns)
     return True
Esempio n. 12
0
def hyperfile_to_df(hyper_path):
    print(hyper_path)
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,database=hyper_path) as connection:
            print(connection.catalog.get_table_names(schema="Extract"))
            tablename = connection.catalog.get_table_names(schema="Extract")[0]
            table_definition = connection.catalog.get_table_definition(name=QualifiedName("Extract",tablename))
            rows_in_table = connection.execute_result_list(query=f"SELECT * FROM {QualifiedName('Extract',tablename)}")#f"SELECT * FROM {escape_name(tablename)}")
            df = pd.DataFrame(columns=[c.name for c in table_definition.columns],data=rows_in_table)
    return df
def run_update_data_in_existing_hyper_file():
    """
    An example of how to update data in an existing Hyper file.
    """
    print("EXAMPLE - Update existing data in an Hyper file")

    # Path to a Hyper file containing all data inserted into Customer, Product, Orders and LineItems table.
    # See "insert_data_into_multiple_tables.py" for an example that works with the complete schema.
    path_to_source_database = Path(
        __file__).parent / "data" / "superstore_sample.hyper"

    # Make a copy of the superstore sample Hyper file.
    path_to_database = Path(
        shutil.copy(path_to_source_database,
                    "superstore_sample_update.hyper")).resolve()

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Connect to existing Hyper file "superstore_sample_update.hyper".
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:

            rows_pre_update = connection.execute_list_query(
                query=
                f"SELECT {escape_name('Loyalty Reward Points')}, {escape_name('Segment')}"
                f"FROM {escape_name('Customer')}")
            print(
                f"Pre-Update: Individual rows showing 'Loyalty Reward Points' and 'Segment' "
                f"columns: {rows_pre_update}\n")

            print(
                "Update 'Customers' table by adding 50 Loyalty Reward Points to all Corporate Customers."
            )
            row_count = connection.execute_command(
                command=f"UPDATE {escape_name('Customer')} "
                f"SET {escape_name('Loyalty Reward Points')} = {escape_name('Loyalty Reward Points')} + 50 "
                f"WHERE {escape_name('Segment')} = {escape_string_literal('Corporate')}"
            )

            print(
                f"The number of updated rows in table {escape_name('Customer')} is {row_count}"
            )

            rows_post_update = connection.execute_list_query(
                query=
                f"SELECT {escape_name('Loyalty Reward Points')}, {escape_name('Segment')} "
                f"FROM {escape_name('Customer')}")
            print(
                f"Post-Update: Individual rows showing 'Loyalty Reward Points' and 'Segment' "
                f"columns: {rows_post_update}")

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
Esempio n. 14
0
    def test_export_int_values(self):
        # ===> Define parameters input from DSS for exporter
        config = {}
        plugin_config = {}
        schema = {
            'columns': [{
                'name': 'tiny_int',
                'type': 'tinyint'
            }, {
                'name': 'small_int',
                'type': 'smallint'
            }, {
                'name': 'big_int',
                'type': 'bigint'
            }],
            'userModified':
            True
        }
        rows = [(12, 370, -9223372036854775808), (2, 1000, 2147483647),
                (15, 15000, 21474836477777777), (126, 32766, 2147483648),
                (127, 32767, 2147483648)]
        # <===

        # ===> Create a DSS-like exporter
        exporter = TableauHyperExporter(config, plugin_config)
        output_file_name = get_random_alphanumeric_string(10) + '.hyper'
        destination_file_path = os.path.join(self.output_path,
                                             output_file_name)
        exporter.open_to_file(schema, destination_file_path)
        for row in rows:
            exporter.write_row(row)
        exporter.close()

        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
                          ) as hyper:
            with Connection(endpoint=hyper.endpoint,
                            database=destination_file_path) as connection:
                with connection.execute_query(
                        query=f"SELECT * FROM {TableName('Extract', 'Extract')}"
                ) as result:
                    rows_from_hyper = list(result)

        assert len(rows) == len(rows_from_hyper)

        count = 0
        valid = 0
        for i in range(len(rows)):
            row_dss, row_hyper = rows[i], rows_from_hyper[i]
            for j in range(len(row_dss)):
                a, b = row_dss[j], row_hyper[j]
                if (pd.isna(a) and pd.isna(b)) or (a == b):
                    valid += 1
                count += 1

        os.remove(destination_file_path)
Esempio n. 15
0
def get_table_rows(table):
    """
    Returns a list of list representing all the rows in the table
    """
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=settings.HYPER_FILE_PATH) as connection:
            rows_in_table = connection.execute_list_query(
                query=f"SELECT * FROM {table}")
            return rows_in_table
Esempio n. 16
0
 def delete(self, where_condition):
     # Start Hyper
     with HyperProcess(
             telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
         #  Connect to an existing .hyper file (CreateMode.NONE)
         with Connection(endpoint=hyper.endpoint,
                         database=self.path) as connection:
             delete_command = 'DELETE FROM "Extract"."Extract" WHERE {}'.format(
                 where_condition)
             row_count = connection.execute_command(command=delete_command)
             logging.info(f'{delete_command}, row_count={row_count}')
Esempio n. 17
0
 def print_rows(self):
     """Prints the first 1,000 rows of a Hyper file"""
     with HyperProcess(
             telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
         with Connection(endpoint=hp.endpoint,
                         database=self.path) as connection:
             rows = connection.execute_list_query(
                 f"SELECT * FROM {TableName('Extract', 'Extract')} LIMIT 1000"
             )
             print("Showing first 1,000 rows")
             for row in rows:
                 print(row)
Esempio n. 18
0
def run_delete_data_in_existing_hyper_file():
    """
    An example of how to delete data in an existing Hyper file.
    """
    print("EXAMPLE - Delete data from an existing Hyper file")

    # Path to a Hyper file containing all data inserted into Customer, Product, Orders and LineItems table.
    # See "insert_data_into_multiple_tables.py" for an example that works with the complete schema.
    path_to_source_database = Path(
        __file__).parent / "data" / "superstore_sample.hyper"

    # Make a copy of the superstore example Hyper file.
    path_to_database = Path(
        shutil.copy(path_to_source_database,
                    "superstore_sample_delete.hyper")).resolve()

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Connect to existing Hyper file "superstore_sample_delete.hyper".
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:

            print(
                f"Delete all rows from customer with the name 'Dennis Kane' from table {escape_name('Orders')}."
            )
            # `execute_command` executes a SQL statement and returns the impacted row count.
            row_count = connection.execute_command(
                command=f"DELETE FROM {escape_name('Orders')} "
                f"WHERE {escape_name('Customer ID')} = ANY("
                f"SELECT {escape_name('Customer ID')} FROM {escape_name('Customer')} "
                f"WHERE {escape_name('Customer Name')} = {escape_string_literal('Dennis Kane')})"
            )

            print(
                f"The number of deleted rows in table {escape_name('Orders')} "
                f"is {row_count}.\n")

            print(
                f"Delete all rows from customer with the name 'Dennis Kane' from table {escape_name('Customer')}."
            )
            row_count = connection.execute_command(
                command=f"DELETE FROM {escape_name('Customer')} "
                f"WHERE {escape_name('Customer Name')} = {escape_string_literal('Dennis Kane')}"
            )

            print(
                f"The number of deleted rows in table Customer is {row_count}."
            )

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
Esempio n. 19
0
def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")

    path_to_database = Path("customer.hyper")

    # Optional process parameters.
    # They are documented in the Tableau Hyper documentation, chapter "Process Settings"
    # (https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/processsettings.html).
    process_parameters = {
        # Limits the number of Hyper event log files to two.
        "log_file_max_count": "2",
        # Limits the size of Hyper event log files to 100 megabytes.
        "log_file_size_limit": "100M"
    }

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU,
                      parameters=process_parameters) as hyper:

        # Optional connection parameters.
        # They are documented in the Tableau Hyper documentation, chapter "Connection Settings"
        # (https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/connectionsettings.html).
        connection_parameters = {"lc_time": "en_US"}

        # Creates new Hyper file "customer.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE,
                        parameters=connection_parameters) as connection:

            connection.catalog.create_table(table_definition=customer_table)

            # Using path to current file, create a path that locates CSV file packaged with these examples.
            path_to_csv = str(Path(__file__).parent / "data" / "customers.csv")

            # Load all rows into "Customers" table from the CSV file.
            # `execute_command` executes a SQL statement and returns the impacted row count.
            count_in_customer_table = connection.execute_command(
                command=
                f"COPY {customer_table.table_name} from {escape_string_literal(path_to_csv)} with "
                f"(format csv, NULL 'NULL', delimiter ',', header)")

            print(
                f"The number of rows in table {customer_table.table_name} is {count_in_customer_table}."
            )

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
Esempio n. 20
0
def write_df_to_hyper(df,hyper_path):
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,database=hyper_path) as connection:
            tablename = connection.catalog.get_table_names(schema="Extract")[0]

            # Delete the old data
            connection.execute_command(command=f"DELETE FROM {escape_name(tablename)}")

            with Inserter(connection, tablename) as inserter:
                inserter.add_rows(rows=df.values.tolist())
                inserter.execute()
    return hyper_file_path
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    ''' 

    # Replace nan's with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text() 
            }
            table_def = []

            # Get column headers to loop through them
            df_columns = list(df)

            for col_header in df_columns:
                dtype_str = str(df[col_header].dtype)

                # Use dtype_str to lookup appropiate SqlType from dtype_map and append new column to table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))
                
            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                
                inserter.execute() 
def HTTPtoHyper(accessfile):

    print (">>> Importing Access File " + accessfile + " to Hyper...")

    path_to_database = Path(hyperfile)

    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:
            connection.execute_command(
                command=f"COPY http from {escape_string_literal(accessfile)} with "
                f"(format CSV, delimiter ' ', NULL '-', QUOTE '\"', ESCAPE '\\')")

    print("HTTP file imported in to Hyper...")
Esempio n. 23
0
    def run(self, args):
        """ Runs the command
        :param args: Arguments from argparse.Namespace
        """
        input_file = args.input_file
        output_file = args.output_file
        wkt_file = Path(args.wkt_path)
        role_name = args.role_name
        id_field = args.id_field
        # Grab the CSV
        csv_query = CsvQueryClass()
        csv_query.open_csv(wkt_file)

        # if the output file already exists, delete
        if os.path.exists(output_file):
            if os.name == "nt":  # deal with Windows
                os.remove(output_file)
            else:
                subprocess.call(["rm", "-rf", output_file])
        shutil.copyfile(input_file, output_file)

        # Starts the Hyper Process with telemetry enabled to send data to Tableau.
        # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
        with HyperProcess(
                telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
            with Connection(endpoint=hyper.endpoint,
                            database=output_file) as connection:
                table_name = TableName(
                    "public", "LocalData" + role_name
                )  #TODO: Make this dynamic based on an input parameter.
                geo_name = Name('Geometry')
                map_code_name = Name('MapCode')
                latitude_name = Name('Latitude')
                longitude_name = Name('Longitude')

                connection.execute_query(
                    f"ALTER TABLE {table_name} ADD COLUMN {geo_name} TEXT,"
                    f" ADD COLUMN {map_code_name} INTEGER").close()
                for mrow in csv_query.rows:
                    lat = mrow['Latitude']
                    lng = mrow['Longitude']
                    wkt = mrow['WKT']
                    with connection.execute_query(
                            f"UPDATE {table_name}" +
                            f" SET {geo_name}={escape_string_literal(wkt)}, {map_code_name}=0"
                            + f" WHERE {latitude_name}={lat}"
                            f" AND {longitude_name}={lng}") as result:
                        print(f"{result.affected_row_count} rows changed")
        print('done')
Esempio n. 24
0
 def print_table_def(self, schema: str = "Extract", table: str = "Extract"):
     """Prints the table definition for a table in a Hyper file."""
     with HyperProcess(
             telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
         with Connection(endpoint=hp.endpoint,
                         database=self.path) as connection:
             table_name = TableName(schema, table)
             table_definition = connection.catalog.get_table_definition(
                 name=table_name)
             # Print all column information
             print("root")
             for column in table_definition.columns:
                 print(
                     f"|-- {column.name}: {column.type} (nullable = {column.nullability})"
                 )
def hyper_prepare(hyper_path, functional_ordered_column, column_value):
    """Function that prepares the given hyper file: based on the hyper's path, the functional ordered column and its value,
     the hyper file is cleaned from the latest set of data by deleting all data with the given column value or greater values
     """
    path_to_database = Path(hyper_path).expanduser().resolve()
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
                      user_agent=os.path.basename(__file__)) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:
            table_name = TableName("Extract", "Extract")
            rows_affected = connection.execute_command(
                command=
                f'DELETE FROM {table_name} WHERE "{functional_ordered_column}" >= {column_value}'
            )
            return rows_affected
Esempio n. 26
0
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")
        object_name = "mealprep.hyper"
        file_name = os.environ.get('bucket_name')

        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            print(example_table)
            print(type(example_table))
            with Inserter(connection, example_table) as inserter:
                for i in request_data['data']:
                    inserter.add_row([i['breakfast'], i['lunch'], i['dinner']])

                inserter.execute()
                print("The data was added to the table.")

            print("The connection to the Hyper extract file is closed.")
        print("The HyperProcess has shut down.")

        with open('mealprep.hyper', 'rb') as reader:
            if object_name is None:
                object_name = file_name
            s3_client = boto3.client(
                's3',
                aws_access_key_id=os.environ.get('aws_access_key_id'),
                aws_secret_access_key=os.environ.get('aws_secret_access_key'))
            try:
                response = s3_client.upload_fileobj(reader, file_name,
                                                    object_name)
            except ClientError as e:
                logging.error(e)
                return False

    return redirect(url_for('index'))
Esempio n. 27
0
def run_read_data_from_existing_hyper_file():
    """
    An example of how to read and print data from an existing Hyper file.
    """
    print("EXAMPLE - Read data from an existing Hyper file")

    # Path to a Hyper file containing all data inserted into Customer, Product, Orders and LineItems table.
    # See "insert_data_into_multiple_tables.py" for an example that works with the complete schema.
    path_to_source_database = Path(
        __file__).parent / "data" / "superstore_sample_denormalized.hyper"

    # Make a copy of the superstore denormalized sample Hyper file
    path_to_database = Path(
        shutil.copy(
            src=path_to_source_database,
            dst="superstore_sample_denormalized_read.hyper")).resolve()

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Connect to existing Hyper file "superstore_sample_denormalized_read.hyper".
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:
            # The table names in the "Extract" schema (the default schema).
            table_names = connection.catalog.get_table_names(schema="Extract")

            for table in table_names:
                table_definition = connection.catalog.get_table_definition(
                    name=table)
                print(f"Table {table.name} has qualified name: {table}")
                for column in table_definition.columns:
                    print(
                        f"Column {column.name} has type={column.type} and nullability={column.nullability}"
                    )
                print("")

            # Print all rows from the "Extract"."Extract" table.
            table_name = TableName("Extract", "Extract")
            print(f"These are all rows in the table {table_name}:")
            # `execute_list_query` executes a SQL query and returns the result as list of rows of data,
            # each represented by a list of objects.
            rows_in_table = connection.execute_list_query(
                query=f"SELECT * FROM {table_name}")
            print(rows_in_table)

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
Esempio n. 28
0
    def populate_extract(self, table, data):
        """Connect to Hyper file and load table with data

        Args:
            table: name of the table you are loading
            data: iterable object containing data to populate table with
        """
        if not any(table == name for name in self.table_names):
            raise ValueError(f"{table} is not part of the schema")

        with HyperProcess(
                telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
            with Connection(hyper.endpoint, self.hyper) as connection:
                with Inserter(connection, table) as inserter:
                    inserter.add_rows(data)
                    inserter.execute()
Esempio n. 29
0
def get_table_names():
    """
    Returns the list of the tables contained into the Tableau data source.
    Normally, there is usually only one table: Extract.Extract
    """
    return_value = []

    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=settings.HYPER_FILE_PATH) as connection:
            table_names = connection.catalog.get_table_names(schema="Extract")

            for table in table_names:
                return_value.append(table)
    return return_value
def main():
    # Parse Arguments

    options = parseArguments()

    # Create the table schema
    print('[INFO] Importing schema file', options['schema'])
    with open(options['schema']) as schema_file:
        schemaJson = json.load(schema_file)
        if (options['temptable']):
            tempschema = importTextSchema ( schemaJson )
        schema = importSchema( schemaJson )
        print('[INFO] Schema Imported')

        overwrite = CreateMode.CREATE
        if (os.path.exists(options[ 'output' ]) and options[ 'overwrite' ]):
            print('[INFO] Overwriteing existing', options[ 'output' ], 'file')
            overwrite = CreateMode.CREATE_AND_REPLACE

        print('[INFO] Creating new local Hyper instance')
        with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU, 'TheInformationLab-CloudBucket-CSV') as hyper:
            # Create the extract, replace it if it already exists
            print('[INFO] Instance created. Building connection to', options['output'])
            with Connection(hyper.endpoint, options['output'], overwrite) as connection:
                print('[INFO] Connection established. Adding table to Hyper database')
                connection.catalog.create_table(schema)
                if (options['temptable']):
                    connection.catalog.create_table(tempschema)
                print('[INFO] The databse is ready to go. Let\'s get it populated')
                if 'files' not in schemaJson:
                    print('[ERROR] No files listed in the schema json. Key \'files\' required of type [STRING]:\nExiting now\n.')
                    exit( -1 )
                files = schemaJson['files']

                for file in tqdm(files, ascii=True, desc='[INFO] Importing'):
                    localfilepath = file
                    if not options[ 'localread' ]:
                        temp_dir = tempfile.TemporaryDirectory()
                        print('[INFO] Downloading to temp dir', temp_dir.name)
                        s3get = getS3File(options[ 'accesskey' ], options[ 'secretkey' ], options[ 'bucket' ], file, temp_dir.name, folder = options[ 'path' ])
                        localfilepath = temp_dir.name + '/' + file
                    populateExtract(connection, schemaJson, localfilepath, options['ignoreheader'], options['delimiter'], options['temptable'])
                connection.close()
                print('[INFO] The data is in, your Hyper file is ready. Viz on Data Rockstar!')

    return 0