def append(self, sql: str = None, df: DataFrame = None):
    """Appends new data to a Hyper File"""
    # First, materialize the new data back to the driver node
    if sql is not None and df is None:
        self.sql = sql
        self.df = get_spark_dataframe(sql)
    elif sql is None and df is not None:
        self.df = df
    else:
        raise ValueError('Missing either SQL statement or Spark DataFrame as argument.')
    data = HyperUtils.get_rows(self.df)

    # Convert the Spark DataFrame schema to a Tableau table definition
    table_def = HyperUtils.get_table_def(self.df, "Extract", "Extract")

    # Insert the new data into the Hyper file
    hyper_database_path = self.path
    logging.info(f'Inserting new data into Hyper database: {hyper_database_path}')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
        with Connection(endpoint=hp.endpoint,
                        database=hyper_database_path,
                        create_mode=CreateMode.NONE) as connection:
            with Inserter(connection, table_def) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
def createHyperExtract(hyperLocation, extract_table, records, tableName):
    path_to_database = Path(hyperLocation)
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection_tab(endpoint=hyper.endpoint,
                            database=path_to_database,
                            create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)
            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=records)
                inserter.execute()
            table_names = connection.catalog.get_table_names(extract_table.table_name.schema_name)
            logger.debug(f" Tables available in {path_to_database} are: {table_names}")
            row_count = connection.execute_scalar_query(
                query=f"SELECT COUNT(*) FROM {extract_table.table_name}")
            logger.info(f" The number of rows in table {extract_table.table_name} is {row_count}.")
    logger.debug(" The connection to the Hyper file has been closed.")
    logger.debug(" The Hyper process has been shut down.")
def to_hyper(df, hyper_file_name, custom_schema="Extract", custom_table_name="Extract"):
    """
    Write a Tableau Hyper file from a Pandas DataFrame.

    Currently can only write single-table extracts, which is Tableau's
    default way of creating an extract.

    Args:
        df: Specify which DataFrame you want to output
        hyper_file_name: Specify the file name such as "Example.hyper"
        custom_schema: If you need to change the schema name. Defaults to "Extract"
        custom_table_name: If you need to change the table name. Defaults to "Extract"

    Returns:
        Tableau Hyper file
    """
    # Starts the Hyper Process
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
            parameters={"log_config": ""},
    ) as hyper:
        # Creates a .hyper file to put the data into
        with Connection(hyper.endpoint, hyper_file_name,
                        CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(custom_schema)

            # Create a .hyper-compatible column definition from the
            # DataFrame's column names and dtypes, using list comprehensions
            # to loop through all the columns in the DataFrame
            column_names = [column for column in df.columns]
            column_dtype = [dtype for dtype in df.dtypes]
            hyper_table = TableDefinition(
                TableName(custom_schema, custom_table_name),
                [
                    TableDefinition.Column(
                        column_names[column],
                        dtype_mapper[str(column_dtype[column])])
                    for column in range(len(column_names))
                ],
            )
            connection.catalog.create_table(hyper_table)

            # Replace NaN with None, otherwise it will not be NULL in Tableau
            df.replace({np.nan: None}, inplace=True)

            # Insert the data values into the Hyper file
            data_to_insert = df.to_numpy()
            with Inserter(connection, hyper_table) as inserter:
                inserter.add_rows(tqdm((row for row in data_to_insert)))
                inserter.execute()
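# `to_hyper` above relies on a module-level `dtype_mapper` dict that is not shown
# in this excerpt. A minimal sketch of what such a mapping could look like,
# assuming common pandas dtypes (the exact entries are an assumption, not the
# original author's table):
from tableauhyperapi import SqlType

# Hypothetical pandas-dtype-to-SqlType mapping assumed by to_hyper();
# extend it to cover whatever dtypes appear in your DataFrames.
dtype_mapper = {
    "int32": SqlType.int(),
    "int64": SqlType.big_int(),
    "float32": SqlType.double(),
    "float64": SqlType.double(),
    "bool": SqlType.bool(),
    "datetime64[ns]": SqlType.timestamp(),
    "object": SqlType.text(),
}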
def run_create_hyper_file_from_csv():
    print("Inside function to load data from a CSV into a table in a new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # CreateMode.CREATE_AND_REPLACE replaces the file if it already exists
        with Connection(endpoint=hyper.endpoint,
                        database='C:/Users/admin/Desktop/extrp1.hyper',
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            # `extract_table` and `data_to_insert` are defined elsewhere in the script
            connection.catalog.create_schema('extract')
            connection.catalog.create_table(table_definition=extract_table)

            # Create a path that locates the CSV file to be used
            path_to_csv = 'C:/Users/admin/Desktop/testing1.csv'

            # `execute_command` executes a SQL statement and returns the affected row count.
            count_in_table = connection.execute_command(
                command=f"COPY {extract_table.table_name} from {escape_string_literal(path_to_csv)} with "
                        f"(format csv, NULL 'NULL', delimiter ',', header)")
            print(count_in_table)

            with Inserter(connection, TableName('extract', 'extract')) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()
def insert_data():
    """
    Creates a simple .hyper file. For more on this, see the below example:
    https://github.com/tableau/hyper-api-samples/blob/main/Tableau-Supported/Python/insert_data_into_single_table.py
    """
    print("Creating single table for conversion.")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file.
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            # Creates schema and table with table_definition.
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # The rows to insert into the "Extract"."Extract" table.
            data_to_insert = [["DK-13375", "Dennis Kane", 685, "Consumer"],
                              ["EB-13705", "Ed Braxton", 815, "Corporate"]]

            # Insert the data.
            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

    print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
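# Several snippets here reference module-level `path_to_database` and
# `extract_table` objects defined outside the excerpt. A plausible sketch of
# those definitions, modeled on the official single-table sample linked above
# (file name and column names are assumptions inferred from the inserted rows):
from pathlib import Path
from tableauhyperapi import NOT_NULLABLE, SqlType, TableDefinition, TableName

path_to_database = Path("customer.hyper")  # assumed output file name

# Assumed table definition matching the four-column customer rows above.
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name="Customer ID", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Customer Name", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Loyalty Reward Points", type=SqlType.big_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Segment", type=SqlType.text(), nullability=NOT_NULLABLE),
    ],
)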
def fn_insert_data_into_hyper_table(self, local_logger, timer, in_dict):
    timer.start()
    # Execute the actual insert
    with Inserter(in_dict['connection'], in_dict['table']) as hyper_insert:
        hyper_insert.add_rows(rows=in_dict['data'])
        hyper_insert.execute()
    local_logger.info(self.locale.gettext('Data has been inserted into Hyper table'))
    timer.stop()
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    '''
    # Replace NaNs with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in the pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text()
            }
            table_def = []

            # Get column headers to loop through them
            df_columns = list(df)
            for col_header in df_columns:
                dtype_str = str(df[col_header].dtype)
                # Use dtype_str to look up the appropriate SqlType from dtype_map
                # and append a new column to the table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))

            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                inserter.execute()
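# A short usage sketch for df_to_extract, assuming pandas is available.
# The sample data and output file name are illustrative only.
import pandas as pd

sales = pd.DataFrame({
    "region": ["East", "West"],      # maps to SqlType.text()
    "units": [125, 98],              # maps to SqlType.big_int()
    "revenue": [1250.0, 980.5],      # maps to SqlType.double()
})
df_to_extract(sales, "sales.hyper")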
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")
        object_name = "mealprep.hyper"
        file_name = os.environ.get('bucket_name')
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            print(example_table)
            print(type(example_table))
            with Inserter(connection, example_table) as inserter:
                for i in request_data['data']:
                    inserter.add_row([i['breakfast'], i['lunch'], i['dinner']])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")

    with open('mealprep.hyper', 'rb') as reader:
        if object_name is None:
            object_name = file_name
        s3_client = boto3.client(
            's3',
            aws_access_key_id=os.environ.get('aws_access_key_id'),
            aws_secret_access_key=os.environ.get('aws_secret_access_key'))
        try:
            response = s3_client.upload_fileobj(reader, file_name, object_name)
        except ClientError as e:
            logging.error(e)
            return False
    return redirect(url_for('index'))
def insert_data():
    """
    An example demonstrating a simple single-table Hyper file including table creation
    and data insertion with different types.

    This code is lifted from the below example:
    https://github.com/tableau/hyper-api-samples/blob/main/Tableau-Supported/Python/insert_data_into_single_table.py
    """
    print("Creating single table for publishing.")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file "customer.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # The rows to insert into the "Extract"."Extract" table.
            data_to_insert = [["DK-13375", "Dennis Kane", 685, "Consumer"],
                              ["EB-13705", "Ed Braxton", 815, "Corporate"]]

            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

            # The table names in the "Extract" schema (the default schema).
            table_names = connection.catalog.get_table_names("Extract")
            print(f"Tables available in {path_to_database} are: {table_names}")

            # Number of rows in the "Extract"."Extract" table.
            # `execute_scalar_query` is for executing a query that returns exactly one row with one column.
            row_count = connection.execute_scalar_query(
                query=f"SELECT COUNT(*) FROM {extract_table.table_name}")
            print(f"The number of rows in table {extract_table.table_name} is {row_count}.")

    print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
def insert_data_into_hyper_file(data: List[Any], name: str, table_def: TableDefinition):
    """Helper function that inserts data into a .hyper file."""
    # First, create a temp directory on the driver node
    tmp_dir = f"/tmp/hyperleaup/{name}/"
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)
    hyper_database_path = f"/tmp/hyperleaup/{name}/{name}.hyper"
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
        with Connection(endpoint=hp.endpoint,
                        database=hyper_database_path,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=table_def.table_name.schema_name)
            connection.catalog.create_table(table_definition=table_def)
            with Inserter(connection, table_def) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
    return hyper_database_path
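# A usage sketch for insert_data_into_hyper_file; the table definition and
# rows below are made up for illustration.
from tableauhyperapi import SqlType, TableDefinition, TableName

users_def = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column("id", SqlType.big_int()),
        TableDefinition.Column("email", SqlType.text()),
    ],
)
path = insert_data_into_hyper_file(
    data=[[1, "ada@example.com"], [2, "alan@example.com"]],
    name="users",
    table_def=users_def,
)
print(f"Hyper file written to {path}")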
def _query_result_to_hyper_files(self, query_result_iter, target_table_def):
    """
    Writes query output to one or more Hyper files
    Returns a list of output Hyper files

    query_result_iter (obj): Iterator containing result rows
    target_table_def (TableDefinition): Schema for target extract table
    """
    output_hyper_files = []
    # TODO: Split output into smaller files when query result > MAX_ROWS_PER_FILE
    path_to_database = Path(tempfile_name(prefix="temp_", suffix=".hyper"))
    output_hyper_files.append(path_to_database)
    with HyperProcess(telemetry=TELEMETRY) as hyper:
        # Creates a new Hyper extract file.
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE,
        ) as connection:
            connection.catalog.create_schema(schema=target_table_def.table_name.schema_name)
            connection.catalog.create_table(table_definition=target_table_def)
            with Inserter(connection, target_table_def) as inserter:
                inserter.add_rows(query_result_iter())
                inserter.execute()
            row_count = connection.execute_scalar_query(
                query=f"SELECT COUNT(*) FROM {target_table_def.table_name}")
            logger.info(f"The number of rows in table {target_table_def.table_name} is {row_count}.")
        logger.info("The connection to the Hyper file has been closed.")
    logger.info("The Hyper process has been shut down.")
    return output_hyper_files
def __create_hyper(self, json_array, table):
    result = True
    try:
        with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU, 'challenge') as hyper:
            print("Starting the extraction process.")
            path_to_database = Path(self.location)
            with Connection(endpoint=hyper.endpoint,
                            database=path_to_database,
                            create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
                print("The connection is open.")
                #connection.catalog.create_schema('Extract')
                print("The table is defined")
                connection.catalog.create_table(table)
                with Inserter(connection, table) as inserter:
                    for prod in json_array:
                        # The "catalog_listing" field needs special treatment,
                        # since it is not present on every product
                        catalog = False
                        try:
                            catalog = prod["catalog_listing"]
                        except KeyError:
                            pass
                        aux = [prod["id"], prod["site_id"], prod["title"], json.dumps(prod["seller"]),
                               str(prod["price"]), json.dumps(prod["prices"]), prod["sale_price"],
                               prod["currency_id"], prod["available_quantity"], prod["sold_quantity"],
                               prod["buying_mode"], prod["listing_type_id"], prod["stop_time"],
                               prod["condition"], prod["permalink"], prod["thumbnail"],
                               prod["accepts_mercadopago"], json.dumps(prod["installments"]),
                               json.dumps(prod["address"]), json.dumps(prod["shipping"]),
                               json.dumps(prod["seller_address"]), json.dumps(prod["attributes"]),
                               str(prod["original_price"]), prod["category_id"],
                               prod["official_store_id"], prod["domain_id"],
                               prod["catalog_product_id"], json.dumps(prod["tags"]),
                               catalog, prod["order_backend"]]
                        inserter.add_row(aux)
                    inserter.execute()
                    print("The data was inserted into the table")
            print("The connection to the hyper file was closed.")
        print("Shutting down the HyperProcess.")
    except Exception as e:
        print("Error: running the createHyper process with status code: " + str(e))
        result = False
    return result
def createHyperFile():
    dict = parseData()
    file = "/Users/jharris/Desktop/workbookUsers.hyper"
    cols = dict['cols']
    data = dict['data']
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, file, CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('Extract')
            table = TableDefinition(
                TableName('Extract', 'Extract'),
                [TableDefinition.Column(col, SqlType.text()) for col in cols])
            connection.catalog.create_table(table)
            with Inserter(connection, table) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
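# `parseData` is defined elsewhere; createHyperFile only assumes it returns a
# dict with a 'cols' list of column names and a 'data' list of rows in the same
# order. A hypothetical stub satisfying that contract (contents are invented):
def parseData():
    # Hypothetical stub; the real implementation presumably parses
    # workbook-user records from some source.
    return {
        'cols': ['workbook', 'owner'],
        'data': [
            ['Sales Overview', 'jharris'],
            ['Churn Dashboard', 'mlopez'],
        ],
    }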
def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file "spatial_data.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # Hyper API's Inserter allows users to transform data during insertion.
            # To make use of data transformation during insertion, the inserter requires the following inputs:
            # 1. The connection to the Hyper instance containing the table.
            # 2. The table name or table definition into which data is inserted.
            # 3. A list of Inserter.ColumnMapping.
            #    This list informs the inserter how each column in the target table is transformed.
            #    The list must contain all the columns into which data is inserted.
            #    "Inserter.ColumnMapping" maps a valid SQL expression (if any) to a column in the target table.
            #    For example, Inserter.ColumnMapping('target_column_name', f'{escape_name("colA")}*{escape_name("colB")}')
            #    means the column "target_column_name" contains the product of "colA" and "colB" after successful insertion.
            #    The SQL expression string is optional in Inserter.ColumnMapping.
            #    For a column without any transformation, only the column name is required.
            #    For example, Inserter.ColumnMapping('no_data_transformation_column')
            # 4. The column definition of all input values provided to the Inserter.

            # The inserter definition contains the column definitions for the values that are inserted.
            # The data input has two text values: Name and Location_as_text.
            inserter_definition = [
                TableDefinition.Column(name='Name',
                                       type=SqlType.text(),
                                       nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Location_as_text',
                                       type=SqlType.text(),
                                       nullability=NOT_NULLABLE)
            ]

            # Column 'Name' is inserted into "Extract"."Extract" as-is.
            # Column 'Location' in "Extract"."Extract", of geography type, is computed from
            # column 'Location_as_text', of text type, using the expression
            # 'CAST("Location_as_text" AS GEOGRAPHY)'.
            # Inserter.ColumnMapping is used to map the CAST expression to column 'Location'.
            column_mappings = [
                'Name',
                Inserter.ColumnMapping(
                    'Location',
                    f'CAST({escape_name("Location_as_text")} AS GEOGRAPHY)')
            ]

            # Data to be inserted.
            data_to_insert = [['Seattle', "point(-122.338083 47.647528)"],
                              ['Munich', "point(11.584329 48.139257)"]]

            # Insert data into the "Extract"."Extract" table with the CAST expression.
            with Inserter(connection, extract_table, column_mappings,
                          inserter_definition=inserter_definition) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

            print("The data was added to the table.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
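# The `extract_table` this spatial example targets is defined outside the
# excerpt. Based on the column mappings above it needs a text 'Name' column and
# a geography 'Location' column; a plausible definition (an assumption, not the
# original code):
from tableauhyperapi import NOT_NULLABLE, SqlType, TableDefinition, TableName

extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name="Name", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Location", type=SqlType.geography(), nullability=NOT_NULLABLE),
    ],
)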
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print("The HyperProcess has started.")
        object_name = "tdxdemo.hyper"
        file_name = os.environ.get('bucket_name')
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('activityId', SqlType.big_int()),
                TableDefinition.Column('activityType', SqlType.text()),
                TableDefinition.Column('contactId', SqlType.big_int()),
                TableDefinition.Column('industry', SqlType.text()),
                TableDefinition.Column('accountId', SqlType.text()),
                TableDefinition.Column('accountName', SqlType.text()),
                TableDefinition.Column('activityDate', SqlType.text()),
                TableDefinition.Column('company', SqlType.text()),
                TableDefinition.Column('name', SqlType.text()),
                TableDefinition.Column('activitiesTotal', SqlType.big_int()),
                TableDefinition.Column('activitiesPerContact', SqlType.big_int()),
                TableDefinition.Column('contactsTotal', SqlType.big_int()),
                TableDefinition.Column('totalFormsSubmitted', SqlType.big_int()),
                TableDefinition.Column('totalPageViews', SqlType.big_int()),
                TableDefinition.Column('totalWebVisits', SqlType.big_int()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            with Inserter(connection, example_table) as inserter:
                for i in request_data:
                    inserter.add_row([
                        i['activityId'], i['activityType'], i['contactId'],
                        i['industry'], i['accountId'], i['accountName'],
                        i['activityDate'], i['company'], i['name'],
                        i['activitiesTotal'], i['activitiesPerContact'],
                        i['contactsTotal'], i['totalFormsSubmitted'],
                        i['totalPageViews'], i['totalWebVisits']
                    ])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")

    with open('tdxdemo.hyper', 'rb') as reader:
        if object_name is None:
            object_name = file_name
        s3_client = boto3.client(
            's3',
            aws_access_key_id=os.environ.get('aws_access_key_id'),
            aws_secret_access_key=os.environ.get('aws_secret_access_key'))
        try:
            response = s3_client.upload_fileobj(
                reader, file_name, object_name,
                ExtraArgs={'ACL': 'public-read'})
        except ClientError as e:
            logging.error(e)
            return False
    return redirect(url_for('index'))
def run_insert_data_into_multiple_tables():
    """
    An example of how to create and insert data into a multi-table Hyper file
    where tables have different types.
    """
    print("EXAMPLE - Insert data into multiple tables within a new Hyper file")
    path_to_database = Path("superstore.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file "superstore.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            # Create multiple tables.
            connection.catalog.create_table(table_definition=orders_table)
            connection.catalog.create_table(table_definition=customer_table)
            connection.catalog.create_table(table_definition=products_table)
            connection.catalog.create_table(table_definition=line_items_table)

            # Insert data into Orders table.
            orders_data_to_insert = [
                [399, "DK-13375", datetime(2012, 9, 7), "CA-2011-100006",
                 datetime(2012, 9, 13), "Standard Class"],
                [530, "EB-13705", datetime(2012, 7, 8), "CA-2011-100090",
                 datetime(2012, 7, 12), "Standard Class"]
            ]
            with Inserter(connection, orders_table) as inserter:
                inserter.add_rows(rows=orders_data_to_insert)
                inserter.execute()

            # Insert data into Customers table.
            customer_data_to_insert = [
                ["DK-13375", "Dennis Kane", 518, "Consumer"],
                ["EB-13705", "Ed Braxton", 815, "Corporate"]
            ]
            with Inserter(connection, customer_table) as inserter:
                inserter.add_rows(rows=customer_data_to_insert)
                inserter.execute()

            # Insert an individual row into the Products table.
            with Inserter(connection, products_table) as inserter:
                inserter.add_row(row=["TEC-PH-10002075", "Technology", "Phones", "AT&T EL51110 DECT"])
                inserter.execute()

            # Insert data into Line Items table.
            line_items_data_to_insert = [
                [2718, "CA-2011-100006", "TEC-PH-10002075", 377.97, 3, 0.0, 109.6113],
                [2719, "CA-2011-100090", "TEC-PH-10002075", 377.97, 3, None, 109.6113]
            ]
            with Inserter(connection, line_items_table) as inserter:
                inserter.add_rows(rows=line_items_data_to_insert)
                inserter.execute()

            tables = [orders_table, customer_table, products_table, line_items_table]
            for table in tables:
                # `execute_scalar_query` is for executing a query that returns exactly one row with one column.
                row_count = connection.execute_scalar_query(
                    query=f"SELECT COUNT(*) FROM {table.table_name}")
                print(f"The number of rows in table {table.table_name} is {row_count}.")

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
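# The four table definitions used above live outside this excerpt. As one
# illustration, the Orders table plausibly looks like the following, inferred
# from the six-element rows inserted into it (names and types are assumptions):
from tableauhyperapi import NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, TableName

orders_table = TableDefinition(
    table_name=TableName("Orders"),
    columns=[
        TableDefinition.Column(name="Address ID", type=SqlType.small_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Customer ID", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Order Date", type=SqlType.date(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Order ID", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Ship Date", type=SqlType.date(), nullability=NULLABLE),
        TableDefinition.Column(name="Ship Mode", type=SqlType.text(), nullability=NULLABLE),
    ],
)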
def insert_data(self, data):
    with Inserter(self._connection, self._table_name) as inserter:
        inserter.add_rows(rows=data)
        inserter.execute()
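# insert_data above belongs to a class that owns an open Connection and a
# target table. A hedged usage sketch: the SimpleNamespace stand-in and every
# name below are illustrative, not the original class.
from pathlib import Path
from types import SimpleNamespace
from tableauhyperapi import (Connection, CreateMode, HyperProcess, Inserter,
                             SqlType, TableDefinition, TableName, Telemetry)

events = TableDefinition(TableName("Extract", "Extract"), [
    TableDefinition.Column("name", SqlType.text()),
    TableDefinition.Column("count", SqlType.big_int()),
])
with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with Connection(hyper.endpoint, Path("events.hyper"), CreateMode.CREATE_AND_REPLACE) as conn:
        conn.catalog.create_schema("Extract")
        conn.catalog.create_table(events)
        # Any object exposing _connection and _table_name satisfies the method.
        writer = SimpleNamespace(_connection=conn, _table_name=events)
        insert_data(writer, [["a", 1], ["b", 2]])  # call as an unbound function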
class TableauTableWriter(object):
    """
    Wrapper class for writing a Tableau Hyper file from a DSS dataset.
    """
    def __init__(self, schema_name, table_name):
        """
        :param schema_name: name of the target schema
        :param table_name: name of the target table
        """
        self.row_index = 0
        self.data = []
        self.batch_size = 2000
        self.schema_name = schema_name
        self.table_name = table_name
        self.output_file = None
        self.is_geo_table = False
        self.schema_converter = SchemaConversion()

        # Tableau Hyper related objects
        self.hyper = None
        self.connection = None
        self.tmp_table_definition = None
        self.output_table_definition = None
        self.tmp_table_inserter = None
        self.output_table_inserter = None

    def create_schema(self, schema_dss, destination_file_path):
        """
        Create the Tableau Hyper file and its schema from the DSS dataset schema.

        :param schema_dss: DSS schema from the DSS dataset to export
            example: [{"columns": [{"name": "customer_id", "type": "bigint"}, ...]}, ...]
        :param destination_file_path:
        :return:
        """
        # Read the destination file of the dss
        self.output_file = destination_file_path
        logger.info("Writing the Tableau Hyper file to the following location: {}".format(destination_file_path))
        logger.info("The dataset to export has the following schema: {}".format(schema_dss))

        dss_columns = schema_dss['columns']
        dss_storage_types = [column_descriptor['type'] for column_descriptor in dss_columns]
        self.schema_converter.set_dss_storage_types(dss_storage_types)

        self.is_geo_table = dss_is_geo(schema_dss)
        logger.info("The input dataset contains a geo column: {}".format(self.is_geo_table))

        if not self.schema_name or not self.table_name:
            logger.warning("Did not receive the table or schema name.")
            raise ValueError("No valid schema or table name received.")

        logger.info("Received target schema {} and table {}".format(self.schema_name, self.table_name))

        # Create the Tableau Hyper schema from the DSS schema
        self.output_table_definition = TableDefinition(
            TableName(self.schema_name, self.table_name),
            self.schema_converter.dss_columns_to_hyper_columns(dss_columns))

        # Open connection to file
        self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
        self.connection = Connection(self.hyper.endpoint, self.output_file, CreateMode.CREATE_AND_REPLACE)
        assert self.connection is not None
        self.connection.catalog.create_schema(self.schema_name)
        self.connection.catalog.create_table(self.output_table_definition)

        # Handle the geo case
        if self.is_geo_table:
            logger.info("Detected geo column. Creating a temporary table...")
            dss_tmp_schema = geo_to_text(schema_dss)
            dss_tmp_columns = dss_tmp_schema['columns']
            self.tmp_table_definition = TableDefinition(
                TableName(self.schema_name, "tmp_" + self.table_name),
                self.schema_converter.dss_columns_to_hyper_columns(dss_tmp_columns))
            self.connection.catalog.create_table(self.tmp_table_definition)
            logger.info("Created temporary table")

    def write_row(self, row):
        """
        Handle one row of data to export
        :param row: a tuple with N strings matching the schema passed to open method
        """
        try:
            hyper_compliant_row = self.schema_converter.prepare_row_to_hyper(row)
            self.data.append(hyper_compliant_row)
            self.row_index += 1
            if self.row_index % self.batch_size == 0:
                logger.info("Writing {} lines to hyper file".format(len(self.data)))
                self.update_table()  # send data to hyper file, flush buffer
                self.data = []
        except Exception as err:
            logger.warning("Failed to perform writing on following row:\n{}".format(row))
            raise err
        return True

    def update_table(self):
        """
        Perform an update of the Tableau Hyper file with new data
        """
        # If there is a geo table, write through an intermediate temporary table
        if self.is_geo_table:
            self.tmp_table_inserter = Inserter(self.connection, self.tmp_table_definition)
            self.tmp_table_inserter.add_rows(self.data)
            self.tmp_table_inserter.execute()
            self.tmp_table_inserter.close()
            self.connection.execute_command(
                command=f"INSERT INTO {self.output_table_definition.table_name} "
                        f"SELECT * FROM {self.tmp_table_definition.table_name};")
            self.connection.execute_command(
                command=f"TRUNCATE TABLE {self.tmp_table_definition.table_name};")
        else:
            if self.connection is None:
                logger.warning("Connection to Tableau Hyper file is undefined")
            self.output_table_inserter = Inserter(self.connection, self.output_table_definition)
            self.output_table_inserter.add_rows(self.data)
            self.output_table_inserter.execute()
            self.output_table_inserter.close()
        return True

    def close(self):
        """
        Release the Tableau Hyper connections
        """
        logger.info("Closing export ...")
        if self.data:
            logger.info("Performing final data update...")
            self.update_table()
            self.data = []
        logger.info("Closing Tableau Hyper connections...")
        if self.is_geo_table:
            self.connection.execute_command(
                command=f"DROP TABLE {self.tmp_table_definition.table_name};")
        # Close the connection before shutting down the Hyper process
        self.connection.close()
        self.hyper.close()
        logger.info("Closed export")
        return True
def run_insert_data_with_expressions():
    """
    An example of how to push down computations to Hyper during insertion with expressions.
    """
    print("EXAMPLE - Push down computations to Hyper during insertion with expressions")
    path_to_database = Path("orders.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file "orders.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # Hyper API's Inserter allows users to transform data during insertion.
            # To make use of data transformation during insertion, the inserter requires the following inputs:
            # 1. The connection to the Hyper instance containing the table.
            # 2. The table name or table definition into which data is inserted.
            # 3. A list of Inserter.ColumnMapping.
            #    This list informs the inserter how each column in the target table is transformed.
            #    The list must contain all the columns into which data is inserted.
            #    "Inserter.ColumnMapping" maps a valid SQL expression (if any) to a column in the target table.
            #    For example, Inserter.ColumnMapping('target_column_name', f'{escape_name("colA")}*{escape_name("colB")}')
            #    means the column "target_column_name" contains the product of "colA" and "colB" after successful insertion.
            #    The SQL expression string is optional in Inserter.ColumnMapping.
            #    For a column without any transformation, only the column name is required.
            #    For example, Inserter.ColumnMapping('no_data_transformation_column')
            # 4. The column definition of all input values provided to the Inserter.

            # The inserter definition contains the column definitions for the values that are inserted.
            inserter_definition = [
                TableDefinition.Column(name='Order ID', type=SqlType.int(), nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Ship Timestamp Text', type=SqlType.text(), nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Ship Mode', type=SqlType.text(), nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Ship Priority Text', type=SqlType.text(), nullability=NOT_NULLABLE)]

            # Column 'Order ID' is inserted into "Extract"."Extract" as-is.
            # Column 'Ship Timestamp' in "Extract"."Extract", of timestamp type, is computed from
            # column 'Ship Timestamp Text', of text type, using 'to_timestamp()'.
            # Column 'Ship Mode' is inserted into "Extract"."Extract" as-is.
            # Column 'Ship Priority' in "Extract"."Extract", of integer type, is computed from
            # column 'Ship Priority Text', of text type, using a 'CASE' statement.
            shipPriorityAsIntCaseExpression = f'CASE {escape_name("Ship Priority Text")} ' \
                                              f'WHEN {escape_string_literal("Urgent")} THEN 1 ' \
                                              f'WHEN {escape_string_literal("Medium")} THEN 2 ' \
                                              f'WHEN {escape_string_literal("Low")} THEN 3 END'

            column_mappings = [
                'Order ID',
                Inserter.ColumnMapping(
                    'Ship Timestamp',
                    f'to_timestamp({escape_name("Ship Timestamp Text")}, {escape_string_literal("YYYY-MM-DD HH24:MI:SS")})'),
                'Ship Mode',
                Inserter.ColumnMapping('Ship Priority', shipPriorityAsIntCaseExpression)
            ]

            # Data to be inserted
            data_to_insert = [
                [399, '2012-09-13 10:00:00', 'Express Class', 'Urgent'],
                [530, '2012-07-12 14:00:00', 'Standard Class', 'Low']
            ]

            # Insert data into the "Extract"."Extract" table with expressions
            with Inserter(connection, extract_table, column_mappings,
                          inserter_definition=inserter_definition) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

            print("The data was added to the table.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
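# The target `extract_table` for the expressions example is defined outside the
# excerpt. From the column mappings above it needs these four columns; a
# plausible definition (an assumption, not the original code):
from tableauhyperapi import NOT_NULLABLE, SqlType, TableDefinition, TableName

extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name="Order ID", type=SqlType.int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Ship Timestamp", type=SqlType.timestamp(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Ship Mode", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Ship Priority", type=SqlType.int(), nullability=NOT_NULLABLE),
    ],
)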
def insert_box_events():
    # Hyper file instantiation
    path_to_database = Path(box_hyper_file)
    hyper_file_exists = Path.exists(path_to_database)

    # Start the Hyper API process
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Check whether the Hyper file exists. CreateMode.NONE will append;
        # CreateMode.CREATE_AND_REPLACE will create a net-new file.
        create_mode = None
        if hyper_file_exists:
            create_mode = CreateMode.NONE
        else:
            create_mode = CreateMode.CREATE_AND_REPLACE

        # Open a new connection
        with Connection(endpoint=hyper.endpoint, database=path_to_database,
                        create_mode=create_mode) as connection:
            # Create the schema if it does not exist
            connection.catalog.create_schema_if_not_exists(schema=box_schema)

            # Instantiate the table schema
            box_events_table_def = TableDefinition(
                table_name=TableName(box_schema, box_events_table),
                columns=[
                    TableDefinition.Column(name='event_id', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='event_type', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_at', type=SqlType.timestamp_tz(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_id', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_name', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_login', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='source', type=SqlType.json(), nullability=NULLABLE),
                    TableDefinition.Column(name='ip_address', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='additional_details', type=SqlType.json(), nullability=NULLABLE)
                ])
            print('Found schema: {0} and table def: {1}'.format(
                box_events_table_def.table_name.schema_name,
                box_events_table_def.table_name))

            # Create the table if it does not exist and get the Box events table
            connection.catalog.create_table_if_not_exists(table_definition=box_events_table_def)
            table_name = TableName(box_schema, box_events_table)

            # Get the MAX row by created_at
            last_event_created_at = connection.execute_scalar_query(
                query=f"SELECT MAX(created_at) FROM {box_events_table_def.table_name}")
            if last_event_created_at is not None:
                print('Found last event in hyper file: {0}'.format(last_event_created_at.to_datetime()))

            # Get the Box service account client
            auth = JWTAuth.from_settings_file(box_config)
            box_client = Client(auth)
            service_account = box_client.user().get()
            print('Found Service Account with name: {0}, id: {1}, and login: {2}'.format(
                service_account.name, service_account.id, service_account.login))

            # Get the current date, and fall back to one month ago if there is no latest event
            today = datetime.utcnow()
            if last_event_created_at is None:
                last_event_created_at = today - relativedelta.relativedelta(months=month_lookback)
            else:
                last_event_created_at = last_event_created_at.to_datetime().replace(
                    tzinfo=timezone.utc).astimezone(tz=None)

            # Get the Box enterprise events for a given date range
            print('Using date range for events today: {0} and starting datetime: {1}'.format(
                today, last_event_created_at))
            get_box_events(box_client, 0, last_event_created_at, today)

            # Insert the Box enterprise events into the Hyper file
            with Inserter(connection, box_events_table_def) as inserter:
                inserter.add_rows(rows=box_events)
                inserter.execute()

            # Number of rows in the "Box"."Events" table.
            row_count = connection.execute_scalar_query(
                query=f"SELECT COUNT(*) FROM {table_name}")
            print(f"The number of rows in table {table_name} is {row_count}.")

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
def Full_refresh(result):
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open == True:
                with Connection(hyperprocess.endpoint,
                                'Facebook_campaigns.hyper',
                                CreateMode.CREATE_AND_REPLACE,) as connection:
                    if connection.is_open == True:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        DataTable = TableDefinition(TableName('Extract', 'Campaign_data'), [
                            # Below columns are extracted from the report data API
                            TableDefinition.Column('Row_ID', SqlType.big_int()),
                            TableDefinition.Column('Inserted Date', SqlType.date()),
                            TableDefinition.Column('Date', SqlType.date()),
                            TableDefinition.Column('Account Id', SqlType.varchar(50)),
                            TableDefinition.Column('Account Name', SqlType.text()),
                            TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                            TableDefinition.Column('Campaign Name', SqlType.text()),
                            TableDefinition.Column('Impressions', SqlType.big_int()),
                            TableDefinition.Column('Clicks', SqlType.big_int()),
                            TableDefinition.Column('Reach', SqlType.big_int()),
                            TableDefinition.Column('Spend', SqlType.double()),
                            TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully defined the Facebook Campaign table")
                        connection.catalog.create_table(DataTable)
                        with Inserter(connection, TableName('Extract', 'Campaign_data')) as inserter:
                            inserted_rows = 1
                            row_id = 1
                            for i in range(0, len(result["Campaign Id"])):
                                inserter.add_row([
                                    int(row_id),
                                    datetime.today(),
                                    (datetime.strptime(result.loc[i, "Date"], '%Y-%m-%d')),
                                    str(result.loc[i, "Account Id"]),
                                    str(result.loc[i, "Account Name"]),
                                    str(result.loc[i, "Campaign Id"]),
                                    str(result.loc[i, "Campaign Name"]),
                                    int(result.loc[i, "Impressions"]),
                                    int(result.loc[i, "Clicks"]),
                                    int(result.loc[i, "Reach"]),
                                    float(result.loc[i, "Spend"]),
                                    float(result.loc[i, "Frequency"])
                                ])
                                row_id = row_id + 1
                                inserted_rows = inserted_rows + 1
                            inserter.execute()
                        print("Inserted rows are " + str(inserted_rows))
                        LogFileWrite("Inserted rows are " + str(inserted_rows))
                        # Delete duplicate rows, keeping the latest Row_ID per logical key
                        table_name = TableName('Extract', 'Campaign_data')
                        Delet_query = f'DELETE FROM {table_name} WHERE "Row_ID" NOT IN('
                        Delet_query += f'SELECT MAX("Row_ID") FROM {table_name} '
                        Delet_query += 'GROUP BY "Date","Campaign Id","Campaign Name",'
                        Delet_query += '"Account Id","Impressions",'
                        Delet_query += '"Clicks","Account Name","Reach","Spend",'
                        Delet_query += '"Frequency")'
                        connection.execute_command(Delet_query)
                        print("Deleted duplicate rows")
                        LogFileWrite("Successfully deleted duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                if connection.is_open == True:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                print("Connection is open or closed" + str(connection.is_open))
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        if hyperprocess.is_open == True:
            hyperprocess.close()
            print("Forcefully shut down the Hyper Process")
            LogFileWrite("Forcefully shut down the Hyper Process")
        else:
            print("Hyper process is shut down")
            LogFileWrite("Hyper process is shut down")
        print("Connection is open or closed" + str(connection.is_open))
        print("process is open or closed" + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is an exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
# Define the hyper table (same definition as the full refresh)
example_table = TableDefinition(TableName('Extract', 'Extract'), [
    TableDefinition.Column('column_ex', SqlType.varchar(500)),
    TableDefinition.Column('column_ex2', SqlType.varchar(500)),
    TableDefinition.Column('column_ex_int', SqlType.int()),
    TableDefinition.Column('column_ex3', SqlType.varchar(500)),
    TableDefinition.Column('column_ex_int2', SqlType.int()),
    TableDefinition.Column('column_ex4', SqlType.varchar(500)),
    TableDefinition.Column('column_ex5', SqlType.varchar(500)),
    TableDefinition.Column('column_ex_int3', SqlType.int()),
    TableDefinition.Column('column_ex6', SqlType.varchar(500)),
])
print("The table is defined.")
connection.catalog.create_table(table_definition=example_table)

# Insert data from the dataframe into the hyper table
with Inserter(connection, example_table) as inserter:
    for i in range(len(query_res)):
        inserter.add_row([
            query_res['column_ex'][i],
            query_res['column_ex2'][i],
            int(query_res['column_ex_int'][i]),
            query_res['column_ex3'][i],
            int(query_res['column_ex_int2'][i]),
            query_res['column_ex4'][i],
            query_res['column_ex5'][i],
            int(query_res['column_ex_int3'][i]),
            query_res['column_ex6'][i],
        ])
    inserter.execute()

table_names = connection.catalog.get_table_names("Extract")
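# Row-by-row add_row calls work, but for larger frames a single add_rows over
# the materialized rows is usually tidier. An equivalent bulk-insert sketch,
# assuming query_res is a pandas DataFrame with the columns used above:
int_cols = ['column_ex_int', 'column_ex_int2', 'column_ex_int3']
ordered = query_res[['column_ex', 'column_ex2', 'column_ex_int',
                     'column_ex3', 'column_ex_int2', 'column_ex4',
                     'column_ex5', 'column_ex_int3', 'column_ex6']]
ordered = ordered.astype({c: int for c in int_cols})
with Inserter(connection, example_table) as inserter:
    # add_rows accepts any iterable of row iterables
    inserter.add_rows(rows=ordered.itertuples(index=False, name=None))
    inserter.execute()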
def Incremental_refresh(result):
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open == True:
                with Connection(hyperprocess.endpoint,
                                'Facebook_campaigns.hyper',
                                CreateMode.NONE,) as connection:
                    if connection.is_open == True:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        LogFileWrite("Successfully connected to the Facebook Campaign data table")
                        # Continue numbering Row_ID from the current maximum
                        table_name = TableName('Extract', 'Campaign_data')
                        max_rowid_query = f'SELECT MAX("Row_ID") FROM {table_name}'
                        row_id = connection.execute_scalar_query(max_rowid_query)
                        row_id = row_id + 1
                        with Inserter(connection, TableName('Extract', 'Campaign_data')) as inserter:
                            inserted_rows = 1
                            for i in range(0, len(result["Campaign Id"])):
                                inserter.add_row([
                                    int(row_id),
                                    datetime.today(),
                                    (datetime.strptime(result.loc[i, "Date"], '%Y-%m-%d')),
                                    str(result.loc[i, "Account Id"]),
                                    str(result.loc[i, "Account Name"]),
                                    str(result.loc[i, "Campaign Id"]),
                                    str(result.loc[i, "Campaign Name"]),
                                    int(result.loc[i, "Impressions"]),
                                    int(result.loc[i, "Clicks"]),
                                    int(result.loc[i, "Reach"]),
                                    float(result.loc[i, "Spend"]),
                                    float(result.loc[i, "Frequency"])
                                ])
                                inserted_rows = inserted_rows + 1
                                row_id = row_id + 1
                            inserter.execute()
                        LogFileWrite("Successfully inserted the rows")
                        # Delete duplicate rows, keeping the latest Row_ID per logical key
                        Delet_query = f'DELETE FROM {table_name} WHERE "Row_ID" NOT IN('
                        Delet_query += f'SELECT MAX("Row_ID") FROM {table_name} '
                        Delet_query += 'GROUP BY "Date","Campaign Id","Campaign Name",'
                        Delet_query += '"Account Id","Impressions",'
                        Delet_query += '"Clicks","Account Name","Reach","Spend",'
                        Delet_query += '"Frequency")'
                        connection.execute_command(Delet_query)
                        print("Deleted duplicate rows")
                        LogFileWrite("Successfully deleted duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                if connection.is_open == True:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        if hyperprocess.is_open == True:
            hyperprocess.close()
            print("Forcefully shut down the Hyper Process")
            LogFileWrite("Forcefully shut down the Hyper Process")
        else:
            print("Hyper process is shut down")
            LogFileWrite("Hyper process is shut down")
    except HyperException as ex:
        LogFileWrite("There is an exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
# This snippet begins mid table definition; the opening line below is
# reconstructed (the `schema` variable name is known from its later use,
# the 'Extract'.'Extract' table name is an assumption).
schema = TableDefinition(TableName('Extract', 'Extract'), [
    TableDefinition.Column('temperature', SqlType.double()),
    TableDefinition.Column('chance_precipitation', SqlType.double()),
    TableDefinition.Column('precipitation', SqlType.double()),
    TableDefinition.Column('wind_speed', SqlType.double()),
    TableDefinition.Column('wind_gust', SqlType.double()),
    TableDefinition.Column('visiblity', SqlType.double()),
    TableDefinition.Column('cloud_cover', SqlType.double()),
    TableDefinition.Column('relative_humidity', SqlType.double()),
    TableDefinition.Column('moon_phase', SqlType.double()),
    TableDefinition.Column('condition', SqlType.text()),
])

# Step 5: Create the table in the connection catalog
connection.catalog.create_table(schema)

with Inserter(connection, schema) as inserter:
    for index, row in df.iterrows():
        inserter.add_row(row)
    inserter.execute()

print("The connection to the Hyper file is closed.")

# Config to publish
hyper_name = 'weather_forcast.hyper'
server_address = 'https://prod-apnortheast-a.online.tableau.com/'
site_name = 'demo1996'
project_name = 'api_test'
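# The config above points at Tableau Cloud, but the publish step itself is not
# shown in this excerpt. A hedged sketch using the tableauserverclient package
# (the credentials and project lookup are illustrative; TSC is not imported
# anywhere in the original code):
import tableauserverclient as TSC

# Illustrative sign-in; a personal access token would also work here.
tableau_auth = TSC.TableauAuth('username', 'password', site_id=site_name)
server = TSC.Server(server_address, use_server_version=True)

with server.auth.sign_in(tableau_auth):
    # Find the target project by name.
    all_projects, _ = server.projects.get()
    project = next(p for p in all_projects if p.name == project_name)

    # Publish the Hyper file as a datasource, overwriting any previous version.
    datasource = TSC.DatasourceItem(project.id)
    datasource = server.datasources.publish(
        datasource, hyper_name, TSC.Server.PublishMode.Overwrite)
    print(f"Published datasource with id: {datasource.id}")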