def convert_struct_field(column: StructField) -> TableDefinition.Column:
    """Converts a Spark StructField to a Tableau Hyper SqlType."""
    if column.dataType == IntegerType():
        sql_type = SqlType.int()
    elif column.dataType == LongType():
        sql_type = SqlType.big_int()
    elif column.dataType == ShortType():
        sql_type = SqlType.small_int()
    elif column.dataType == DoubleType():
        sql_type = SqlType.double()
    elif column.dataType == FloatType():
        sql_type = SqlType.double()
    elif column.dataType == BooleanType():
        sql_type = SqlType.bool()
    elif column.dataType == DateType():
        sql_type = SqlType.date()
    elif column.dataType == TimestampType():
        sql_type = SqlType.timestamp()
    elif column.dataType == StringType():
        sql_type = SqlType.text()
    else:
        # Trap the DecimalType case
        if str(column.dataType).startswith("DecimalType"):
            # The Tableau Hyper API caps numeric precision at 18
            precision = column.dataType.precision if column.dataType.precision <= 18 else 18
            scale = column.dataType.scale
            sql_type = SqlType.numeric(precision, scale)
        else:
            raise ValueError(f'Invalid StructField datatype for column `{column.name}` : {column.dataType}')
    nullable = NULLABLE if column.nullable else NOT_NULLABLE
    return TableDefinition.Column(name=column.name, type=sql_type, nullability=nullable)
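
# Usage sketch (an assumption, not part of the original source): driving
# convert_struct_field over a full Spark schema to build a Hyper table
# definition. Requires pyspark and tableauhyperapi to be installed; the
# schema and table name below are illustrative.
from pyspark.sql.types import StructType, StructField, LongType, StringType
from tableauhyperapi import TableDefinition, TableName

schema = StructType([
    StructField("id", LongType(), False),           # -> big_int, NOT_NULLABLE
    StructField("first_name", StringType(), True),  # -> text, NULLABLE
])
table_def = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[convert_struct_field(f) for f in schema.fields])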
def test_insert_data_into_hyper_file(self):
    data = [
        (1001, "Jane", "Doe"),
        (1002, "John", "Doe"),
        (2201, "Elonzo", "Smith")
    ]
    name = "output"
    table_def = TableDefinition(
        table_name=TableName("Extract", "Extract"),
        columns=[
            TableDefinition.Column(name=Name("id"), type=SqlType.big_int(), nullability=NULLABLE),
            TableDefinition.Column(name=Name("first_name"), type=SqlType.text(), nullability=NULLABLE),
            TableDefinition.Column(name=Name("last_name"), type=SqlType.text(), nullability=NULLABLE)
        ])
    path = insert_data_into_hyper_file(data, name, table_def)
    print(f'Database Path : {path}')
    tables = TestUtils.get_tables("Extract", "/tmp/hyperleaup/output/output.hyper")
    assert len(tables) == 1
    num_rows = TestUtils.get_row_count("Extract", "Extract", "/tmp/hyperleaup/output/output.hyper")
    assert num_rows == 3
def convert_datatype(coldatatype):
    """
    Converts the datatype of a column of a given dataframe.

    Args:
        coldatatype: Datatype of the column, in string format.

    Returns:
        A tuple of the Tableau Hyper extract compatible datatype and
        a default value to use for NaN cases.
    """
    datatype = SqlType.text()
    def_value = ''

    if 'datetime' in coldatatype.lower():
        datatype = SqlType.timestamp()
    elif 'str' in coldatatype.lower():
        datatype = SqlType.text()
    elif 'boolean' in coldatatype.lower():
        datatype = SqlType.bool()
    elif 'int' in coldatatype.lower():
        datatype = SqlType.int()
        def_value = 0
    elif 'float' in coldatatype.lower():
        datatype = SqlType.double()
        def_value = 0
    elif 'period' in coldatatype.lower():
        datatype = SqlType.interval()
    elif 'object' in coldatatype.lower():
        datatype = SqlType.text()
    else:
        datatype = SqlType.text()

    return (datatype, def_value)
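
# Hypothetical usage sketch: feeding pandas dtype strings through
# convert_datatype. The frame and column names are illustrative only.
import pandas as pd

df = pd.DataFrame({"name": ["a"], "count": [1], "score": [1.5]})
for col in df.columns:
    datatype, def_value = convert_datatype(str(df[col].dtype))
    print(col, datatype, def_value)  # e.g. "count" (int64) -> SqlType.int(), default 0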
def createHyperTable():
    columns = getConfig()
    page_table = TableDefinition(table_name="Page")
    for column in columns:
        if column['DataType'] == "String":
            dt = SqlType.text()
        elif column['DataType'] == "Date":
            dt = SqlType.date()
        else:
            dt = SqlType.big_int()
        page_table.add_column(column['ColumnName'], dt)
    return page_table
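
# Illustrative assumption about the config shape getConfig() returns: a list
# of dicts carrying 'ColumnName' and 'DataType' keys, which is all the loop
# above reads. The sample values are hypothetical.
sample_config = [
    {"ColumnName": "page_url", "DataType": "String"},
    {"ColumnName": "view_date", "DataType": "Date"},
    {"ColumnName": "view_count", "DataType": "Int"},  # falls through to big_int
]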
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")
        object_name = "mealprep.hyper"
        file_name = os.environ.get('bucket_name')
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            print(example_table)
            print(type(example_table))
            with Inserter(connection, example_table) as inserter:
                for i in request_data['data']:
                    inserter.add_row([i['breakfast'], i['lunch'], i['dinner']])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")

    with open('mealprep.hyper', 'rb') as reader:
        if object_name is None:
            object_name = file_name
        s3_client = boto3.client(
            's3',
            aws_access_key_id=os.environ.get('aws_access_key_id'),
            aws_secret_access_key=os.environ.get('aws_secret_access_key'))
        try:
            response = s3_client.upload_fileobj(reader, file_name, object_name)
        except ClientError as e:
            logging.error(e)
            return False
    return redirect(url_for('index'))
def test_get_table_def(self):
    data = [
        (1001, 1, "Jane", "Doe", "2000-05-01", 29.0, False),
        (1002, 2, "John", "Doe", "1988-05-03", 33.0, False),
        (2201, 3, "Elonzo", "Smith", "1990-05-03", 21.0, True),
        (None, None, None, None, None, None, None)  # Test Nulls
    ]
    get_spark_session()\
        .createDataFrame(data, ["id", "dept_id", "first_name", "last_name", "dob", "age", "is_temp"])\
        .createOrReplaceTempView("employees")
    df = get_spark_session().sql(
        "select id, cast(dept_id as short), first_name, "
        "last_name, dob, age, is_temp from employees")
    table_def = get_table_def(df, "Extract", "Extract")

    # Ensure that the table name matches
    assert table_def.table_name.name == Name("Extract")

    # Ensure that the TableDefinition column names match
    assert table_def.get_column(0).name == Name("id")
    assert table_def.get_column(1).name == Name("dept_id")
    assert table_def.get_column(2).name == Name("first_name")
    assert table_def.get_column(3).name == Name("last_name")
    assert table_def.get_column(4).name == Name("dob")
    assert table_def.get_column(5).name == Name("age")
    assert table_def.get_column(6).name == Name("is_temp")

    # Ensure that the column data types were converted correctly
    assert table_def.get_column(0).type == SqlType.big_int()
    assert table_def.get_column(1).type == SqlType.small_int()
    assert table_def.get_column(2).type == SqlType.text()
    assert table_def.get_column(3).type == SqlType.text()
    assert table_def.get_column(4).type == SqlType.text()
    assert table_def.get_column(5).type == SqlType.double()
    assert table_def.get_column(6).type == SqlType.bool()
def test_convert_struct_field(self):
    # ensure strings can be converted correctly
    first_name_col = StructField('first_name', StringType(), False)
    converted_col = convert_struct_field(first_name_col)
    assert converted_col.name == Name('first_name')
    assert converted_col.nullability is NOT_NULLABLE
    assert converted_col.type == SqlType.text()

    # ensure dates can be converted correctly
    date_col = StructField('update_date', DateType(), True)
    converted_col = convert_struct_field(date_col)
    assert converted_col.name == Name('update_date')
    assert converted_col.nullability is NULLABLE
    assert converted_col.type == SqlType.date()

    # ensure timestamps can be converted correctly
    timestamp_col = StructField('created_at', TimestampType(), False)
    converted_col = convert_struct_field(timestamp_col)
    assert converted_col.name == Name('created_at')
    assert converted_col.nullability is NOT_NULLABLE
    assert converted_col.type == SqlType.timestamp()
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    '''
    # Replace NaNs with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            # Create schema
            connection.catalog.create_schema('Extract')

            # Map the datatypes in the pandas dataframe to Hyper SqlTypes
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text()
            }

            # Build the list of column definitions from the dataframe's dtypes
            table_def = []
            for col_header in list(df):
                dtype_str = str(df[col_header].dtype)
                # Use dtype_str to look up the appropriate SqlType from dtype_map
                # and append the new column to the table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))

            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                inserter.execute()
def createHyperFile():
    parsed = parseData()
    file = "/Users/jharris/Desktop/workbookUsers.hyper"
    cols = parsed['cols']
    data = parsed['data']
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, file, CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('Extract')
            table = TableDefinition(
                TableName('Extract', 'Extract'),
                [TableDefinition.Column(col, SqlType.text()) for col in cols])
            connection.catalog.create_table(table)
            with Inserter(connection, table) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
def hyper_value_to_dss(self, value, tag=SqlType.text().tag):
    """
    Convert the value `value`, stored in a Hyper file under the storage type
    `tag`, to the appropriate DSS type.

    >>> from tableauhyperapi import TypeTag, SqlType
    >>> assert TypeTag.INT == SqlType.int().tag

    :param value: Value from the Hyper dataset
    :param tag: Storage type under which the value is stored in Hyper
    :return: output_value: The value converted to its DSS-compliant type
    """
    try:
        # try to retrieve the conversion function
        conversion_function = self.mapping_hyper_to_dss[tag][1]
    except Exception as err:
        logger.warning("Failed to retrieve the conversion function {}: {}".format(tag, err))
        raise err
    try:
        output_value = conversion_function(value)
    except Exception as err:
        logger.warning("Failed to convert value {} of type {} stored as {}".format(value, type(value), tag))
        raise err
    return output_value
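
# Assumed shape of `mapping_hyper_to_dss`, inferred only from the `[tag][1]`
# lookup above: a dict keyed by Hyper type tag whose values are tuples with
# the conversion function at index 1. A minimal sketch, not the real mapping.
from tableauhyperapi import SqlType

mapping_hyper_to_dss = {
    SqlType.int().tag: ("int", int),
    SqlType.text().tag: ("string", str),
}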
def run(self, args):
    """
    Runs the command

    :param args: Arguments from argparse.Namespace
    """
    input_file = Path(args.input_file)

    print("Listing tables with spatial columns")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Connects to the existing Hyper file
        with Connection(endpoint=hyper.endpoint, database=input_file) as connection:
            catalog = connection.catalog
            # Iterates over all schemas in the input file
            for schema_name in catalog.get_schema_names():
                # Iterates over all tables in the current schema
                for table in catalog.get_table_names(schema=schema_name):
                    table_definition = catalog.get_table_definition(name=table)
                    rows_in_table = connection.execute_scalar_query(
                        query=f"SELECT COUNT(*) FROM {table}")
                    spatial_columns = [
                        c.name for c in table_definition.columns
                        if c.type == SqlType.geography()
                    ]
                    if spatial_columns:
                        print(f"Table {table} with {rows_in_table} rows has"
                              f" {len(spatial_columns)} spatial columns: {spatial_columns}")
                    else:
                        print(f"Table {table} with {rows_in_table} rows has no spatial columns")
import numpy as np
import pandas as pd
from tqdm import tqdm
from tableauhyperapi import (
    HyperProcess,
    Telemetry,
    Connection,
    SqlType,
    TableDefinition,
    CreateMode,
    TableName,
    Inserter,
)

dtype_mapper = {
    "string": SqlType.text(),
    "str": SqlType.text(),
    "object": SqlType.text(),
    "O": SqlType.text(),
    "int64": SqlType.big_int(),
    "float64": SqlType.double(),
    "bool": SqlType.bool(),
    "datetime64[ns]": SqlType.timestamp(),
    "timedelta64[ns]": SqlType.interval(),
    "category": SqlType.text(),
}


def read_hyper(path_to_hyper_file, custom_schema="Extract"):
    """Read a Tableau Hyper file and turn it into a Pandas DataFrame."""
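
# Sketch (the frame and its column names are illustrative): deriving Hyper
# column definitions from a DataFrame's dtypes with the dtype_mapper above,
# falling back to text for any unmapped dtype.
df = pd.DataFrame({"city": ["Oslo"], "population": [700000]})
columns = [
    TableDefinition.Column(col, dtype_mapper.get(str(dtype), SqlType.text()))
    for col, dtype in df.dtypes.items()
]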
def fn_convert_to_hyper_types(given_type):
    switcher = {
        'empty': SqlType.text(),
        'bool': SqlType.bool(),
        'int': SqlType.big_int(),
        'float-dot': SqlType.double(),
        'date-YMD': SqlType.date(),
        'date-MDY': SqlType.date(),
        'date-DMY': SqlType.date(),
        'time-24': SqlType.time(),
        'time-12': SqlType.time(),
        'datetime-24-YMD': SqlType.timestamp(),
        'datetime-12-MDY': SqlType.timestamp(),
        'datetime-24-DMY': SqlType.timestamp(),
        'str': SqlType.text()
    }
    # Fall back to text for any unrecognized type
    return switcher.get(given_type, SqlType.text())
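
# Usage sketch under assumptions: the detected-type labels match the switcher
# keys above, and tableauhyperapi's TableDefinition is imported. Field names
# are hypothetical.
from tableauhyperapi import TableDefinition

detected_fields = [("order_id", "int"), ("order_date", "date-YMD"), ("note", "unknown")]
columns = [TableDefinition.Column(name, fn_convert_to_hyper_types(given))
           for name, given in detected_fields]  # "unknown" falls back to text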
def insert_box_events():
    # Hyper file instantiation
    path_to_database = Path(box_hyper_file)
    hyper_file_exists = Path.exists(path_to_database)

    # Start the Hyper API process
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Check whether the Hyper file exists. CreateMode.NONE will append;
        # CreateMode.CREATE_AND_REPLACE will create a net new file.
        create_mode = None
        if hyper_file_exists:
            create_mode = CreateMode.NONE
        else:
            create_mode = CreateMode.CREATE_AND_REPLACE

        # Open a new connection
        with Connection(endpoint=hyper.endpoint, database=path_to_database,
                        create_mode=create_mode) as connection:
            # Create the schema if it does not exist
            connection.catalog.create_schema_if_not_exists(schema=box_schema)

            # Instantiate the table schema
            box_events_table_def = TableDefinition(
                table_name=TableName(box_schema, box_events_table),
                columns=[
                    TableDefinition.Column(name='event_id', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='event_type', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_at', type=SqlType.timestamp_tz(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_id', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_name', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_login', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='source', type=SqlType.json(), nullability=NULLABLE),
                    TableDefinition.Column(name='ip_address', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='additional_details', type=SqlType.json(), nullability=NULLABLE)
                ])
            print('Found schema: {0} and table def: {1}'.format(
                box_events_table_def.table_name.schema_name,
                box_events_table_def.table_name))

            # Create the table if it does not exist and get the Box events table
            connection.catalog.create_table_if_not_exists(table_definition=box_events_table_def)
            table_name = TableName(box_schema, box_events_table)

            # Get the MAX row by created_at
            last_event_created_at = connection.execute_scalar_query(
                query=f"SELECT MAX(created_at) FROM {box_events_table_def.table_name}")
            if last_event_created_at is not None:
                print('Found last event in hyper file: {0}'.format(
                    last_event_created_at.to_datetime()))

            # Get the Box service account client
            auth = JWTAuth.from_settings_file(box_config)
            box_client = Client(auth)
            service_account = box_client.user().get()
            print('Found Service Account with name: {0}, id: {1}, and login: {2}'.format(
                service_account.name, service_account.id, service_account.login))

            # Get the current date and, if there is no latest event, the date one month back
            today = datetime.utcnow()
            if last_event_created_at is None:
                last_event_created_at = today - relativedelta.relativedelta(months=month_lookback)
            else:
                last_event_created_at = last_event_created_at.to_datetime().replace(
                    tzinfo=timezone.utc).astimezone(tz=None)

            # Get the Box enterprise events for the given date range
            print('Using date range for events today: {0} and starting datetime: {1}'.format(
                today, last_event_created_at))
            get_box_events(box_client, 0, last_event_created_at, today)

            # Insert the Box enterprise events into the Hyper file
            with Inserter(connection, box_events_table_def) as inserter:
                inserter.add_rows(rows=box_events)
                inserter.execute()

            # Number of rows in the "Box"."Events" table
            row_count = connection.execute_scalar_query(
                query=f"SELECT COUNT(*) FROM {table_name}")
            print(f"The number of rows in table {table_name} is {row_count}.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
query_res['column_ex'] = query_res['column_ex'].astype(str)
query_res['column_ex2'] = query_res['column_ex2'].astype(str)
query_res['column_ex3'] = query_res['column_ex3'].astype(str)
query_res['column_ex4'] = query_res['column_ex4'].astype(str)
query_res['column_ex5'] = query_res['column_ex5'].astype(str)
query_res['column_ex6'] = query_res['column_ex6'].astype(str)

with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with Connection(endpoint=hyper.endpoint,
                    database=path_to_database,
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
        connection.catalog.create_schema('Extract')

        # Define the hyper table (same definition as the full refresh)
        example_table = TableDefinition(TableName('Extract', 'Extract'), [
            TableDefinition.Column('column_ex', SqlType.varchar(500)),
            TableDefinition.Column('column_ex2', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int', SqlType.int()),
            TableDefinition.Column('column_ex3', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int2', SqlType.int()),
            TableDefinition.Column('column_ex4', SqlType.varchar(500)),
            TableDefinition.Column('column_ex5', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int3', SqlType.int()),
            TableDefinition.Column('column_ex6', SqlType.varchar(500)),
        ])
        print("The table is defined.")
        connection.catalog.create_table(table_definition=example_table)

        # Insert data from the dataframe into the hyper table
        with Inserter(connection, example_table) as inserter:
            for i in range(len(query_res)):
                inserter.add_row(
def Full_refresh(result):
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open == True:
                with Connection(hyperprocess.endpoint,
                                'Facebook_campaigns.hyper',
                                CreateMode.CREATE_AND_REPLACE) as connection:
                    if connection.is_open == True:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        DataTable = TableDefinition(TableName('Extract', 'Campaign_data'), [
                            # Below columns are extracted from the Report data API
                            TableDefinition.Column('Row_ID', SqlType.big_int()),
                            TableDefinition.Column('Inserted Date', SqlType.date()),
                            TableDefinition.Column('Date', SqlType.date()),
                            TableDefinition.Column('Account Id', SqlType.varchar(50)),
                            TableDefinition.Column('Account Name', SqlType.text()),
                            TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                            TableDefinition.Column('Campaign Name', SqlType.text()),
                            TableDefinition.Column('Impressions', SqlType.big_int()),
                            TableDefinition.Column('Clicks', SqlType.big_int()),
                            TableDefinition.Column('Reach', SqlType.big_int()),
                            TableDefinition.Column('Spend', SqlType.double()),
                            TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully Facebook Campaign Table is defined")
                        connection.catalog.create_table(DataTable)
                        with Inserter(connection, TableName('Extract', 'Campaign_data')) as inserter:
                            inserted_rows = 1
                            row_id = 1
                            for i in range(0, len(result["Campaign Id"])):
                                inserter.add_row([
                                    int(row_id),
                                    datetime.today(),
                                    datetime.strptime(result.loc[i, "Date"], '%Y-%m-%d'),
                                    str(result.loc[i, "Account Id"]),
                                    str(result.loc[i, "Account Name"]),
                                    str(result.loc[i, "Campaign Id"]),
                                    str(result.loc[i, "Campaign Name"]),
                                    int(result.loc[i, "Impressions"]),
                                    int(result.loc[i, "Clicks"]),
                                    int(result.loc[i, "Reach"]),
                                    float(result.loc[i, "Spend"]),
                                    float(result.loc[i, "Frequency"])
                                ])
                                row_id = row_id + 1
                                inserted_rows = inserted_rows + 1
                            inserter.execute()
                        print("Inserted rows are " + str(inserted_rows))
                        LogFileWrite("Inserted rows are " + str(inserted_rows))
                        table_name = TableName('Extract', 'Campaign_data')
                        # Delete duplicate rows, keeping only MAX(Row_ID) per group
                        delete_query = f'DELETE FROM {table_name} WHERE "Row_ID" NOT IN ('
                        delete_query += f'SELECT MAX("Row_ID") FROM {table_name} '
                        delete_query += 'GROUP BY "Date", "Campaign Id", "Campaign Name", '
                        delete_query += '"Account Id", "Impressions", '
                        delete_query += '"Clicks", "Account Name", "Reach", "Spend", "Frequency")'
                        connection.execute_command(delete_query)
                        print("Deleted duplicate rows")
                        LogFileWrite("Successfully deleted duplicate rows")
                    else:
                        print("Unable to open connection to hyper file")
                        LogFileWrite("Unable to open connection to hyper file")
                if connection.is_open == True:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                print("Connection is open or closed" + str(connection.is_open))
            else:
                print("Unable to start the Hyper process")
                LogFileWrite("Unable to start the Hyper process")
        if hyperprocess.is_open == True:
            hyperprocess.close()
            print("Forcefully shut down the Hyper process")
            LogFileWrite("Forcefully shut down the Hyper process")
        else:
            print("Hyper process is shut down")
            LogFileWrite("Hyper process is shut down")
        print("Connection is open or closed" + str(connection.is_open))
        print("Process is open or closed" + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is an exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
def __init__(self):
    self.location = "./output/meli_challenge_result.hyper"
    self.test_location = "../output/meli_challenge_result.hyper"
    self.searchResult_table = TableDefinition('results', [
        TableDefinition.Column('id', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('site_id', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('title', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('seller', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('price', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('prices', SqlType.json(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('sale_price', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('currency_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('available_quantity', SqlType.int(), Nullability.NULLABLE),
        TableDefinition.Column('sold_quantity', SqlType.int(), Nullability.NULLABLE),
        TableDefinition.Column('buying_mode', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('listing_type_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('stop_time', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('condition', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('permalink', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('thumbnail', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('accepts_mercadopago', SqlType.bool(), Nullability.NULLABLE),
        TableDefinition.Column('installments', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('address', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('shipping', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('seller_address', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('attributes', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('original_price', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('category_id', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('official_store_id', SqlType.int(), Nullability.NULLABLE),
        TableDefinition.Column('domain_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('catalog_product_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('tags', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('catalog_listing', SqlType.bool(), Nullability.NULLABLE),
        TableDefinition.Column('order_backend', SqlType.int(), Nullability.NULLABLE),
    ])
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, Name, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau.
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Name', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Location', type=SqlType.geography(), nullability=NOT_NULLABLE)
    ])


def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
import csv
import tableauserverclient as TSC
import yaml
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

# Create the table definition to be used in the Hyper file
workbook_table = TableDefinition(
    table_name="Workbooks",
    columns=[
        TableDefinition.Column("Project Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Content URL", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Created At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Project Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("size", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Updated At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE)
    ])

# Name of the Hyper file that will be generated locally and deployed to Tableau
path_to_database = Path("workbooks.hyper")

# Name of the project where the Hyper file will be deployed.
# This project must exist on your server.
PROJECT = 'HyperTest'
def run(self, args):
    """
    Runs the command

    :param args: Arguments from argparse.Namespace
    """
    input_file = Path(args.input_file)
    output_file = Path(args.output_file)

    if args.mode == AdjustVertexOrderMode.INVERT:
        print("Reversing vertex order of polygons in spatial columns")
    else:
        print("Adjusting vertex order of polygons assuming data source with "
              "flat-earth topology in spatial columns")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint) as connection:
            catalog = connection.catalog

            # Create the output database file; fails if the file already exists
            catalog.create_database(output_file)

            # Attach input and output database files
            catalog.attach_database(input_file, "input")
            catalog.attach_database(output_file, "output")

            # Iterate over all schemas in the input file
            for schema_name in catalog.get_schema_names("input"):
                # Create the schema in the output file
                catalog.create_schema_if_not_exists(SchemaName("output", schema_name.name))

                # Iterate over all tables in the input schema
                for in_table in catalog.get_table_names(schema_name):
                    table_definition = catalog.get_table_definition(name=in_table)
                    columns = table_definition.columns
                    out_table = TableName("output", schema_name.name, in_table.name)
                    out_table_definition = TableDefinition(out_table, columns)

                    # Create the table in the output file with the same table definition.
                    # Note that any constraints on the table in the input file
                    # will not be present in the table in the output file.
                    catalog.create_table(out_table_definition)

                    spatial_columns = [c.name for c in columns if c.type == SqlType.geography()]
                    in_column_modifiers = [
                        f"geo_{args.mode.value}_vertex_order({c.name})"
                        if c.type == SqlType.geography() else f"{c.name}"
                        for c in columns
                    ]

                    if len(spatial_columns) > 0:
                        print(f"Copying table {in_table} with {len(spatial_columns)} "
                              f"spatial columns: {spatial_columns}...")
                    else:
                        print(f"Copying table {in_table} with no spatial columns...")

                    row_count = connection.execute_command(
                        f"INSERT INTO {out_table} SELECT {','.join(in_column_modifiers)} FROM {in_table}")
                    print(f"  {row_count} rows copied")
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 2 21:35:47 2020

@author: Manish Chauhan || +91-9774083186
"""
from tableauhyperapi import HyperProcess, Inserter, Telemetry, Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    escape_name, escape_string_literal, HyperException, TableName

# Starting local hyper instance
extract_table = TableDefinition(
    table_name=TableName('extract', 'extract'),
    columns=[
        TableDefinition.Column("Sno", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Sale", SqlType.big_int()),
        TableDefinition.Column("Profit", SqlType.big_int()),
        # TableDefinition.Column("Date", SqlType.date())
    ])

data_to_insert = [["1", "Vishakha", 99999, 80]]


def run_create_hyper_file_from_csv():
    print("Inside function to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(
                endpoint=hyper.endpoint,
x.append(workbook.name)
y.append(workbook.owner_id)

for users in y:
    z.append(server.users.get_by_id(users))

with open('workbook_info.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(zip(x, y, z))

# HYPER API TO CREATE HYPER FILE
Workbook_Info_table = TableDefinition(
    table_name="Workbook_Info",
    columns=[
        TableDefinition.Column("Workbook Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Workbook LUID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Owner Info", SqlType.text(), NOT_NULLABLE)
    ])


def run_create_hyper_file_from_csv():
    path_to_database = Path("Workbook_Info.hyper")
    process_parameters = {
        "log_file_max_count": "2",
        "log_file_size_limit": "100M"
    }
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
                      parameters=process_parameters) as hyper:
""" # Change these to match your use case. hyper_file_path = "hyper_for_csv.hyper" table_name = TableName("Extract", "Extract") output_name = "output.csv" path_to_database = Path(hyper_file_path) # The table is called "Extract" and will be created in the "Extract" schema # and contains four columns. extract_table = TableDefinition( table_name=table_name, columns=[ TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NULLABLE), TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NULLABLE), TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE), TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NULLABLE) ]) def insert_data(): """
# Step 1: Start the Hyper instance
with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    # Step 2: Open a connection to the Hyper process
    with Connection(endpoint=hyper.endpoint,
                    create_mode=CreateMode.CREATE_AND_REPLACE,
                    database=PATH_TO_HYPER) as connection:
        print(connection.execute_scalar_query(
            query="SELECT 'securing connections with endpoint!'"))

        # Create the table definition with the table name
        empdetails = TableDefinition(
            table_name=TableName('Extract', 'empdetails'),
            columns=[
                TableDefinition.Column('EMP_ID', SqlType.int()),
                TableDefinition.Column('EMP_NAME', SqlType.text()),
                TableDefinition.Column('MONTHY_SALARY', SqlType.int()),
                TableDefinition.Column('UNI_FLAG', SqlType.int())
            ])

        # Create the table in the connection catalog
        connection.catalog.create_schema('Extract')
        connection.catalog.create_table(empdetails)

        print('Validating the data...')
        cmd = f"COPY {empdetails.table_name} FROM {escape_string_literal(PATH_TO_CSV)} " \
              "WITH (FORMAT csv, NULL 'NULL', DELIMITER ',', HEADER)"
        table_count = connection.execute_command(cmd)
        total_time = time.time() - start
        total_time = "{:.2f}".format(total_time)
def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file "spatial_data.hyper".
        # Replaces the file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # Hyper API's Inserter allows users to transform data during insertion.
            # To make use of data transformation during insertion, the inserter requires the following inputs:
            #   1. The connection to the Hyper instance containing the table.
            #   2. The table name or table definition into which data is inserted.
            #   3. A list of Inserter.ColumnMapping.
            #      This list informs the inserter how each column in the target table is transformed.
            #      The list must contain all the columns into which data is inserted.
            #      "Inserter.ColumnMapping" maps a valid SQL expression (if any) to a column in the target table.
            #      For example Inserter.ColumnMapping('target_column_name', f'{escape_name("colA")}*{escape_name("colB")}')
            #      means "target_column_name" contains the product of "colA" and "colB" after successful insertion.
            #      The SQL expression string is optional in Inserter.ColumnMapping.
            #      For a column without any transformation, only the column name is required.
            #      For example Inserter.ColumnMapping('no_data_transformation_column')
            #   4. The column definition of all input values provided to the inserter.

            # The inserter definition contains the column definitions for the values that are inserted.
            # The data input has two text values, Name and Location_as_text.
            inserter_definition = [
                TableDefinition.Column(name='Name', type=SqlType.text(), nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Location_as_text', type=SqlType.text(), nullability=NOT_NULLABLE)
            ]

            # Column 'Name' is inserted into "Extract"."Extract" as-is.
            # Column 'Location' in "Extract"."Extract" of geography type is computed from
            # Column 'Location_as_text' of text type using the expression
            # 'CAST("Location_as_text" AS GEOGRAPHY)'.
            # Inserter.ColumnMapping is used for mapping the CAST expression to Column 'Location'.
            column_mappings = [
                'Name',
                Inserter.ColumnMapping(
                    'Location',
                    f'CAST({escape_name("Location_as_text")} AS GEOGRAPHY)')
            ]

            # Data to be inserted.
            data_to_insert = [['Seattle', "point(-122.338083 47.647528)"],
                              ['Munich', "point(11.584329 48.139257)"]]

            # Insert data into the "Extract"."Extract" table with the CAST expression.
            with Inserter(connection, extract_table, column_mappings,
                          inserter_definition=inserter_definition) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
# -----------------------------------------------------------------------------
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="test02",
    columns=[
        TableDefinition.Column("ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Mathematics", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Science", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("English", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Japanese", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("History", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Geography", SqlType.text(), NOT_NULLABLE),
    ])


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a CSV into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")
with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    # Creates a new Hyper file or replaces the file if it already exists.
    with Connection(endpoint=hyper.endpoint,
                    database="drinks.hyper",
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
        # The default schema name is "public".
        # Define the table
        drinks_table = TableDefinition(
            table_name="drinks",
            columns=[
                TableDefinition.Column("country", SqlType.text(), NOT_NULLABLE),
                TableDefinition.Column("beer_servings", SqlType.big_int(), NOT_NULLABLE),
                TableDefinition.Column("spirit_servings", SqlType.big_int(), NOT_NULLABLE),
                TableDefinition.Column("wine_servings", SqlType.big_int(), NOT_NULLABLE),
                TableDefinition.Column("total_litres_of_pure_alcohol", SqlType.double(), NOT_NULLABLE),
                TableDefinition.Column("continent", SqlType.text(), NOT_NULLABLE)
            ])

        # Create the table
        connection.catalog.create_table(drinks_table)
#
# -----------------------------------------------------------------------------
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="Customer",
    columns=[
        TableDefinition.Column("Customer ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Customer Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Loyalty Reward Points", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Segment", SqlType.text(), NOT_NULLABLE)
    ])


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a CSV into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")
    path_to_database = Path("customer.hyper")
def _hyper_sql_type(self, source_column):
    """
    Finds the correct Hyper column type for source_column

    source_column (obj): Source column (Instance of google.cloud.bigquery.schema.SchemaField)

    Returns a tableauhyperapi.SqlType Object
    """
    source_column_type = source_column.field_type
    return_sql_type = {
        "BOOL": SqlType.bool(),
        "BYTES": SqlType.bytes(),
        "DATE": SqlType.date(),
        "DATETIME": SqlType.timestamp(),
        "INT64": SqlType.big_int(),
        "INTEGER": SqlType.int(),
        "NUMERIC": SqlType.numeric(18, 9),
        "FLOAT64": SqlType.double(),
        "STRING": SqlType.text(),
        "TIME": SqlType.time(),
        "TIMESTAMP": SqlType.timestamp_tz(),
    }.get(source_column_type)

    if return_sql_type is None:
        error_message = "No Hyper SqlType defined for BigQuery source type: {}".format(
            source_column_type)
        logger.error(error_message)
        raise LookupError(error_message)

    logger.debug(
        "Translated source column type {} to Hyper SqlType {}".format(
            source_column_type, return_sql_type))
    return return_sql_type
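
# Minimal stand-in sketch: _hyper_sql_type only reads `field_type`, so a
# namedtuple suffices to illustrate the lookup (the real argument is a
# google.cloud.bigquery.schema.SchemaField; the names and values here are
# assumptions).
from collections import namedtuple

FakeField = namedtuple("FakeField", ["name", "field_type"])
fields = [FakeField("user_id", "INT64"), FakeField("amount", "NUMERIC")]
# e.g. self._hyper_sql_type(fields[0]) -> SqlType.big_int()
#      self._hyper_sql_type(fields[1]) -> SqlType.numeric(18, 9)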
import sys
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

product_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="Products",
    columns=[
        TableDefinition.Column("category", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("title", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("price", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("available_quantity", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("sold_quantity", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("permalink", SqlType.text(), NOT_NULLABLE)
    ])


def call_mlapi_to_dict(SearchCategory):
    print("Start call_mlapi_to_dict.")
    try: