def test_insert_data_into_hyper_file(self):
    data = [(1001, "Jane", "Doe"), (1002, "John", "Doe"), (2201, "Elonzo", "Smith")]
    name = "output"
    table_def = TableDefinition(
        table_name=TableName("Extract", "Extract"),
        columns=[
            TableDefinition.Column(name=Name("id"), type=SqlType.big_int(), nullability=NULLABLE),
            TableDefinition.Column(name=Name("first_name"), type=SqlType.text(), nullability=NULLABLE),
            TableDefinition.Column(name=Name("last_name"), type=SqlType.text(), nullability=NULLABLE)
        ])
    path = insert_data_into_hyper_file(data, name, table_def)
    print(f'Database Path : {path}')
    tables = TestUtils.get_tables("Extract", "/tmp/hyperleaup/output/output.hyper")
    assert len(tables) == 1
    num_rows = TestUtils.get_row_count("Extract", "Extract", "/tmp/hyperleaup/output/output.hyper")
    assert num_rows == 3
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")
        object_name = "mealprep.hyper"
        file_name = os.environ.get('bucket_name')
        # The original snippet referenced an undefined `path_to_database`;
        # the file opened below suggests it is the local .hyper file.
        path_to_database = "mealprep.hyper"
        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            with Inserter(connection, example_table) as inserter:
                for i in request_data['data']:
                    inserter.add_row([i['breakfast'], i['lunch'], i['dinner']])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")

    with open('mealprep.hyper', 'rb') as reader:
        if object_name is None:
            object_name = file_name
        s3_client = boto3.client(
            's3',
            aws_access_key_id=os.environ.get('aws_access_key_id'),
            aws_secret_access_key=os.environ.get('aws_secret_access_key'))
        try:
            s3_client.upload_fileobj(reader, file_name, object_name)
        except ClientError as e:
            logging.error(e)
            return False
    return redirect(url_for('index'))
def convert_struct_field(column: StructField) -> TableDefinition.Column:
    """Converts a Spark StructField to a Tableau Hyper SqlType"""
    if column.dataType == IntegerType():
        sql_type = SqlType.int()
    elif column.dataType == LongType():
        sql_type = SqlType.big_int()
    elif column.dataType == ShortType():
        sql_type = SqlType.small_int()
    elif column.dataType == DoubleType():
        sql_type = SqlType.double()
    elif column.dataType == FloatType():
        sql_type = SqlType.double()
    elif column.dataType == BooleanType():
        sql_type = SqlType.bool()
    elif column.dataType == DateType():
        sql_type = SqlType.date()
    elif column.dataType == TimestampType():
        sql_type = SqlType.timestamp()
    elif column.dataType == StringType():
        sql_type = SqlType.text()
    else:
        # Trap the DecimalType case
        if str(column.dataType).startswith("DecimalType"):
            # Max precision is only up to 18 decimal places in Tableau Hyper API
            precision = column.dataType.precision if column.dataType.precision <= 18 else 18
            scale = column.dataType.scale
            sql_type = SqlType.numeric(precision, scale)
        else:
            raise ValueError(f'Invalid StructField datatype for column `{column.name}` : {column.dataType}')
    nullable = NULLABLE if column.nullable else NOT_NULLABLE
    return TableDefinition.Column(name=column.name, type=sql_type, nullability=nullable)
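# A minimal usage sketch for convert_struct_field above. The schema and field
# names here are illustrative assumptions, not part of the original snippet.
from pyspark.sql.types import StructType, StructField, LongType, StringType, DecimalType

example_schema = StructType([
    StructField("customer_id", LongType(), nullable=False),
    StructField("customer_name", StringType(), nullable=True),
    StructField("balance", DecimalType(precision=28, scale=2), nullable=True),
])

# Each Spark field becomes one Hyper column; DecimalType(28, 2) is clamped to numeric(18, 2).
hyper_columns = [convert_struct_field(field) for field in example_schema.fields]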
def to_hyper(df, hyper_file_name, custom_schema="Extract", custom_table_name="Extract"):
    """
    Write a Tableau Hyper file from a Pandas DataFrame.

    Currently can only write single table extracts, which is Tableau's
    default way of creating an extract.

    Args:
        df: Specify which DataFrame you want to output
        hyper_file_name: Specify the file name such as "Example.hyper"
        custom_schema: If you need to change the schema name. Defaults to "Extract"
        custom_table_name: If you need to change the table name. Defaults to "Extract"

    Returns:
        Tableau Hyper file
    """
    # Starts the Hyper Process
    with HyperProcess(
        telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
        parameters={"log_config": ""},
    ) as hyper:
        # Creates a .hyper file to put the data into
        with Connection(hyper.endpoint,
                        hyper_file_name,
                        CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(custom_schema)

            # Create a .hyper-compatible column definition from the
            # DataFrame's column names and dtypes
            column_names = [column for column in df.columns]
            column_dtype = [dtype for dtype in df.dtypes]
            hyper_table = TableDefinition(
                TableName(custom_schema, custom_table_name),
                [
                    TableDefinition.Column(
                        column_names[column],
                        dtype_mapper[str(column_dtype[column])])
                    for column in range(len(column_names))
                ],
            )
            connection.catalog.create_table(hyper_table)

            # Replace NaN with None, otherwise it will not be Null in Tableau
            df.replace({np.nan: None}, inplace=True)

            # Insert the data values into the hyper file
            data_to_insert = df.to_numpy()
            with Inserter(connection, hyper_table) as inserter:
                inserter.add_rows(tqdm(row for row in data_to_insert))
                inserter.execute()
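# to_hyper above relies on an external `dtype_mapper`; the original dict is not
# shown, so this is an assumed minimal mapping from Pandas dtype strings to Hyper types.
from tableauhyperapi import SqlType

dtype_mapper = {
    "int32": SqlType.int(),
    "int64": SqlType.big_int(),
    "float32": SqlType.double(),
    "float64": SqlType.double(),
    "bool": SqlType.bool(),
    "datetime64[ns]": SqlType.timestamp(),
    "object": SqlType.text(),
}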
def fn_build_hyper_columns(self, logger, timer, in_data_frame_structure):
    timer.start()
    list_to_return = []
    for current_field_structure in in_data_frame_structure:
        list_to_return.append(current_field_structure['order'])
        current_column_type = self.fn_convert_to_hyper_types(
            current_field_structure['type'])
        logger.debug(
            self.locale.gettext(
                'Column {column_order} having name [{column_name}] and type "{column_type}" '
                + 'will become "{column_type_new}"')
            .replace('{column_order}', str(current_field_structure['order']))
            .replace('{column_name}', current_field_structure['name'])
            .replace('{column_type}', str(current_field_structure['type']))
            .replace('{column_type_new}', str(current_column_type)))
        nullability_value = NULLABLE
        if current_field_structure['nulls'] == 0:
            nullability_value = NOT_NULLABLE
        list_to_return[current_field_structure['order']] = TableDefinition.Column(
            name=current_field_structure['name'],
            type=current_column_type,
            nullability=nullability_value)
    logger.info(self.locale.gettext('Building Hyper columns completed'))
    timer.stop()
    return list_to_return
def create_column_def(table_def_dict: dict):
    column_def = []
    for key in table_def_dict.keys():
        column_def.append(
            TableDefinition.Column(
                key,
                sql_type_dict[table_def_dict[key]['type']],
                nullable_dict[table_def_dict[key]['nullable']]))
    return column_def
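# A hedged illustration of the inputs create_column_def expects; the exact keys
# of `sql_type_dict` and `nullable_dict` in the original are assumptions.
from tableauhyperapi import SqlType, NULLABLE, NOT_NULLABLE

sql_type_dict = {"text": SqlType.text(), "int": SqlType.int(), "double": SqlType.double()}
nullable_dict = {True: NULLABLE, False: NOT_NULLABLE}

table_def_dict = {
    "id": {"type": "int", "nullable": False},
    "name": {"type": "text", "nullable": True},
}
columns = create_column_def(table_def_dict)  # -> list of TableDefinition.Column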
def _hyper_table_definition(self, source_table, hyper_table_name="Extract"):
    """
    Build a hyper table definition from source_schema

    source_table (obj): Source table (Instance of google.cloud.bigquery.table.Table)
    hyper_table_name (string): Name of the target Hyper table, default="Extract"

    Returns a tableauhyperapi.TableDefinition Object
    """
    logger.debug(
        "Building Hyper TableDefinition for table {}".format(source_table.reference)
    )
    target_cols = []
    for source_field in source_table.schema:
        this_name = source_field.name
        this_type = self._hyper_sql_type(source_field)
        this_col = TableDefinition.Column(name=this_name, type=this_type)

        # Check for Nullability
        this_mode = source_field.mode
        if this_mode == "REPEATED":
            raise HyperSQLTypeMappingError(
                "Field mode REPEATED is not implemented in Hyper"
            )
        if this_mode == "REQUIRED":
            this_col = TableDefinition.Column(
                this_name, this_type, Nullability.NOT_NULLABLE
            )
        target_cols.append(this_col)
        logger.debug("..Column {} - Type {}".format(this_name, this_type))

    target_schema = TableDefinition(
        table_name=TableName("Extract", hyper_table_name), columns=target_cols
    )
    return target_schema
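# _hyper_table_definition above calls self._hyper_sql_type, which is not shown.
# A plausible sketch of that method (assumed, not the original implementation),
# mapping BigQuery field types to Hyper types via SchemaField.field_type:
from tableauhyperapi import SqlType

def _hyper_sql_type(self, source_field):
    type_map = {
        "STRING": SqlType.text(),
        "INTEGER": SqlType.big_int(),
        "FLOAT": SqlType.double(),
        "BOOLEAN": SqlType.bool(),
        "DATE": SqlType.date(),
        "TIMESTAMP": SqlType.timestamp(),
    }
    try:
        return type_map[source_field.field_type]
    except KeyError:
        raise HyperSQLTypeMappingError(
            "Field type {} is not mapped to a Hyper type".format(source_field.field_type))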
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    '''
    # Replace nan's with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in the pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text()
            }
            table_def = []

            # Get column headers to loop through them
            df_columns = list(df)
            for col_header in df_columns:
                dtype_str = str(df[col_header].dtype)
                # Use dtype_str to look up the appropriate SqlType from dtype_map
                # and append a new column to the table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))

            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                inserter.execute()
def dss_columns_to_hyper_columns(self, dss_columns):
    """
    Convert the columns of the DSS dataset to Tableau Hyper columns

    :param dss_columns: columns of the DSS dataset
        :example: [{"name": "customer_id", "type": "bigint"}, ...]

    >>> dss_columns = [{"name": "customer_id", "type": "bigint"}]

    :return: hyper_columns: tableau hyper columns object
    """
    hyper_columns = []
    for dss_column in dss_columns:
        dss_column_name, dss_column_type = dss_column['name'], dss_column['type']
        hyper_type = self.type_converter.dss_type_to_hyper(dss_column_type)
        hyper_columns.append(TableDefinition.Column(dss_column_name, hyper_type))
    return hyper_columns
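# The type_converter used above is external to this snippet; a minimal sketch of
# what dss_type_to_hyper might look like (an assumption, not the plugin's actual code):
from tableauhyperapi import SqlType

class TypeConverter:
    DSS_TO_HYPER = {
        "bigint": SqlType.big_int(),
        "int": SqlType.int(),
        "double": SqlType.double(),
        "boolean": SqlType.bool(),
        "date": SqlType.timestamp(),
        "string": SqlType.text(),
    }

    def dss_type_to_hyper(self, dss_type):
        # Fall back to text for unmapped DSS types
        return self.DSS_TO_HYPER.get(dss_type, SqlType.text())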
def createHyperFile():
    parsed = parseData()  # renamed from `dict` to avoid shadowing the builtin
    file = "/Users/jharris/Desktop/workbookUsers.hyper"
    cols = parsed['cols']
    data = parsed['data']
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, file, CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('Extract')
            table = TableDefinition(
                TableName('Extract', 'Extract'),
                [TableDefinition.Column(col, SqlType.text()) for col in cols])
            connection.catalog.create_table(table)
            with Inserter(connection, table) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
def create_extract_schema(raw_data=None, raw_data_path=None):
    """
    Generates the Tableau Hyper file columns to be used for generation of the
    Hyper extract file.

    Args:
        raw_data ([dataframe], optional): Pass the raw pandas dataframe here.
            If this is not found, the function will check for raw_data_path.
            Defaults to None.
        raw_data_path ([string], optional): Pass the path of the csv file for
            the dataframe if the raw dataframe is not available. Defaults to None.

    Returns:
        tuple: (list of Tableau Hyper extract column objects, dataframe with
        nulls filled)
    """
    if raw_data is None:
        try:
            raw_data = pd.read_csv(raw_data_path)
        except (ValueError, FileNotFoundError):
            return consolelog('ERROR: No filepath found')

    columns = raw_data.columns.tolist()
    tableau_extract_columns = []
    for col in columns:
        conversion_values = convert_datatype(str(raw_data[[col]].dtypes[0]))
        converted_type = conversion_values[0]
        def_value = conversion_values[1]
        consolelog(
            f"Column: [{col}] with datatype ~{str(raw_data[[col]].dtypes[0])}~ converted to {converted_type}"
        )
        tableau_extract_columns.append(TableDefinition.Column(col, converted_type))
        raw_data[col] = raw_data[col].fillna(def_value)
    return (tableau_extract_columns, raw_data)
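# create_extract_schema above depends on a convert_datatype helper returning a
# (hyper_type, default_fill_value) pair; the original is not shown, so this is
# an assumed sketch consistent with how its return values are used.
from tableauhyperapi import SqlType

def convert_datatype(pandas_dtype: str):
    if pandas_dtype.startswith("int"):
        return SqlType.big_int(), 0
    if pandas_dtype.startswith("float"):
        return SqlType.double(), 0.0
    return SqlType.text(), ""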
""" # Change these to match your use case. hyper_file_path = "hyper_for_csv.hyper" table_name = TableName("Extract", "Extract") output_name = "output.csv" path_to_database = Path(hyper_file_path) # The table is called "Extract" and will be created in the "Extract" schema # and contains four columns. extract_table = TableDefinition( table_name=table_name, columns=[ TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NULLABLE), TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NULLABLE), TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE), TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NULLABLE) ]) def insert_data(): """
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 2 21:35:47 2020

@author: Manish Chauhan || +91-9774083186
"""
from tableauhyperapi import HyperProcess, Inserter, Telemetry, Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    escape_name, escape_string_literal, HyperException, TableName

# Starting local hyper instance
extract_table = TableDefinition(
    table_name=TableName('extract', 'extract'),
    columns=[
        TableDefinition.Column("Sno", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Sale", SqlType.big_int()),
        TableDefinition.Column("Profit", SqlType.big_int()),
        # TableDefinition.Column("Date", SqlType.date())
    ])

data_to_insert = [["1", "Vishakha", 99999, 80]]


def run_create_hyper_file_from_csv():
    print("Inside function to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(
                endpoint=hyper.endpoint,
def Full_refresh(result):
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open:
                with Connection(hyperprocess.endpoint,
                                'Facebook_campaigns.hyper',
                                CreateMode.CREATE_AND_REPLACE) as connection:
                    if connection.is_open:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        # The columns below are extracted from the Report data API
                        DataTable = TableDefinition(TableName('Extract', 'Campaign_data'), [
                            TableDefinition.Column('Row_ID', SqlType.big_int()),
                            TableDefinition.Column('Inserted Date', SqlType.date()),
                            TableDefinition.Column('Date', SqlType.date()),
                            TableDefinition.Column('Account Id', SqlType.varchar(50)),
                            TableDefinition.Column('Account Name', SqlType.text()),
                            TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                            TableDefinition.Column('Campaign Name', SqlType.text()),
                            TableDefinition.Column('Impressions', SqlType.big_int()),
                            TableDefinition.Column('Clicks', SqlType.big_int()),
                            TableDefinition.Column('Reach', SqlType.big_int()),
                            TableDefinition.Column('Spend', SqlType.double()),
                            TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully defined the Facebook Campaign table")
                        connection.catalog.create_table(DataTable)
                        with Inserter(connection, TableName('Extract', 'Campaign_data')) as inserter:
                            inserted_rows = 0  # start at 0 so the final count matches the rows inserted
                            row_id = 1
                            for i in range(0, len(result["Campaign Id"])):
                                inserter.add_row([
                                    int(row_id),
                                    datetime.today(),
                                    datetime.strptime(result.loc[i, "Date"], '%Y-%m-%d'),
                                    str(result.loc[i, "Account Id"]),
                                    str(result.loc[i, "Account Name"]),
                                    str(result.loc[i, "Campaign Id"]),
                                    str(result.loc[i, "Campaign Name"]),
                                    int(result.loc[i, "Impressions"]),
                                    int(result.loc[i, "Clicks"]),
                                    int(result.loc[i, "Reach"]),
                                    float(result.loc[i, "Spend"]),
                                    float(result.loc[i, "Frequency"])
                                ])
                                row_id += 1
                                inserted_rows += 1
                            inserter.execute()
                        print("Inserted rows: " + str(inserted_rows))
                        LogFileWrite("Inserted rows: " + str(inserted_rows))
                        # Keep only the latest Row_ID for each logical key, dropping duplicates
                        table_name = TableName('Extract', 'Campaign_data')
                        delete_query = (
                            f'DELETE FROM {table_name} WHERE "Row_ID" NOT IN ('
                            f'SELECT MAX("Row_ID") FROM {table_name} '
                            'GROUP BY "Date", "Campaign Id", "Campaign Name", '
                            '"Account Id", "Impressions", "Clicks", '
                            '"Account Name", "Reach", "Spend", "Frequency")'
                        )
                        connection.execute_command(delete_query)
                        print("Deleted duplicate rows")
                        LogFileWrite("Successfully deleted duplicate rows")
                    else:
                        print("Unable to open connection to hyper file")
                        LogFileWrite("Unable to open connection to hyper file")
                if connection.is_open:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                print("Connection is open or closed: " + str(connection.is_open))
            else:
                print("Unable to start the Hyper process")
                LogFileWrite("Unable to start the Hyper process")
        if hyperprocess.is_open:
            hyperprocess.close()
            print("Forcefully shut down the Hyper process")
            LogFileWrite("Forcefully shut down the Hyper process")
        else:
            print("Hyper process is shut down")
            LogFileWrite("Hyper process is shut down")
        print("Connection is open or closed: " + str(connection.is_open))
        print("Process is open or closed: " + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is an exception in starting the Tableau Hyper process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
def insert_box_events():
    # Hyper file instantiation
    path_to_database = Path(box_hyper_file)
    hyper_file_exists = Path.exists(path_to_database)

    # Start the Hyper API process
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Check if the Hyper file exists or not. CreateMode.NONE will append;
        # CreateMode.CREATE_AND_REPLACE will create a net new file.
        create_mode = None
        if hyper_file_exists:
            create_mode = CreateMode.NONE
        else:
            create_mode = CreateMode.CREATE_AND_REPLACE

        # Open a new connection
        with Connection(endpoint=hyper.endpoint, database=path_to_database,
                        create_mode=create_mode) as connection:
            # Create the schema if it does not exist
            connection.catalog.create_schema_if_not_exists(schema=box_schema)

            # Instantiate the table schema
            box_events_table_def = TableDefinition(
                table_name=TableName(box_schema, box_events_table),
                columns=[
                    TableDefinition.Column(name='event_id', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='event_type', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_at', type=SqlType.timestamp_tz(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_id', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_name', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_login', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='source', type=SqlType.json(), nullability=NULLABLE),
                    TableDefinition.Column(name='ip_address', type=SqlType.text(), nullability=NULLABLE),
                    TableDefinition.Column(name='additional_details', type=SqlType.json(), nullability=NULLABLE)
                ])
            print('Found schema: {0} and table def: {1}'.format(
                box_events_table_def.table_name.schema_name,
                box_events_table_def.table_name))

            # Create the table if it does not exist and get the Box events table
            connection.catalog.create_table_if_not_exists(
                table_definition=box_events_table_def)
            table_name = TableName(box_schema, box_events_table)

            # Get the MAX row by created_at
            last_event_created_at = connection.execute_scalar_query(
                query=f"SELECT MAX(created_at) FROM {box_events_table_def.table_name}")
            if last_event_created_at is not None:
                print('Found last event in hyper file: {0}'.format(
                    last_event_created_at.to_datetime()))

            # Get the Box service account client
            auth = JWTAuth.from_settings_file(box_config)
            box_client = Client(auth)
            service_account = box_client.user().get()
            print('Found Service Account with name: {0}, id: {1}, and login: {2}'
                  .format(service_account.name, service_account.id, service_account.login))

            # Get the current date, falling back to one month ago if there is no latest event
            today = datetime.utcnow()
            if last_event_created_at is None:
                last_event_created_at = today - relativedelta.relativedelta(months=month_lookback)
            else:
                last_event_created_at = last_event_created_at.to_datetime() \
                    .replace(tzinfo=timezone.utc).astimezone(tz=None)

            # Get the Box enterprise events for a given date range
            print('Using date range for events today: {0} and starting datetime: {1}'
                  .format(today, last_event_created_at))
            get_box_events(box_client, 0, last_event_created_at, today)

            # Insert the Box enterprise events into the Hyper file
            with Inserter(connection, box_events_table_def) as inserter:
                inserter.add_rows(rows=box_events)
                inserter.execute()

            # Number of rows in the "Box"."Events" table
            row_count = connection.execute_scalar_query(
                query=f"SELECT COUNT(*) FROM {table_name}")
            print(f"The number of rows in table {table_name} is {row_count}.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
# -----------------------------------------------------------------------------
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="Customer",
    columns=[
        TableDefinition.Column("Customer ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Customer Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Loyalty Reward Points", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Segment", SqlType.text(), NOT_NULLABLE)
    ])


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")
    path_to_database = Path("customer.hyper")
def __init__(self):
    self.location = "./output/meli_challenge_result.hyper"
    self.test_location = "../output/meli_challenge_result.hyper"
    self.searchResult_table = TableDefinition('results', [
        TableDefinition.Column('id', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('site_id', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('title', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('seller', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('price', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('prices', SqlType.json(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('sale_price', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('currency_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('available_quantity', SqlType.int(), Nullability.NULLABLE),
        TableDefinition.Column('sold_quantity', SqlType.int(), Nullability.NULLABLE),
        TableDefinition.Column('buying_mode', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('listing_type_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('stop_time', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('condition', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('permalink', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('thumbnail', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('accepts_mercadopago', SqlType.bool(), Nullability.NULLABLE),
        TableDefinition.Column('installments', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('address', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('shipping', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('seller_address', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('attributes', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('original_price', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('category_id', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('official_store_id', SqlType.int(), Nullability.NULLABLE),
        TableDefinition.Column('domain_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('catalog_product_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('tags', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('catalog_listing', SqlType.bool(), Nullability.NULLABLE),
        TableDefinition.Column('order_backend', SqlType.int(), Nullability.NULLABLE),
    ])
# Step 1: Start Hyper instance
with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    # Step 2: Open a connection to the Hyper process
    with Connection(endpoint=hyper.endpoint,
                    create_mode=CreateMode.CREATE_AND_REPLACE,
                    database=PATH_TO_HYPER) as connection:
        print(connection.execute_scalar_query(
            query="SELECT 'securing connections with endpoint!'"))

        # Create the table definition with a table name
        empdetails = TableDefinition(
            table_name=TableName('Extract', 'empdetails'),
            columns=[
                TableDefinition.Column('EMP_ID', SqlType.int()),
                TableDefinition.Column('EMP_NAME', SqlType.text()),
                TableDefinition.Column('MONTHY_SALARY', SqlType.int()),
                TableDefinition.Column('UNI_FLAG', SqlType.int())
            ])

        # Create the table in the connection catalog
        connection.catalog.create_schema('Extract')
        connection.catalog.create_table(empdetails)

        print('Validating the data...')
        cmd = (f"COPY {empdetails.table_name} FROM {escape_string_literal(PATH_TO_CSV)} "
               "WITH (FORMAT CSV, NULL 'NULL', DELIMITER ',', HEADER)")
        table_count = connection.execute_command(cmd)

        total_time = time.time() - start
        total_time = "{:.2f}".format(total_time)
def run_insert_data_with_expressions():
    """
    An example of how to push down computations to Hyper during insertion with expressions.
    """
    print("EXAMPLE - Push down computations to Hyper during insertion with expressions")
    path_to_database = Path("orders.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file "orders.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # Hyper API's Inserter allows users to transform data during insertion.
            # To make use of data transformation during insertion, the inserter requires the following inputs:
            # 1. The connection to the Hyper instance containing the table.
            # 2. The table name or table definition into which data is inserted.
            # 3. List of Inserter.ColumnMapping.
            #    This list informs the inserter how each column in the target table is transformed.
            #    The list must contain all the columns into which data is inserted.
            #    "Inserter.ColumnMapping" maps a valid SQL expression (if any) to a column in the target table.
            #    For example Inserter.ColumnMapping('target_column_name', f'{escape_name("colA")}*{escape_name("colB")}')
            #    means the column "target_column_name" contains the product of "colA" and "colB" after successful insertion.
            #    The SQL expression string is optional in Inserter.ColumnMapping;
            #    for a column without any transformation only the column name is required.
            #    For example Inserter.ColumnMapping('no_data_transformation_column')
            # 4. The column definition of all input values provided to the Inserter.

            # Inserter definition contains the column definition for the values that are inserted
            inserter_definition = [
                TableDefinition.Column(name='Order ID', type=SqlType.int(), nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Ship Timestamp Text', type=SqlType.text(), nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Ship Mode', type=SqlType.text(), nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Ship Priority Text', type=SqlType.text(), nullability=NOT_NULLABLE)]

            # Column 'Order ID' is inserted into "Extract"."Extract" as-is.
            # Column 'Ship Timestamp' in "Extract"."Extract" of timestamp type is computed
            #   from Column 'Ship Timestamp Text' of text type using 'to_timestamp()'.
            # Column 'Ship Mode' is inserted into "Extract"."Extract" as-is.
            # Column 'Ship Priority' in "Extract"."Extract" of integer type is computed
            #   from Column 'Ship Priority Text' of text type using a 'CASE' expression.
            ship_priority_as_int_case_expression = \
                f'CASE {escape_name("Ship Priority Text")} ' \
                f'WHEN {escape_string_literal("Urgent")} THEN 1 ' \
                f'WHEN {escape_string_literal("Medium")} THEN 2 ' \
                f'WHEN {escape_string_literal("Low")} THEN 3 END'

            column_mappings = [
                'Order ID',
                Inserter.ColumnMapping(
                    'Ship Timestamp',
                    f'to_timestamp({escape_name("Ship Timestamp Text")}, {escape_string_literal("YYYY-MM-DD HH24:MI:SS")})'),
                'Ship Mode',
                Inserter.ColumnMapping('Ship Priority', ship_priority_as_int_case_expression)
            ]

            # Data to be inserted
            data_to_insert = [
                [399, '2012-09-13 10:00:00', 'Express Class', 'Urgent'],
                [530, '2012-07-12 14:00:00', 'Standard Class', 'Low']
            ]

            # Insert data into "Extract"."Extract" table with expressions
            with Inserter(connection, extract_table, column_mappings,
                          inserter_definition=inserter_definition) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

            print("The data was added to the table.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
x.append(workbook.name)
y.append(workbook.owner_id)

for users in y:
    z.append(server.users.get_by_id(users))

with open('workbook_info.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(zip(x, y, z))

# HYPER API TO CREATE HYPER FILE
Workbook_Info_table = TableDefinition(
    table_name="Workbook_Info",
    columns=[
        TableDefinition.Column("Workbook Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Workbook LUID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Owner Info", SqlType.text(), NOT_NULLABLE)
    ])


def run_create_hyper_file_from_csv():
    path_to_database = Path("Workbook_Info.hyper")
    process_parameters = {
        "log_file_max_count": "2",
        "log_file_size_limit": "100M"
    }
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
                      parameters=process_parameters) as hyper:
from tableauhyperapi import Connection, HyperProcess, SqlType, TableDefinition, \
    escape_string_literal, escape_name, NOT_NULLABLE, Telemetry, Inserter, CreateMode, \
    TableName, HyperException  # HyperException added: it is caught below


def run_create_hyper_file():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        print("The HyperProcess has started.")
        with Connection(hyper.endpoint, 'TrivialExample.hyper', CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('rowID', SqlType.big_int()),
                TableDefinition.Column('value', SqlType.big_int()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            with Inserter(connection, example_table) as inserter:
                for i in range(1, 101):
                    inserter.add_row([i, i])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")


if __name__ == "__main__":
    try:
        run_create_hyper_file()
    except HyperException as ex:
        print(ex)
import csv
import tableauserverclient as TSC
import yaml
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

# Create table definition to be used in Hyper file
workbook_table = TableDefinition(
    table_name="Workbooks",
    columns=[
        TableDefinition.Column("Project Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Content URL", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Created At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Project Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("size", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Updated At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE)
    ])

# Name of hyper file that will be generated locally and deployed to Tableau
path_to_database = Path("workbooks.hyper")

# Name of project where Hyper file will be deployed.
# This project must exist on your server.
PROJECT = 'HyperTest'
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, Name, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau.
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Name', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Location', type=SqlType.geography(), nullability=NOT_NULLABLE)
    ])


def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
import sys
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

product_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="Products",
    columns=[
        TableDefinition.Column("category", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("title", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("price", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("available_quantity", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("sold_quantity", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("permalink", SqlType.text(), NOT_NULLABLE)
    ])


def call_mlapi_to_dict(SearchCategory):
    print("Start call_mlapi_to_dict.")
    try:
def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Creates new Hyper file "spatial_data.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # Hyper API's Inserter allows users to transform data during insertion.
            # To make use of data transformation during insertion, the inserter requires the following inputs:
            # 1. The connection to the Hyper instance containing the table.
            # 2. The table name or table definition into which data is inserted.
            # 3. List of Inserter.ColumnMapping.
            #    This list informs the inserter how each column in the target table is transformed.
            #    The list must contain all the columns into which data is inserted.
            #    "Inserter.ColumnMapping" maps a valid SQL expression (if any) to a column in the target table.
            #    For example Inserter.ColumnMapping('target_column_name', f'{escape_name("colA")}*{escape_name("colB")}')
            #    means the column "target_column_name" contains the product of "colA" and "colB" after successful insertion.
            #    The SQL expression string is optional in Inserter.ColumnMapping;
            #    for a column without any transformation only the column name is required.
            #    For example Inserter.ColumnMapping('no_data_transformation_column')
            # 4. The column definition of all input values provided to the Inserter.

            # Inserter definition contains the column definition for the values that are inserted.
            # The data input has two text values: Name and Location_as_text.
            inserter_definition = [
                TableDefinition.Column(name='Name', type=SqlType.text(), nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Location_as_text', type=SqlType.text(), nullability=NOT_NULLABLE)
            ]

            # Column 'Name' is inserted into "Extract"."Extract" as-is.
            # Column 'Location' in "Extract"."Extract" of geography type is computed from
            # Column 'Location_as_text' of text type using the expression
            # 'CAST("Location_as_text" AS GEOGRAPHY)'.
            # Inserter.ColumnMapping is used for mapping the CAST expression to Column 'Location'.
            column_mappings = [
                'Name',
                Inserter.ColumnMapping(
                    'Location',
                    f'CAST({escape_name("Location_as_text")} AS GEOGRAPHY)')
            ]

            # Data to be inserted.
            data_to_insert = [['Seattle', "point(-122.338083 47.647528)"],
                              ['Munich', "point(11.584329 48.139257)"]]

            # Insert data into "Extract"."Extract" table with CAST expression.
            with Inserter(connection, extract_table, column_mappings,
                          inserter_definition=inserter_definition) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

            print("The data was added to the table.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
query_res['column_ex'] = query_res['column_ex'].astype(str)
query_res['column_ex2'] = query_res['column_ex2'].astype(str)
query_res['column_ex3'] = query_res['column_ex3'].astype(str)
query_res['column_ex4'] = query_res['column_ex4'].astype(str)
query_res['column_ex5'] = query_res['column_ex5'].astype(str)
query_res['column_ex6'] = query_res['column_ex6'].astype(str)

with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with Connection(endpoint=hyper.endpoint,
                    database=path_to_database,
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
        connection.catalog.create_schema('Extract')

        # Define the hyper table (same definition as the full refresh)
        example_table = TableDefinition(TableName('Extract', 'Extract'), [
            TableDefinition.Column('column_ex', SqlType.varchar(500)),
            TableDefinition.Column('column_ex2', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int', SqlType.int()),
            TableDefinition.Column('column_ex3', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int2', SqlType.int()),
            TableDefinition.Column('column_ex4', SqlType.varchar(500)),
            TableDefinition.Column('column_ex5', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int3', SqlType.int()),
            TableDefinition.Column('column_ex6', SqlType.varchar(500)),
        ])
        print("The table is defined.")
        connection.catalog.create_table(table_definition=example_table)

        # Insert data from dataframe to hyper table
        with Inserter(connection, example_table) as inserter:
            for i in range(len(query_res)):
                inserter.add_row(
with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    # Creates a new Hyper file or replaces the file if it already exists.
    with Connection(endpoint=hyper.endpoint,
                    database="drinks.hyper",
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
        # Default schema name is "public".
        # Define the table.
        drinks_table = TableDefinition(
            table_name="drinks",
            columns=[
                TableDefinition.Column("country", SqlType.text(), NOT_NULLABLE),
                TableDefinition.Column("beer_servings", SqlType.big_int(), NOT_NULLABLE),
                TableDefinition.Column("spirit_servings", SqlType.big_int(), NOT_NULLABLE),
                TableDefinition.Column("wine_servings", SqlType.big_int(), NOT_NULLABLE),
                TableDefinition.Column("total_litres_of_pure_alcohol", SqlType.double(), NOT_NULLABLE),
                TableDefinition.Column("continent", SqlType.text(), NOT_NULLABLE)
            ])

        # Create the table.
        connection.catalog.create_table(drinks_table)
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, Name, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau.
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Order ID', type=SqlType.int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Ship Timestamp', type=SqlType.timestamp(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Ship Mode', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Ship Priority', type=SqlType.int(), nullability=NOT_NULLABLE)
    ])


def run_insert_data_with_expressions():
    """
    An example of how to push down computations to Hyper during insertion with expressions.
    """
    print("EXAMPLE - Push down computations to Hyper during insertion with expressions")
    path_to_database = Path("orders.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
# -----------------------------------------------------------------------------
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="test02",
    columns=[
        TableDefinition.Column("ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Mathematics", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Science", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("English", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Japanese", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("History", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Geography", SqlType.text(), NOT_NULLABLE),
    ])


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")
# with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU, 'myapp') as hyper:
# Step 2: Create the .hyper file, replace it if it already exists
with Connection(endpoint=hyper.endpoint,
                create_mode=CreateMode.CREATE_AND_REPLACE,
                database=path_to_hyper) as connection:
    # Step 3: Create the schema
    connection.catalog.create_schema('Extract')

    # Step 4: Create the table definition
    schema = TableDefinition(
        table_name=TableName('Extract', 'Extract'),
        columns=[
            TableDefinition.Column('name', SqlType.text()),
            TableDefinition.Column('date', SqlType.date()),
            TableDefinition.Column('temperature', SqlType.double()),
            TableDefinition.Column('chance_precipitation', SqlType.double()),
            TableDefinition.Column('precipitation', SqlType.double()),
            TableDefinition.Column('wind_speed', SqlType.double()),
            TableDefinition.Column('wind_gust', SqlType.double()),
            TableDefinition.Column('visiblity', SqlType.double()),  # (sic) column name kept as defined upstream
            TableDefinition.Column('cloud_cover', SqlType.double()),
            TableDefinition.Column('relative_humidity', SqlType.double()),
            TableDefinition.Column('moon_phase', SqlType.double()),
            TableDefinition.Column('condition', SqlType.text()),
        ])

    # Step 5: Create the table in the connection catalog
    connection.catalog.create_table(schema)