def createTabTable(tableName, columnHeading, dataTypes):
    extract_table = TableDefinition(table_name=TableName("Extract", tableName))
    for head in columnHeading:
        # mapper maps a dtype name to a SqlType constructor expression, resolved with eval()
        extract_table.add_column(head, eval(mapper[dataTypes.pop(0)]))
    return extract_table
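# A minimal sketch of the `mapper` lookup that createTabTable() assumes: a
# module-level dict (hypothetical names here, not from the original source)
# from source dtype names to SqlType constructor expressions, resolved via eval().
mapper = {
    "string": "SqlType.text()",
    "int64": "SqlType.big_int()",
    "float64": "SqlType.double()",
    "datetime64[ns]": "SqlType.timestamp()",
}

# Example call: builds an "Extract".<tableName> definition with two columns.
table = createTabTable("Extract", ["name", "age"], ["string", "int64"])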
def to_hyper(df, hyper_file_name, custom_schema="Extract", custom_table_name="Extract"):
    """
    Write a Tableau Hyper file from a Pandas DataFrame.

    Currently can only write single-table extracts, which is Tableau's
    default way of creating an extract.

    Args:
        df: Specify which DataFrame you want to output
        hyper_file_name: Specify the file name such as "Example.hyper"
        custom_schema: If you need to change the schema name. Defaults to "Extract"
        custom_table_name: If you need to change the table name. Defaults to "Extract"

    Returns:
        Tableau Hyper file
    """
    # Starts the Hyper Process
    with HyperProcess(
        telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
        parameters={"log_config": ""},
    ) as hyper:
        # Creates a .hyper file to put the data into
        with Connection(hyper.endpoint, hyper_file_name, CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(custom_schema)

            # Create a .hyper-compatible column definition from the
            # DataFrame's column names and dtypes
            column_names = [column for column in df.columns]
            column_dtype = [dtype for dtype in df.dtypes]
            hyper_table = TableDefinition(
                TableName(custom_schema, custom_table_name),
                [
                    TableDefinition.Column(
                        column_names[column], dtype_mapper[str(column_dtype[column])])
                    for column in range(len(column_names))
                ],
            )
            connection.catalog.create_table(hyper_table)

            # Replace NaN with None, otherwise it will not be Null in Tableau
            df.replace({np.nan: None}, inplace=True)

            # Insert the data values into the hyper file
            data_to_insert = df.to_numpy()
            with Inserter(connection, hyper_table) as inserter:
                inserter.add_rows(tqdm((row for row in data_to_insert)))
                inserter.execute()
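# A minimal usage sketch for to_hyper(), assuming pandas/numpy are imported as
# pd/np and that the module-level dtype_mapper dict referenced above is defined.
# The DataFrame and file names are illustrative only.
import pandas as pd

df = pd.DataFrame({"customer_id": [1, 2], "name": ["Jane", "John"]})
to_hyper(df, "example.hyper")  # default "Extract"."Extract"
to_hyper(df, "example_custom.hyper",
         custom_schema="Staging", custom_table_name="Customers")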
def test_insert_data_into_hyper_file(self):
    data = [(1001, "Jane", "Doe"),
            (1002, "John", "Doe"),
            (2201, "Elonzo", "Smith")]
    name = "output"
    table_def = TableDefinition(
        table_name=TableName("Extract", "Extract"),
        columns=[
            TableDefinition.Column(name=Name("id"), type=SqlType.big_int(), nullability=NULLABLE),
            TableDefinition.Column(name=Name("first_name"), type=SqlType.text(), nullability=NULLABLE),
            TableDefinition.Column(name=Name("last_name"), type=SqlType.text(), nullability=NULLABLE)
        ])
    path = insert_data_into_hyper_file(data, name, table_def)
    print(f'Database Path : {path}')
    tables = TestUtils.get_tables("Extract", "/tmp/hyperleaup/output/output.hyper")
    assert len(tables) == 1
    num_rows = TestUtils.get_row_count("Extract", "Extract", "/tmp/hyperleaup/output/output.hyper")
    assert num_rows == 3
def importTextSchema(schemaJson):
    try:
        if 'name' not in schemaJson:
            print("[ERROR] No table name defined in the schema json. "
                  "Key 'name' required of type STRING.\nExiting now.")
            exit(-1)
        if 'columns' not in schemaJson:
            print("[ERROR] No columns defined in the schema json. "
                  "Key 'columns' required of type [OBJECT].\nExiting now.")
            exit(-1)
        print('[INFO] Creating temp landing table.')
        # Load every column as TEXT into the temporary landing table
        table_def = TableDefinition('temptable')
        for c in schemaJson['columns']:
            colFunc = typeObj['TEXT']
            colType = colFunc()
            table_def.add_column(c['name'], colType)
        if table_def is None:
            print('[ERROR] A fatal error occurred while creating the temp table.\nExiting now.')
            exit(-1)
    except HyperException as e:
        print('[ERROR] A fatal error occurred while reading the schema definition:\n', e, '\nExiting now.')
        exit(-1)
    return table_def
def create_schema(self, schema_dss, destination_file_path):
    """
    Create the Tableau Hyper file and its target table from the DSS schema.

    :param schema_dss: DSS schema from the DSS dataset to export
        example: [{"columns": [{"name": "customer_id", "type": "bigint"}, ...]}, ...]
    :param destination_file_path: Path of the output Hyper file
    :return:
    """
    # Read the destination file of the dss
    self.output_file = destination_file_path
    logger.info("Writing the Tableau Hyper file to the following location: {}".format(destination_file_path))
    logger.info("The dataset to export has the following schema: {}".format(schema_dss))

    dss_columns = schema_dss['columns']
    dss_storage_types = [column_descriptor['type'] for column_descriptor in dss_columns]
    self.schema_converter.set_dss_storage_types(dss_storage_types)

    self.is_geo_table = dss_is_geo(schema_dss)
    logger.info("The input dataset contains a geo column: {}".format(self.is_geo_table))

    if not self.schema_name or not self.table_name:
        logger.warning("Did not receive the table or schema name.")
        raise ValueError("No valid schema or table name received.")

    logger.info("Received target schema {} and table {}".format(self.schema_name, self.table_name))

    # Create the Tableau Hyper schema from the DSS schema
    self.output_table_definition = TableDefinition(
        TableName(self.schema_name, self.table_name),
        self.schema_converter.dss_columns_to_hyper_columns(dss_columns))

    # Open connection to file
    self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
    self.connection = Connection(self.hyper.endpoint, self.output_file, CreateMode.CREATE_AND_REPLACE)
    assert self.connection is not None
    self.connection.catalog.create_schema(self.schema_name)
    self.connection.catalog.create_table(self.output_table_definition)

    # Handle the geo case: geography columns are first staged as text in a temporary table
    if self.is_geo_table:
        logger.info("Detected geo column. Creating a temporary table...")
        dss_tmp_schema = geo_to_text(schema_dss)
        dss_tmp_columns = dss_tmp_schema['columns']
        self.tmp_table_definition = TableDefinition(
            TableName(self.schema_name, "tmp_" + self.table_name),
            self.schema_converter.dss_columns_to_hyper_columns(dss_tmp_columns))
        self.connection.catalog.create_table(self.tmp_table_definition)
        logger.info("Created temporary table")
def createHyperTable():
    columns = getConfig()
    page_table = TableDefinition(table_name="Page")
    for i in range(0, len(columns)):
        if columns[i]['DataType'] == "String":
            dt = SqlType.text()
        elif columns[i]['DataType'] == "Date":
            dt = SqlType.date()
        else:
            dt = SqlType.big_int()
        page_table.add_column(columns[i]['ColumnName'], dt)
    return page_table
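# A minimal sketch of the column config createHyperTable() assumes getConfig()
# returns: a list of dicts with 'ColumnName' and 'DataType' keys, where any
# value other than "String" or "Date" falls back to big_int. The names and
# values below are illustrative assumptions, not from the original source.
def getConfig():
    return [
        {"ColumnName": "page_title", "DataType": "String"},
        {"ColumnName": "view_date", "DataType": "Date"},
        {"ColumnName": "view_count", "DataType": "Integer"},  # falls back to big_int
    ]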
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    '''
    # Replace nan's with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in the pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text()
            }
            table_def = []

            # Get column headers to loop through them
            df_columns = list(df)
            for col_header in df_columns:
                dtype_str = str(df[col_header].dtype)
                # Use dtype_str to look up the appropriate SqlType from dtype_map
                # and append a new column to the table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))

            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                inserter.execute()
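# A minimal usage sketch for df_to_extract(), assuming pandas is imported as pd.
# The DataFrame contents and output path are illustrative only.
import pandas as pd

sales = pd.DataFrame({
    "region": ["North", "South"],
    "units": [120, 95],
    "revenue": [1450.0, 990.5],
})
df_to_extract(sales, "sales_extract.hyper")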
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")

        object_name = "mealprep.hyper"
        file_name = os.environ.get('bucket_name')

        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            print(example_table)
            print(type(example_table))
            with Inserter(connection, example_table) as inserter:
                for i in request_data['data']:
                    inserter.add_row([i['breakfast'], i['lunch'], i['dinner']])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")

    # Upload the generated Hyper file to S3
    with open('mealprep.hyper', 'rb') as reader:
        if object_name is None:
            object_name = file_name
        s3_client = boto3.client(
            's3',
            aws_access_key_id=os.environ.get('aws_access_key_id'),
            aws_secret_access_key=os.environ.get('aws_secret_access_key'))
        try:
            response = s3_client.upload_fileobj(reader, file_name, object_name)
        except ClientError as e:
            logging.error(e)
            return False
    return redirect(url_for('index'))
def fn_build_hyper_columns(self, logger, timer, in_data_frame_structure):
    timer.start()
    list_to_return = []
    for current_field_structure in in_data_frame_structure:
        list_to_return.append(current_field_structure['order'])
        current_column_type = self.fn_convert_to_hyper_types(current_field_structure['type'])
        logger.debug(self.locale.gettext(
            'Column {column_order} having name [{column_name}] and type "{column_type}" '
            + 'will become "{column_type_new}"')
            .replace('{column_order}', str(current_field_structure['order']))
            .replace('{column_name}', current_field_structure['name'])
            .replace('{column_type}', str(current_field_structure['type']))
            .replace('{column_type_new}', str(current_column_type)))
        nullability_value = NULLABLE
        if current_field_structure['nulls'] == 0:
            nullability_value = NOT_NULLABLE
        list_to_return[current_field_structure['order']] = TableDefinition.Column(
            name=current_field_structure['name'],
            type=current_column_type,
            nullability=nullability_value)
    logger.info(self.locale.gettext('Building Hyper columns completed'))
    timer.stop()
    return list_to_return
def convert_struct_field(column: StructField) -> TableDefinition.Column:
    """Converts a Spark StructField to a Tableau Hyper TableDefinition.Column"""
    if column.dataType == IntegerType():
        sql_type = SqlType.int()
    elif column.dataType == LongType():
        sql_type = SqlType.big_int()
    elif column.dataType == ShortType():
        sql_type = SqlType.small_int()
    elif column.dataType == DoubleType():
        sql_type = SqlType.double()
    elif column.dataType == FloatType():
        sql_type = SqlType.double()
    elif column.dataType == BooleanType():
        sql_type = SqlType.bool()
    elif column.dataType == DateType():
        sql_type = SqlType.date()
    elif column.dataType == TimestampType():
        sql_type = SqlType.timestamp()
    elif column.dataType == StringType():
        sql_type = SqlType.text()
    else:
        # Trap the DecimalType case
        if str(column.dataType).startswith("DecimalType"):
            # Max precision is only up to 18 decimal places in Tableau Hyper API
            precision = column.dataType.precision if column.dataType.precision <= 18 else 18
            scale = column.dataType.scale
            sql_type = SqlType.numeric(precision, scale)
        else:
            raise ValueError(f'Invalid StructField datatype for column `{column.name}` : {column.dataType}')
    nullable = NULLABLE if column.nullable else NOT_NULLABLE
    return TableDefinition.Column(name=column.name, type=sql_type, nullability=nullable)
def get_table_def(df: DataFrame, schema_name: str, table_name: str) -> TableDefinition:
    """Returns a Tableau TableDefinition given a Spark DataFrame"""
    schema = df.schema
    cols = list(map(convert_struct_field, schema))
    # Use the schema and table names passed in by the caller
    return TableDefinition(table_name=TableName(schema_name, table_name), columns=cols)
def get_table_def(df: DataFrame, schema_name: str = 'Extract', table_name: str = 'Extract') -> TableDefinition:
    """Returns a Tableau TableDefinition given a Spark DataFrame"""
    schema = df.schema
    cols = list(map(HyperUtils.convert_struct_field, schema))
    return TableDefinition(table_name=TableName(schema_name, table_name), columns=cols)
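# A minimal usage sketch for get_table_def(), assuming a local SparkSession is
# available and that HyperUtils.convert_struct_field from the snippets above is
# importable. The column names and data are illustrative only.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1, "Jane"), (2, "John")], ["id", "name"])
table_def = get_table_def(df, schema_name="Extract", table_name="Extract")
for col in table_def.columns:
    print(col.name, col.type)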
def create_column_def(table_def_dict: dict):
    column_def = []
    for key in table_def_dict.keys():
        column_def.append(
            TableDefinition.Column(
                key,
                sql_type_dict[table_def_dict[key]['type']],
                nullable_dict[table_def_dict[key]['nullable']]))
    return column_def
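# A minimal sketch of the lookup dicts create_column_def() relies on. The exact
# keys are assumptions for illustration; only the shape (string -> SqlType and
# string -> nullability constant) is implied by the function above.
sql_type_dict = {
    "text": SqlType.text(),
    "int": SqlType.int(),
    "big_int": SqlType.big_int(),
    "double": SqlType.double(),
    "date": SqlType.date(),
}
nullable_dict = {
    "yes": NULLABLE,
    "no": NOT_NULLABLE,
}

# Example: a two-column definition built from a dict keyed by column name.
columns = create_column_def({
    "customer_id": {"type": "big_int", "nullable": "no"},
    "customer_name": {"type": "text", "nullable": "yes"},
})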
def fn_create_hyper_table(self, local_logger, timer, in_dict):
    timer.start()
    out_hyper_table = TableDefinition(
        TableName(in_dict['schema name'], in_dict['table name']),
        columns=in_dict['columns'],
    )
    in_dict['connection'].catalog.create_table(table_definition=out_hyper_table)
    local_logger.info(self.locale.gettext(
        'Hyper table "{hyper_table_name}" has been created')
        .replace('{hyper_table_name}', in_dict['table name']))
    timer.stop()
    return out_hyper_table
def createHyperFile():
    parsed = parseData()
    file = "/Users/jharris/Desktop/workbookUsers.hyper"
    cols = parsed['cols']
    data = parsed['data']
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, file, CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('Extract')
            table = TableDefinition(
                TableName('Extract', 'Extract'),
                [TableDefinition.Column(col, SqlType.text()) for col in cols])
            connection.catalog.create_table(table)
            with Inserter(connection, table) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
def _hyper_table_definition(self, source_table, hyper_table_name="Extract"):
    """
    Build a hyper table definition from source_schema

    source_table (obj): Source table (Instance of google.cloud.bigquery.table.Table)
    hyper_table_name (string): Name of the target Hyper table, default="Extract"

    Returns a tableauhyperapi.TableDefinition Object
    """
    logger.debug(
        "Building Hyper TableDefinition for table {}".format(source_table.reference)
    )
    target_cols = []
    for source_field in source_table.schema:
        this_name = source_field.name
        this_type = self._hyper_sql_type(source_field)
        this_col = TableDefinition.Column(name=this_name, type=this_type)

        # Check for Nullability
        this_mode = source_field.mode
        if this_mode == "REPEATED":
            raise (
                HyperSQLTypeMappingError(
                    "Field mode REPEATED is not implemented in Hyper"
                )
            )
        if this_mode == "REQUIRED":
            this_col = TableDefinition.Column(
                this_name, this_type, Nullability.NOT_NULLABLE
            )
        target_cols.append(this_col)
        logger.debug("..Column {} - Type {}".format(this_name, this_type))

    target_schema = TableDefinition(
        table_name=TableName("Extract", hyper_table_name), columns=target_cols
    )
    return target_schema
def importSchema(schemaJson):
    try:
        if 'name' not in schemaJson:
            print("[ERROR] No table name defined in the schema json. "
                  "Key 'name' required of type STRING.\nExiting now.")
            exit(-1)
        if 'columns' not in schemaJson:
            print("[ERROR] No columns defined in the schema json. "
                  "Key 'columns' required of type [OBJECT].\nExiting now.")
            exit(-1)
        table_def = TableDefinition(schemaJson['name'])
        for c in schemaJson['columns']:
            if 'name' not in c:
                print("No column name defined in the schema json. "
                      "Key 'name' required of type STRING.\nExiting now.")
                exit(-1)
            colFunc = typeObj['TEXT']
            if 'type' in c and c['type'] in typeObj:
                colFunc = typeObj[c['type']]
                if c['type'] == 'CHAR':
                    if 'length' not in c:
                        print("No length defined for CHAR column", c['name'],
                              "\nKey 'length' required of type INTEGER.\nExiting now.")
                        exit(-1)
                    colType = colFunc(c['length'])
                elif c['type'] == 'VARCHAR':
                    if 'length' not in c:
                        print("No length defined for VARCHAR column", c['name'],
                              "\nKey 'length' required of type INTEGER.\nExiting now.")
                        exit(-1)
                    colType = colFunc(c['length'])
                elif c['type'] == 'NUMERIC':
                    if 'precision' not in c or 'scale' not in c:
                        print("No precision and/or scale defined for NUMERIC column", c['name'],
                              "\nKeys 'precision' and 'scale' required of type INTEGER.\nExiting now.")
                        exit(-1)
                    colType = colFunc(c['precision'], c['scale'])
                else:
                    colType = colFunc()
            else:
                colType = colFunc()
            if 'collation' in c and c['collation'] in collationObj:
                colCollation = collationObj[c['collation']]
                table_def.add_column(c['name'], colType, colCollation)
            else:
                table_def.add_column(c['name'], colType)
        if table_def is None:
            print('[ERROR] A fatal error occurred while creating the table.\nExiting now.')
            exit(-1)
    except HyperException as e:
        print('[ERROR] A fatal error occurred while reading the schema definition:\n', e, '\nExiting now.')
        exit(-1)
    return table_def
def dss_columns_to_hyper_columns(self, dss_columns):
    """
    Convert the columns of the DSS dataset to Tableau Hyper columns

    :param dss_columns: columns of the DSS dataset
        :example: [{"name": "customer_id", "type": "bigint"}, ...]

    >>> dss_columns = [{"name": "customer_id", "type": "bigint"}]

    :return: hyper_columns: tableau hyper columns object
    """
    hyper_columns = []
    for dss_column in dss_columns:
        dss_column_name, dss_column_type = dss_column['name'], dss_column['type']
        hyper_type = self.type_converter.dss_type_to_hyper(dss_column_type)
        hyper_columns.append(TableDefinition.Column(dss_column_name, hyper_type))
    return hyper_columns
def create_hyper(dist_path: str, table_def_dict: dict, file_name: str, src_path: str):
    # Start a session for working with the Hyper file
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(
                endpoint=hyper.endpoint,
                database=dist_path,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            column_def = create_column_def(table_def_dict)
            # Define the table
            table_def = TableDefinition(table_name=file_name, columns=column_def)
            # Create the table from the definition (the default schema is "public")
            connection.catalog.create_table(table_def)
            # Run a PostgreSQL-like COPY command to load the CSV
            record_count = connection.execute_command(command=f'''
                COPY {table_def.table_name} from {escape_string_literal(src_path)}
                with (format csv, delimiter ',', header)
            ''')
            print(f"The number of rows in table {table_def.table_name} is {record_count}.")
def create_extract_schema(raw_data=None, raw_data_path=None):
    """
    Generates the Tableau Hyper extract columns used to build the Hyper extract file.

    Args:
        raw_data ([dataframe], optional): Pass the raw pandas dataframe here.
            If this is not found, the function will check for raw_data_path.
            Defaults to None.
        raw_data_path ([string], optional): Pass the path of the csv file for the
            dataframe if the raw dataframe is not available. Defaults to None.

    Returns:
        [object list]: Returns a list of objects representing Tableau Hyper
        extract columns, along with the cleaned dataframe
    """
    if raw_data is None:
        try:
            raw_data = pd.read_csv(raw_data_path)
        except Exception:
            return consolelog('ERROR: No filepath found')

    columns = raw_data.columns.tolist()
    tableau_extract_columns = []
    for col in columns:
        conversion_values = convert_datatype(str(raw_data[[col]].dtypes[0]))
        converted_type = conversion_values[0]
        def_value = conversion_values[1]
        consolelog(
            f"Column: [{col}] with datatype ~{str(raw_data[[col]].dtypes[0])}~ converted to {converted_type}"
        )
        tableau_extract_columns.append(TableDefinition.Column(col, converted_type))
        raw_data[col] = raw_data[col].fillna(def_value)
    return (tableau_extract_columns, raw_data)
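# A minimal usage sketch for create_extract_schema(), assuming pandas is
# imported as pd and that convert_datatype/consolelog are available from the
# surrounding module. The DataFrame is illustrative only.
import pandas as pd

raw = pd.DataFrame({"order_id": [1, 2, None], "city": ["Lima", None, "Quito"]})
extract_columns, cleaned = create_extract_schema(raw_data=raw)

# The returned columns can then be passed straight into a TableDefinition:
extract_table = TableDefinition(TableName("Extract", "Extract"), extract_columns)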
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, escape_name, escape_string_literal, HyperException
import config as cf
from spark_connect import sparkConnect

# test = sparkConnect()
# schema = test[1]
# print(schema)

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name=cf.table_name,
    columns=sparkConnect())

print('\n', cf.input_file_path, '\n')


def run_create_hyper_file_from_csv():
    """
    Loading data from a csv into a new Hyper file
    """
    print("Load data from CSV into table in new Hyper file")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(
                endpoint=hyper.endpoint,
def Full_refresh(result):
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper',
                                CreateMode.CREATE_AND_REPLACE) as connection:
                    if connection.is_open:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        DataTable = TableDefinition(TableName('Extract', 'Campaign_data'), [
                            # Below columns are extracted from the Report data API
                            TableDefinition.Column('Row_ID', SqlType.big_int()),
                            TableDefinition.Column('Inserted Date', SqlType.date()),
                            TableDefinition.Column('Date', SqlType.date()),
                            TableDefinition.Column('Account Id', SqlType.varchar(50)),
                            TableDefinition.Column('Account Name', SqlType.text()),
                            TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                            TableDefinition.Column('Campaign Name', SqlType.text()),
                            TableDefinition.Column('Impressions', SqlType.big_int()),
                            TableDefinition.Column('Clicks', SqlType.big_int()),
                            TableDefinition.Column('Reach', SqlType.big_int()),
                            TableDefinition.Column('Spend', SqlType.double()),
                            TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully Facebook Campaign Table is defined")
                        connection.catalog.create_table(DataTable)
                        # print(Campaign_df["Id"].dtype)
                        # print(range(len(Campaign_df["Id"])))
                        with Inserter(connection, TableName('Extract', 'Campaign_data')) as inserter:
                            inserted_rows = 1
                            row_id = 1
                            for i in range(0, len(result["Campaign Id"])):
                                # print(str(result.loc[i, "CampaignId"]))
                                # print(result.loc[i, "Date"])
                                inserter.add_row([
                                    int(row_id),
                                    datetime.today(),
                                    datetime.strptime(result.loc[i, "Date"], '%Y-%m-%d'),
                                    str(result.loc[i, "Account Id"]),
                                    str(result.loc[i, "Account Name"]),
                                    str(result.loc[i, "Campaign Id"]),
                                    str(result.loc[i, "Campaign Name"]),
                                    int(result.loc[i, "Impressions"]),
                                    int(result.loc[i, "Clicks"]),
                                    int(result.loc[i, "Reach"]),
                                    float(result.loc[i, "Spend"]),
                                    float(result.loc[i, "Frequency"])
                                ])
                                # print("inserted")
                                row_id = row_id + 1
                                inserted_rows = inserted_rows + 1
                            inserter.execute()
                        print("Inserted Rows are " + str(inserted_rows))
                        LogFileWrite("Inserted Rows are " + str(inserted_rows))

                        # Remove duplicate rows, keeping only the latest Row_ID per key combination
                        table_name = TableName('Extract', 'Campaign_data')
                        delete_query = f'DELETE FROM {table_name} WHERE "Row_ID" NOT IN ('
                        delete_query += f'SELECT MAX("Row_ID") FROM {table_name} '
                        delete_query += 'GROUP BY "Date", "Campaign Id", "Campaign Name", '
                        delete_query += '"Account Id", "Impressions", '
                        delete_query += '"Clicks", "Account Name", "Reach", "Spend", "Frequency")'
                        # print(delete_query)
                        connection.execute_command(delete_query)
                        print("Deleted Duplicate rows")
                        LogFileWrite("Successfully deleted Duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                if connection.is_open:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                    print("Connection is open or closed" + str(connection.is_open))
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        if hyperprocess.is_open:
            hyperprocess.close()
            print("Forcefully shut down the Hyper Process")
            LogFileWrite("Forcefully shut down the Hyper Process")
        else:
            print("Hyper process is shut down")
            LogFileWrite("Hyper process is shut down")
        print("Connection is open or closed" + str(connection.is_open))
        print("process is open or closed" + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
# Creates a new Hyper file or
# replaces the file if it already exists.
with Connection(endpoint=hyper.endpoint,
                database="drinks.hyper",
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
    # default schema name is "public"
    # define table
    drinks_table = TableDefinition(
        table_name="drinks",
        columns=[
            TableDefinition.Column("country", SqlType.text(), NOT_NULLABLE),
            TableDefinition.Column("beer_servings", SqlType.big_int(), NOT_NULLABLE),
            TableDefinition.Column("spirit_servings", SqlType.big_int(), NOT_NULLABLE),
            TableDefinition.Column("wine_servings", SqlType.big_int(), NOT_NULLABLE),
            TableDefinition.Column("total_litres_of_pure_alcohol", SqlType.double(), NOT_NULLABLE),
            TableDefinition.Column("continent", SqlType.text(), NOT_NULLABLE)
        ])

    # create tables
    connection.catalog.create_table(drinks_table)

    path_to_csv = "drinks.csv"
    print(drinks_table.table_name)
    count_in_drinks_table = connection.execute_command(
def __init__(self):
    self.location = "./output/meli_challenge_result.hyper"
    self.test_location = "../output/meli_challenge_result.hyper"
    self.searchResult_table = TableDefinition('results', [
        TableDefinition.Column('id', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('site_id', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('title', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('seller', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('price', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('prices', SqlType.json(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('sale_price', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('currency_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('available_quantity', SqlType.int(), Nullability.NULLABLE),
        TableDefinition.Column('sold_quantity', SqlType.int(), Nullability.NULLABLE),
        TableDefinition.Column('buying_mode', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('listing_type_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('stop_time', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('condition', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('permalink', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('thumbnail', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('accepts_mercadopago', SqlType.bool(), Nullability.NULLABLE),
        TableDefinition.Column('installments', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('address', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('shipping', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('seller_address', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('attributes', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('original_price', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('category_id', SqlType.text(), Nullability.NOT_NULLABLE),
        TableDefinition.Column('official_store_id', SqlType.int(), Nullability.NULLABLE),
        TableDefinition.Column('domain_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('catalog_product_id', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('tags', SqlType.text(), Nullability.NULLABLE),
        TableDefinition.Column('catalog_listing', SqlType.bool(), Nullability.NULLABLE),
        TableDefinition.Column('order_backend', SqlType.int(), Nullability.NULLABLE),
    ])
def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Creates new Hyper file "spatial_data.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # Hyper API's Inserter allows users to transform data during insertion.
            # To make use of data transformation during insertion, the inserter requires the following inputs:
            #   1. The connection to the Hyper instance containing the table.
            #   2. The table name or table definition into which data is inserted.
            #   3. List of Inserter.ColumnMapping.
            #      This list informs the inserter how each column in the target table is transformed.
            #      The list must contain all the columns into which data is inserted.
            #      "Inserter.ColumnMapping" maps a valid SQL expression (if any) to a column in the target table.
            #      For example Inserter.ColumnMapping('target_column_name', f'{escape_name("colA")}*{escape_name("colB")}')
            #      means the column "target_column_name" contains the product of "colA" and "colB" after insertion.
            #      The SQL expression string is optional in Inserter.ColumnMapping;
            #      for a column without any transformation only the column name is required,
            #      e.g. Inserter.ColumnMapping('no_data_transformation_column').
            #   4. The column definition of all input values provided to the Inserter.

            # The inserter definition contains the column definitions for the values that are inserted.
            # The data input has two text values: Name and Location_as_text.
            inserter_definition = [
                TableDefinition.Column(name='Name', type=SqlType.text(), nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Location_as_text', type=SqlType.text(), nullability=NOT_NULLABLE)
            ]

            # Column 'Name' is inserted into "Extract"."Extract" as-is.
            # Column 'Location' in "Extract"."Extract" of geography type is computed from
            # Column 'Location_as_text' of text type using the expression
            # 'CAST("Location_as_text" AS GEOGRAPHY)'.
            # Inserter.ColumnMapping is used for mapping the CAST expression to Column 'Location'.
            column_mappings = [
                'Name',
                Inserter.ColumnMapping(
                    'Location',
                    f'CAST({escape_name("Location_as_text")} AS GEOGRAPHY)')
            ]

            # Data to be inserted.
            data_to_insert = [['Seattle', "point(-122.338083 47.647528)"],
                              ['Munich', "point(11.584329 48.139257)"]]

            # Insert data into "Extract"."Extract" table with the CAST expression.
            with Inserter(connection, extract_table, column_mappings,
                          inserter_definition=inserter_definition) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

            print("The data was added to the table.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, Name, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau.
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Name', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Location', type=SqlType.geography(), nullability=NOT_NULLABLE)
    ])


def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
hyper_file_path = "hyper_for_csv.hyper"
table_name = TableName("Extract", "Extract")
output_name = "output.csv"
path_to_database = Path(hyper_file_path)

# The table is called "Extract" and will be created in the "Extract" schema
# and contains four columns.
extract_table = TableDefinition(
    table_name=table_name,
    columns=[
        TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NULLABLE),
        TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NULLABLE),
        TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NULLABLE)
    ])


def insert_data():
    """
    Creates a simple .hyper file. For more on this, see the below example:
    https://github.com/tableau/hyper-api-samples/blob/main/Tableau-Supported/Python/insert_data_into_single_table.py
    """
    print("Creating single table for conversion.")
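# A minimal sketch (not the original insert_data() body) of how the extract
# defined above could be populated and then exported to output.csv with Hyper's
# COPY command. Assumes the imports from the snippet above; the sample rows and
# the helper name hyper_to_csv_sketch are illustrative only.
def hyper_to_csv_sketch():
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)
            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=[
                    ["DK-13375", "Dennis Kane", 518, "Consumer"],
                    ["EB-13705", "Ed Braxton", 815, "Corporate"],
                ])
                inserter.execute()
            # Export the table to CSV using Hyper's COPY ... TO command
            connection.execute_command(
                f"COPY {table_name} TO {escape_string_literal(output_name)} "
                "WITH (format csv, header)")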
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 2 21:35:47 2020

@author: Manish Chauhan || +91-9774083186
"""
from tableauhyperapi import HyperProcess, Inserter, Telemetry, Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, escape_name, escape_string_literal, \
    HyperException, TableName

# Starting local hyper instance
extract_table = TableDefinition(
    table_name=TableName('extract', 'extract'),
    columns=[
        TableDefinition.Column("Sno", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Sale", SqlType.big_int()),
        TableDefinition.Column("Profit", SqlType.big_int()),
        # TableDefinition.Column("Date", SqlType.date())
    ])

data_to_insert = [["1", "Vishakha", 99999, 80]]


def run_create_hyper_file_from_csv():
    print("Inside function to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(
                endpoint=hyper.endpoint,
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

product_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="Products",
    columns=[
        TableDefinition.Column("category", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("title", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("price", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("available_quantity", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("sold_quantity", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("permalink", SqlType.text(), NOT_NULLABLE)
    ])


def call_mlapi_to_dict(SearchCategory):
    print("Start call_mlapi_to_dict.")
    try:
        query = {'1': SearchCategory, 'limit': '50'}
        response = requests.get(
query_res['column_ex4'] = query_res['column_ex4'].astype(str)
query_res['column_ex5'] = query_res['column_ex5'].astype(str)
query_res['column_ex6'] = query_res['column_ex6'].astype(str)

with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with Connection(endpoint=hyper.endpoint,
                    database=path_to_database,
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
        connection.catalog.create_schema('Extract')

        # Define the hyper table (same definition as the full refresh)
        example_table = TableDefinition(TableName('Extract', 'Extract'), [
            TableDefinition.Column('column_ex', SqlType.varchar(500)),
            TableDefinition.Column('column_ex2', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int', SqlType.int()),
            TableDefinition.Column('column_ex3', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int2', SqlType.int()),
            TableDefinition.Column('column_ex4', SqlType.varchar(500)),
            TableDefinition.Column('column_ex5', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int3', SqlType.int()),
            TableDefinition.Column('column_ex6', SqlType.varchar(500)),
        ])
        print("The table is defined.")
        connection.catalog.create_table(table_definition=example_table)

        # Insert data from dataframe to hyper table
        with Inserter(connection, example_table) as inserter:
            for i in range(len(query_res)):
                inserter.add_row(
                    [
                        query_res['column_ex'][i],
                        query_res['column_ex2'][i],
                        int(query_res['column_ex_int'][i]),
                        query_res['column_ex3'][i],