def test_multiple_tables(df, tmp_hyper, table_name, table_mode):
    """Write the same two tables twice and verify what is read back.

    In append mode (``table_mode == "a"``) every table is expected to hold
    the frame twice; otherwise a single copy is expected.
    """
    # Write twice; depending on mode this should either overwrite or duplicate entries
    for _ in range(2):
        pantab.frames_to_hyper(
            {table_name: df, "table2": df}, tmp_hyper, table_mode=table_mode
        )

    frames_read = pantab.frames_from_hyper(tmp_hyper)

    expected = df.copy()
    if table_mode == "a":
        expected = pd.concat([expected, expected]).reset_index(drop=True)

    # some test trickery here: anything that is not already a
    # schema-qualified TableName is compared under the "public" schema
    if not (isinstance(table_name, TableName)
            and table_name.schema_name is not None):
        table_name = TableName("public", table_name)

    assert set(frames_read.keys()) == {table_name, TableName("public", "table2")}
    for frame in frames_read.values():
        assert_roundtrip_equal(frame, expected)
def create_schema(self, schema_dss, destination_file_path):
    """
    Create the target Tableau Hyper file with its schema and table(s).

    Starts a Hyper process, opens ``destination_file_path`` with
    ``CreateMode.CREATE_AND_REPLACE`` and creates ``self.schema_name`` and
    ``self.table_name`` in it. When ``dss_is_geo(schema_dss)`` is truthy an
    additional ``tmp_<table_name>`` table is created from the schema returned
    by ``geo_to_text(schema_dss)``.

    The process and connection are kept open on ``self.hyper`` and
    ``self.connection`` on success. On failure both are closed before the
    exception propagates so nothing is leaked.

    :param schema_dss: DSS schema from the DSS dataset to export
        example: {"columns": [{"name": "customer_id", "type": "bigint"}, ...], ...}
    :param destination_file_path: path of the Hyper file to write
    :return: None
    :raises ValueError: if ``self.schema_name`` or ``self.table_name`` is empty
    """
    # Read the destination file of the dss
    self.output_file = destination_file_path
    logger.info(
        "Writing the Tableau Hyper file to the following location: {}".format(
            destination_file_path))
    logger.info(
        "The dataset to export has the following schema: {}".format(schema_dss))

    dss_columns = schema_dss['columns']
    dss_storage_types = [
        column_descriptor['type'] for column_descriptor in dss_columns
    ]
    self.schema_converter.set_dss_storage_types(dss_storage_types)

    self.is_geo_table = dss_is_geo(schema_dss)
    logger.info("The input dataset contains a geo column: {}".format(
        self.is_geo_table))

    if not self.schema_name or not self.table_name:
        logger.warning("Did not received the table or schema name.")
        raise ValueError("No valid schema or table name received.")

    logger.info("Received target schema {} and table {}".format(
        self.schema_name, self.table_name))

    # Create the Tableau Hyper schema from the DSS schema
    self.output_table_definition = TableDefinition(
        TableName(self.schema_name, self.table_name),
        self.schema_converter.dss_columns_to_hyper_columns(dss_columns))

    # Open connection to file
    self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
    self.connection = None
    try:
        self.connection = Connection(self.hyper.endpoint, self.output_file,
                                     CreateMode.CREATE_AND_REPLACE)
        self.connection.catalog.create_schema(self.schema_name)
        self.connection.catalog.create_table(self.output_table_definition)

        # Handle the geo case
        if self.is_geo_table:
            logger.info("Detected geo column. Creating a temporary table...")
            dss_tmp_schema = geo_to_text(schema_dss)
            dss_tmp_columns = dss_tmp_schema['columns']
            self.tmp_table_definition = TableDefinition(
                TableName(self.schema_name, "tmp_" + self.table_name),
                self.schema_converter.dss_columns_to_hyper_columns(
                    dss_tmp_columns))
            self.connection.catalog.create_table(self.tmp_table_definition)
            logger.info("Created temporary table")
    except Exception:
        # Do not leave a Hyper process (and possibly an open connection)
        # behind when the file could not be set up.
        if self.connection is not None:
            self.connection.close()
        self.hyper.close()
        raise
def read_hyper_columns(self):
    """
    Read the columns of ``self.schema_name``.``self.table_name`` from the
    open Hyper connection and derive the matching DSS columns.

    Results are stored on the instance (``hyper_table``, ``hyper_columns``,
    ``hyper_storage_types``, ``dss_columns``, ``dss_storage_types``) and the
    storage types are pushed to ``self.schema_converter``.

    :return: None
    :raises Exception: if the table definition cannot be read from the file;
        the original ``HyperException`` is attached as the cause
    """
    logger.info("Trying to read Tableau Hyper table {}.{} ...".format(
        self.schema_name, self.table_name))
    hyper_table = TableName(self.schema_name, self.table_name)
    self.hyper_table = hyper_table

    try:
        table_def = self.connection.catalog.get_table_definition(hyper_table)
    except HyperException as e:
        # Same schema.table order as every other message in this method.
        logger.warning(
            "The target table does not exists in this hyper file. Requested table: {}.{}"
            .format(self.schema_name, self.table_name))
        raise Exception("Table does not exist: {}.{}".format(
            self.schema_name, self.table_name)) from e

    self.hyper_columns = table_def.columns
    self.hyper_storage_types = [
        column.type.tag for column in self.hyper_columns
    ]

    self.dss_columns = self.schema_converter.hyper_columns_to_dss_columns(
        self.hyper_columns)
    self.dss_storage_types = [column['type'] for column in self.dss_columns]

    self.schema_converter.set_dss_storage_types(self.dss_storage_types)
    self.schema_converter.set_hyper_storage_types(self.hyper_storage_types)
def fn_write_data_into_hyper_file(self, in_logger, timer, in_dict):
    """Append to or overwrite a Hyper table, counting records before/after.

    ``in_dict`` is read for the keys ``'action'`` (``'append'`` or
    ``'overwrite'``), ``'connection'``, ``'schema name'``, ``'table name'``,
    ``'data'`` and, for ``'overwrite'``, ``'hyper table columns'``.

    * ``'append'``: counts the existing records, then looks up the
      definition of the requested schema/table.
    * ``'overwrite'``: creates the schema and the table first.

    The data is then inserted and the record count is taken once more.

    Raises:
        ValueError: if ``in_dict['action']`` is neither ``'append'`` nor
            ``'overwrite'``.
    """
    action = in_dict['action']
    connection = in_dict['connection']
    schema_name = in_dict['schema name']
    table_name = in_dict['table name']

    def table_reference():
        # Fresh dict per call so a callee can never affect the next call.
        return {
            'connection': connection,
            'schema name': schema_name,
            'table name': table_name,
        }

    if action == 'append':
        self.fn_get_records_count_from_table(in_logger, timer, table_reference())
        # Use the table that was just counted instead of a hard-coded
        # "Extract"."Extract", so counting and inserting target the same table.
        hyper_table = connection.catalog.get_table_definition(
            TableName(schema_name, table_name))
    elif action == 'overwrite':
        self.fn_create_hyper_schema(in_logger, timer, in_dict)
        create_arguments = table_reference()
        create_arguments['columns'] = in_dict['hyper table columns']
        hyper_table = self.fn_create_hyper_table(in_logger, timer, create_arguments)
    else:
        raise ValueError(
            "Unsupported action {!r}; expected 'append' or 'overwrite'".format(action))

    self.fn_insert_data_into_hyper_table(in_logger, timer, {
        'connection': connection,
        'data': in_dict['data'],
        'table': hyper_table,
    })
    self.fn_get_records_count_from_table(in_logger, timer, table_reference())
def run_create_hyper_file_from_csv(
        path_to_database='C:/Users/admin/Desktop/extrp1.hyper',
        path_to_csv='C:/Users/admin/Desktop/testing1.csv'):
    """Create a Hyper file, load a CSV into it and insert extra rows.

    The table is created from the module-level ``extract_table`` definition,
    filled with a ``COPY ... FROM`` of the CSV file, and then extended with
    the module-level ``data_to_insert`` rows through an ``Inserter`` on
    ``"extract"."extract"``.

    Args:
        path_to_database: Hyper file to create; an existing file is replaced.
            Defaults to the previously hard-coded location.
        path_to_csv: CSV file to load (comma-delimited with a header row,
            ``NULL`` marks missing values). Defaults to the previously
            hard-coded location.
    """
    print("Inside Fucntion to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('extract')
            connection.catalog.create_table(table_definition=extract_table)

            # `execute_command` executes a SQL statement and returns the impacted row count.
            count_in_table = connection.execute_command(
                command=
                f"COPY {extract_table.table_name} from {escape_string_literal(path_to_csv)} with "
                f"(format csv, NULL 'NULL', delimiter ',', header)")
            print(count_in_table)

            with Inserter(connection, TableName('extract', 'extract')) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()
def test_insert_data_into_hyper_file(self):
    """Insert three rows and check the resulting file has one table with three rows."""
    hyper_file = "/tmp/hyperleaup/output/output.hyper"

    data = [(1001, "Jane", "Doe"),
            (1002, "John", "Doe"),
            (2201, "Elonzo", "Smith")]
    name = "output"

    column_specs = (
        ("id", SqlType.big_int()),
        ("first_name", SqlType.text()),
        ("last_name", SqlType.text()),
    )
    table_def = TableDefinition(
        table_name=TableName("Extract", "Extract"),
        columns=[
            TableDefinition.Column(name=Name(column_name),
                                   type=sql_type,
                                   nullability=NULLABLE)
            for column_name, sql_type in column_specs
        ])

    path = insert_data_into_hyper_file(data, name, table_def)
    print(f'Database Path : {path}')

    tables = TestUtils.get_tables("Extract", hyper_file)
    assert (len(tables) == 1)

    num_rows = TestUtils.get_row_count("Extract", "Extract", hyper_file)
    assert (num_rows == 3)
def get_table_def(df: DataFrame, schema_name: str, table_name: str) -> TableDefinition:
    """Returns a Tableau TableDefinition given a Spark DataFrame.

    Every field of ``df.schema`` is converted with ``convert_struct_field``.
    The table is named ``schema_name``.``table_name``; previously both
    arguments were ignored and ``"Extract"."Extract"`` was always used.
    """
    cols = [convert_struct_field(field) for field in df.schema]
    return TableDefinition(table_name=TableName(schema_name, table_name),
                           columns=cols)
def to_hyper(df, hyper_file_name, custom_schema="Extract", custom_table_name="Extract"):
    """
    Write a Tableau Hyper file from a Pandas DataFrame.

    Currently can only write single table extracts, which is Tableau's
    default way of creating an extract.

    Args:
        df: Specify which DataFrame you want to output. It is not modified.
        hyper_file_name: Specify the file name such as "Example.hyper".
            An existing file is replaced.
        custom_schema: If you need to change the schema name. Defaults to "Extract"
        custom_table_name: If you need to change the table name. Defaults to "Extract"

    Returns:
        None. The data is written to ``hyper_file_name``.

    Raises:
        KeyError: if a column dtype has no entry in ``dtype_mapper``.
    """
    # Create a .hyper compatible column definition from the DataFrame column
    # names and dtypes. This happens before the file is touched, so an
    # unsupported dtype no longer replaces an existing file with a
    # half-initialised one.
    hyper_table = TableDefinition(
        TableName(custom_schema, custom_table_name),
        [
            TableDefinition.Column(column_name, dtype_mapper[str(column_dtype)])
            for column_name, column_dtype in zip(df.columns, df.dtypes)
        ],
    )

    # Replace NaN with None, otherwise it will not be Null in Tableau.
    # Done on a copy: the caller's DataFrame must not change as a side effect.
    data_to_insert = df.replace({np.nan: None}).to_numpy()

    # Starts the Hyper Process
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
            parameters={"log_config": ""},
    ) as hyper:
        # Creates a .hyper file to put the data into
        with Connection(hyper.endpoint, hyper_file_name,
                        CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(custom_schema)
            connection.catalog.create_table(hyper_table)

            # Insert the data values into the hyper file
            with Inserter(connection, hyper_table) as inserter:
                inserter.add_rows(tqdm(data_to_insert))
                inserter.execute()
def __init__(self, path="superstore.hyper"):
    """Start a Hyper process and connect to the given Hyper file.

    Args:
        path: File name or path of the Hyper file, resolved relative to the
            directory of this module.

    If the connection cannot be opened the freshly started Hyper process is
    shut down again before the exception propagates.
    """
    super().__init__()
    self._path = Path(__file__).parent / path
    self._table_name = TableName("Extract", "Extract")
    self._hyper = HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU)
    try:
        self._connection = Connection(endpoint=self._hyper.endpoint,
                                      database=self._path)
    except Exception:
        # Without this the child process would outlive the failed constructor.
        self._hyper.close()
        raise
def createTabTable(tableName, columnHeading, dataTypes):
    """Build a ``TableDefinition`` named ``"Extract".<tableName>``.

    Args:
        tableName: Name of the table inside the "Extract" schema.
        columnHeading: Column names, in table order.
        dataTypes: One ``mapper`` key per column, in the same order.
            The sequence is only read; it is no longer consumed.

    Returns:
        The table definition with one column per heading.

    Raises:
        IndexError: if there are fewer data types than column headings.
    """
    headings = list(columnHeading)
    type_keys = list(dataTypes)
    if len(type_keys) < len(headings):
        raise IndexError(
            "createTabTable: {} column headings but only {} data types".format(
                len(headings), len(type_keys)))

    extract_table = TableDefinition(table_name=TableName("Extract", tableName))
    for head, type_key in zip(headings, type_keys):
        # NOTE(review): eval() executes whatever string `mapper` holds. This
        # is only safe while `mapper` is a fixed, trusted table - consider
        # storing the SqlType factories themselves instead of source strings.
        extract_table.add_column(head, eval(mapper[type_key]))
    return extract_table
def get_table_def(df: DataFrame, schema_name: str = 'Extract', table_name: str = 'Extract') -> TableDefinition:
    """Build the Tableau TableDefinition that matches a Spark DataFrame.

    Each field of ``df.schema`` is converted with
    ``HyperUtils.convert_struct_field``; the table is named
    ``schema_name``.``table_name``.
    """
    columns = [HyperUtils.convert_struct_field(field) for field in df.schema]
    qualified_name = TableName(schema_name, table_name)
    return TableDefinition(table_name=qualified_name, columns=columns)
def test_read_non_roundtrippable(datapath):
    """Read ``dates.hyper`` and compare it with the expected datetime frame."""
    expected = pd.DataFrame(
        [["1900-01-01", "2000-01-01"], [pd.NaT, "2050-01-01"]],
        columns=["Date1", "Date2"],
        dtype="datetime64[ns]",
    )

    source_file = datapath / "dates.hyper"
    result = pantab.frame_from_hyper(source_file,
                                     table=TableName("Extract", "Extract"))

    tm.assert_frame_equal(result, expected)
def test_reads_non_writeable_strings(datapath):
    """Read a file with a non-nullable string column and compare the frame."""
    expected = pd.DataFrame([["row1"], ["row2"]], columns=["Non-Nullable String"])
    if compat.PANDAS_100:
        # With PANDAS_100 set the column is expected as the "string" dtype.
        expected = expected.astype("string")

    source_file = datapath / "non_pantab_writeable.hyper"
    result = pantab.frame_from_hyper(source_file,
                                     table=TableName("public", "table"))

    tm.assert_frame_equal(result, expected)
def test_hyper_columns_to_dss_columns(self):
    """Convert the columns of ``public.Customer`` from the sample file.

    The test passes when the conversion runs without raising. The Hyper
    process and connection are managed with ``with`` so they are released
    even if reading the table definition fails.
    """
    schema_converter = SchemaConversion()
    path_to_hyper = "data/superstore_sample.hyper"

    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, path_to_hyper) as connection:
            hyper_table = connection.catalog.get_table_definition(
                TableName('public', 'Customer'))

    # The table definition stays usable after the connection is closed.
    schema_converter.hyper_columns_to_dss_columns(hyper_table.columns)
    return True
def fn_create_hyper_table(self, local_logger, timer, in_dict):
    """Create a Hyper table and return its definition.

    ``in_dict`` supplies ``'schema name'``, ``'table name'``, ``'columns'``
    and the ``'connection'`` whose catalog receives the new table. The
    creation is logged through ``local_logger`` and bracketed by
    ``timer.start()`` / ``timer.stop()``.
    """
    timer.start()

    qualified_name = TableName(in_dict['schema name'], in_dict['table name'])
    out_hyper_table = TableDefinition(qualified_name, columns=in_dict['columns'])
    catalog = in_dict['connection'].catalog
    catalog.create_table(table_definition=out_hyper_table)

    message_template = self.locale.gettext(
        'Hyper table "{hyper_table_name}" has been created')
    local_logger.info(
        message_template.replace('{hyper_table_name}', in_dict['table name']))

    timer.stop()
    return out_hyper_table
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    The extract holds a single table "Extract"."Extract" whose column types
    are derived from the dataframe dtypes. NaN values are replaced with 0
    before writing.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract (an existing
        file is replaced)

    Raises
    ------
    KeyError: if a column has a dtype other than int32, int64, float32,
        float64, datetime64[ns] or object
    '''
    # Replace nan's with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path,
                        CreateMode.CREATE_AND_REPLACE) as connection:
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text()
            }
            # Use the dtype name to look up the appropriate SqlType for every column
            table_def = [
                TableDefinition.Column(col_header, dtype_map[str(col_dtype)])
                for col_header, col_dtype in zip(df.columns, df.dtypes)
            ]

            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data. itertuples() keeps each column's own value type,
            # whereas iterrows() builds one Series per row and upcasts mixed
            # numeric columns (ints become floats), which does not match the
            # integer column types declared above.
            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(df.itertuples(index=False, name=None))
                inserter.execute()
def test_roundtrip_with_external_hyper_connection(df, tmp_hyper):
    """Round-trip two tables while reading through a caller-owned connection."""
    expected_tables = {TableName("public", "test"), TableName("public", "test2")}

    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        pantab.frames_to_hyper({"test": df, "test2": df},
                               tmp_hyper,
                               hyper_process=hyper)

        with Connection(hyper.endpoint, tmp_hyper, CreateMode.NONE) as connection:
            # single table by name
            single_frame = pantab.frame_from_hyper(connection, table="test")
            assert_roundtrip_equal(single_frame, df)

            # arbitrary query
            queried_frame = pantab.frame_from_hyper_query(connection,
                                                          "SELECT * FROM test")
            assert queried_frame.size == 63

            # every table in the file
            all_frames = pantab.frames_from_hyper(connection)
            assert set(all_frames.keys()) == expected_tables
            for frame in all_frames.values():
                assert_roundtrip_equal(frame, df)
def run(self, args):
    """
    Runs the command: copies the input Hyper file to the output location and
    writes WKT geometry from a CSV file into its ``LocalData<role_name>`` table.

    :param args: Arguments from argparse.Namespace; ``input_file``,
        ``output_file``, ``wkt_path``, ``role_name`` and ``id_field`` are read
    """
    input_file = args.input_file
    output_file = args.output_file
    wkt_file = Path(args.wkt_path)
    role_name = args.role_name
    id_field = args.id_field  # read from args but not used below

    # Grab the CSV
    csv_query = CsvQueryClass()
    csv_query.open_csv(wkt_file)

    # if the output file already exists, delete
    if os.path.exists(output_file):
        if os.name != "nt":
            subprocess.call(["rm", "-rf", output_file])
        else:
            # deal with Windows
            os.remove(output_file)

    shutil.copyfile(input_file, output_file)

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper, \
            Connection(endpoint=hyper.endpoint, database=output_file) as connection:
        table_name = TableName("public", "LocalData" + role_name)
        geo_name = Name('Geometry')
        map_code_name = Name('MapCode')
        latitude_name = Name('Latitude')
        longitude_name = Name('Longitude')

        alter_statement = (f"ALTER TABLE {table_name} ADD COLUMN {geo_name} TEXT,"
                           f" ADD COLUMN {map_code_name} INTEGER")
        connection.execute_query(alter_statement).close()

        # One UPDATE per CSV row, matched on the latitude/longitude pair.
        for mrow in csv_query.rows:
            lat = mrow['Latitude']
            lng = mrow['Longitude']
            wkt = mrow['WKT']
            update_statement = (
                f"UPDATE {table_name}"
                f" SET {geo_name}={escape_string_literal(wkt)}, {map_code_name}=0"
                f" WHERE {latitude_name}={lat}"
                f" AND {longitude_name}={lng}")
            with connection.execute_query(update_statement) as result:
                print(f"{result.affected_row_count} rows changed")

    print('done')
def hyper_prepare(hyper_path, functional_ordered_column, column_value):
    """Remove the latest set of data from the given hyper file.

    Deletes every row of "Extract"."Extract" whose
    ``functional_ordered_column`` is greater than or equal to
    ``column_value`` and returns the number of rows affected.
    ``column_value`` is placed into the SQL statement as-is.
    """
    database_path = Path(hyper_path).expanduser().resolve()
    agent_name = os.path.basename(__file__)

    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
                      user_agent=agent_name) as hyper, \
            Connection(endpoint=hyper.endpoint, database=database_path) as connection:
        extract = TableName("Extract", "Extract")
        delete_statement = (
            f'DELETE FROM {extract} WHERE "{functional_ordered_column}" >= {column_value}'
        )
        rows_affected = connection.execute_command(command=delete_statement)

    return rows_affected
def test_roundtrip_with_external_hyper_process(df, tmp_hyper):
    """Round-trip frames through a pre-spawned HyperProcess without a log file."""
    default_log_path = Path.cwd() / "hyperd.log"
    if default_log_path.exists():
        default_log_path.unlink()

    expected_tables = {TableName("public", "test"), TableName("public", "test2")}

    # By passing in a pre-spawned HyperProcess, one can e.g. avoid creating a log file
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
                      parameters={"log_config": ""}) as hyper:
        # test frame_to_hyper/frame_from_hyper
        pantab.frame_to_hyper(df, tmp_hyper, table="test", hyper_process=hyper)
        single_frame = pantab.frame_from_hyper(tmp_hyper,
                                               table="test",
                                               hyper_process=hyper)
        assert_roundtrip_equal(single_frame, df)

        # test frame_from_hyper_query
        queried_frame = pantab.frame_from_hyper_query(tmp_hyper,
                                                      "SELECT * FROM test",
                                                      hyper_process=hyper)
        assert queried_frame.size == 63

        # test frames_to_hyper/frames_from_hyper
        pantab.frames_to_hyper({"test2": df, "test": df},
                               tmp_hyper,
                               hyper_process=hyper)
        all_frames = pantab.frames_from_hyper(tmp_hyper, hyper_process=hyper)
        assert set(all_frames.keys()) == expected_tables
        for frame in all_frames.values():
            assert_roundtrip_equal(frame, df)

    assert not default_log_path.exists()
def print_table_def(self, schema: str = "Extract", table: str = "Extract"):
    """Print name, type and nullability of every column of a table.

    The table ``schema``.``table`` is looked up in the Hyper file at
    ``self.path``.
    """
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp, \
            Connection(endpoint=hp.endpoint, database=self.path) as connection:
        definition = connection.catalog.get_table_definition(
            name=TableName(schema, table))

        # Print all column information
        print("root")
        for col in definition.columns:
            print(f"|-- {col.name}: {col.type} (nullable = {col.nullability})")
def create():
    """Build a Hyper extract from the posted meal data and upload it to S3.

    Reads the JSON request body, writes every entry of its ``'data'`` list
    (keys ``'breakfast'``, ``'lunch'``, ``'dinner'``) into
    "Extract"."Extract" of the Hyper file at the module-level
    ``path_to_database``, then uploads ``mealprep.hyper`` to the bucket named
    by the ``bucket_name`` environment variable.

    Returns:
        A redirect to the ``index`` view on success, or a 500 response when
        the upload fails. The previous ``return False`` is not a valid view
        return value and only produced a server error indirectly.
    """
    object_name = "mealprep.hyper"
    bucket_name = os.environ.get('bucket_name')

    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")

        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            print(example_table)
            print(type(example_table))

            with Inserter(connection, example_table) as inserter:
                for meal in request_data['data']:
                    inserter.add_row(
                        [meal['breakfast'], meal['lunch'], meal['dinner']])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")

    # NOTE(review): the file opened here is the literal 'mealprep.hyper',
    # while the extract is written to `path_to_database` - confirm both refer
    # to the same file.
    with open('mealprep.hyper', 'rb') as reader:
        s3_client = boto3.client(
            's3',
            aws_access_key_id=os.environ.get('aws_access_key_id'),
            aws_secret_access_key=os.environ.get('aws_secret_access_key'))
        try:
            s3_client.upload_fileobj(reader, bucket_name, object_name)
        except ClientError as e:
            logging.error(e)
            return "Failed to upload the Hyper file to S3.", 500

    return redirect(url_for('index'))
def run_read_data_from_existing_hyper_file():
    """
    An example of how to read and print data from an existing Hyper file.

    Works on a copy of the denormalized superstore sample: prints every table
    of the "Extract" schema with its columns, then all rows of
    "Extract"."Extract".
    """
    print("EXAMPLE - Read data from an existing Hyper file")

    # Path to a Hyper file containing all data inserted into Customer, Product, Orders and LineItems table.
    # See "insert_data_into_multiple_tables.py" for an example that works with the complete schema.
    sample_dir = Path(__file__).parent / "data"
    path_to_source_database = sample_dir / "superstore_sample_denormalized.hyper"

    # Make a copy of the superstore denormalized sample Hyper file
    copied_file = shutil.copy(src=path_to_source_database,
                              dst="superstore_sample_denormalized_read.hyper")
    path_to_database = Path(copied_file).resolve()

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Connect to existing Hyper file "superstore_sample_denormalized_read.hyper".
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:
            catalog = connection.catalog

            # The table names in the "Extract" schema (the default schema).
            for table in catalog.get_table_names(schema="Extract"):
                definition = catalog.get_table_definition(name=table)
                print(f"Table {table.name} has qualified name: {table}")
                for column in definition.columns:
                    print(
                        f"Column {column.name} has type={column.type} and nullability={column.nullability}"
                    )
                print("")

            # Print all rows from the "Extract"."Extract" table.
            table_name = TableName("Extract", "Extract")
            print(f"These are all rows in the table {table_name}:")
            # `execute_list_query` executes a SQL query and returns the result as list of rows of data,
            # each represented by a list of objects.
            select_all = f"SELECT * FROM {table_name}"
            rows_in_table = connection.execute_list_query(query=select_all)
            print(rows_in_table)

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
def test_to_dss_date(self):
    """Convert a date from ``public.Orders`` and the table's columns.

    Reads all rows of the sample table, converts the third column of the
    last row to a ``datetime.datetime`` and converts the Hyper columns to
    DSS columns. The test passes when none of this raises.

    The process, connection and query result are managed with ``with`` so
    they are released on failure, and an empty table fails with a clear
    assertion instead of an unbound ``row``.
    """
    schema_converter = SchemaConversion()
    path_to_hyper = "data/superstore_sample.hyper"
    hyper_table = TableName('public', 'Orders')

    last_row = None
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, path_to_hyper) as connection:
            hyper_table_def = connection.catalog.get_table_definition(hyper_table)
            with connection.execute_query(f'SELECT * FROM {hyper_table}') as result:
                # Walk the whole result; only the last row is inspected.
                for last_row in result:
                    pass

    assert last_row is not None, "public.Orders returned no rows"

    sample_date = last_row[2].to_date()
    # Building the datetime verifies that year/month/day form a valid date.
    datetime.datetime(sample_date.year, sample_date.month, sample_date.day)

    schema_converter.hyper_columns_to_dss_columns(hyper_table_def.columns)
    return True
def fn_hyper_read(self, in_logger, timer, in_dict):
    """Read "Extract"."Extract" into a pandas DataFrame.

    Runs ``SELECT *`` through ``in_dict['connection']`` and labels the
    columns with the names returned by ``fn_get_column_names_from_table``
    for the table's definition.

    The frame is built with its column labels in one step. This avoids
    ``DataFrame.set_axis(..., inplace=True)``, which pandas 2.0 no longer
    accepts, and yields an empty frame with the right columns for an empty
    table instead of a length-mismatch error.

    Returns:
        The table content as a DataFrame.
    """
    timer.start()
    source_table = TableName('Extract', 'Extract')
    connection = in_dict['connection']

    # once Hyper is opened we can get data out
    query_to_run = f"SELECT * FROM {source_table}"
    in_logger.debug(self.locale.gettext(
        'Hyper SQL about to be executed is: {hyper_sql}')
                    .replace('{hyper_sql}', str(query_to_run)))
    result_set = connection.execute_list_query(query=query_to_run)
    in_logger.debug(self.locale.gettext(
        'Hyper SQL executed with success and {rows_counted} have been retrieved')
                    .replace('{rows_counted}', str(len(result_set))))

    table_definition = connection.catalog.get_table_definition(name=source_table)
    table_columns = self.fn_get_column_names_from_table(in_logger, {
        'table definition': table_definition,
    })
    out_data_frame = pd.DataFrame(result_set, columns=table_columns)

    timer.stop()
    return out_data_frame
def createHyperFile():
    """Write the output of ``parseData()`` into a single-table Hyper file.

    Every name in ``parseData()['cols']`` becomes a text column of
    "Extract"."Extract"; ``parseData()['data']`` supplies the rows.
    """
    parsed = parseData()
    hyper_path = "/Users/jharris/Desktop/workbookUsers.hyper"
    column_names = parsed['cols']
    rows = parsed['data']

    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper, \
            Connection(hyper.endpoint, hyper_path,
                       CreateMode.CREATE_AND_REPLACE) as connection:
        connection.catalog.create_schema('Extract')

        text_columns = [
            TableDefinition.Column(column_name, SqlType.text())
            for column_name in column_names
        ]
        table = TableDefinition(TableName('Extract', 'Extract'), text_columns)
        connection.catalog.create_table(table)

        with Inserter(connection, table) as inserter:
            inserter.add_rows(rows=rows)
            inserter.execute()
def _hyper_table_definition(self, source_table, hyper_table_name="Extract"):
    """
    Build a hyper table definition from source_schema

    source_table (obj): Source table (Instance of google.cloud.bigquery.table.Table)
    hyper_table_name (string): Name of the target Hyper table, default="Extract"

    Fields with mode REQUIRED become NOT_NULLABLE columns; a field with mode
    REPEATED raises HyperSQLTypeMappingError.

    Returns a tableauhyperapi.TableDefinition Object
    """
    logger.debug(
        "Building Hyper TableDefinition for table {}".format(source_table.reference)
    )

    target_cols = []
    for field in source_table.schema:
        col_name = field.name
        col_type = self._hyper_sql_type(field)

        # Check for Nullability
        mode = field.mode
        if mode == "REPEATED":
            raise HyperSQLTypeMappingError(
                "Field mode REPEATED is not implemented in Hyper"
            )
        if mode == "REQUIRED":
            column = TableDefinition.Column(
                col_name, col_type, Nullability.NOT_NULLABLE
            )
        else:
            column = TableDefinition.Column(name=col_name, type=col_type)

        target_cols.append(column)
        logger.debug("..Column {} - Type {}".format(col_name, col_type))

    return TableDefinition(
        table_name=TableName("Extract", hyper_table_name), columns=target_cols
    )
def Incremental_refresh(result):
    """Append ``result`` to the existing campaign extract and drop duplicates.

    Opens ``Facebook_campaigns.hyper`` (it must already exist), continues the
    ``Row_ID`` numbering after the current maximum, inserts one row per entry
    of ``result`` and finally deletes all but the newest row of every group
    of otherwise identical rows.

    Args:
        result: Campaign data, indexed as ``result["Campaign Id"]`` and
            ``result.loc[i, <column>]`` for ``i`` in ``0..len-1``
            (presumably a pandas DataFrame with a default integer index;
            verify against the caller). ``"Date"`` must be a
            ``YYYY-MM-DD`` string.

    On a ``HyperException`` the error is logged, ``SendEmailMessage()`` is
    called and the program exits.
    """
    table_name = TableName('Extract', 'Campaign_data')
    # Rows that agree on all of these columns are considered duplicates.
    dedupe_columns = ('Date', 'Campaign Id', 'Campaign Name', 'Account Id',
                      'Impressions', 'Clicks', 'Account Name', 'Reach',
                      'Spend', 'Frequency')
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper',
                                CreateMode.NONE) as connection:
                    if connection.is_open:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        LogFileWrite("Successfully connected to Facebook Campaign data Table ")

                        max_row_id = connection.execute_scalar_query(
                            f'SELECT MAX("Row_ID") FROM {table_name}')
                        # MAX() over an empty table is NULL (None); start at 1 then.
                        row_id = (max_row_id or 0) + 1

                        with Inserter(connection, table_name) as inserter:
                            for i in range(len(result["Campaign Id"])):
                                inserter.add_row([
                                    int(row_id),
                                    datetime.today(),
                                    datetime.strptime(result.loc[i, "Date"], '%Y-%m-%d'),
                                    str(result.loc[i, "Account Id"]),
                                    str(result.loc[i, "Account Name"]),
                                    str(result.loc[i, "Campaign Id"]),
                                    str(result.loc[i, "Campaign Name"]),
                                    int(result.loc[i, "Impressions"]),
                                    int(result.loc[i, "Clicks"]),
                                    int(result.loc[i, "Reach"]),
                                    float(result.loc[i, "Spend"]),
                                    float(result.loc[i, "Frequency"]),
                                ])
                                row_id += 1
                            inserter.execute()
                        LogFileWrite("Successfully rows are Instered")

                        # Keep only the highest Row_ID of every duplicate group.
                        group_by = ",".join(f'"{column}"' for column in dedupe_columns)
                        delete_query = (
                            f'DELETE FROM {table_name} WHERE "Row_ID" NOT IN('
                            f'SELECT MAX("Row_ID") FROM {table_name} '
                            f'GROUP BY {group_by})'
                        )
                        connection.execute_command(delete_query)
                        print("Deleted Duplicate rows")
                        LogFileWrite("Successfully deleted Duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                # Leaving the `with` block has already closed the connection.
                print("Connection to Hyper file closed")
                LogFileWrite("Connection to Hyper file closed")
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        # Leaving the `with` block has already shut the process down.
        print("Hyper process is shutted down")
        LogFileWrite("Hyper process is shutted down")
    except HyperException as ex:
        LogFileWrite("There is exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        # No explicit close() here: the context managers above have released
        # whatever was opened, and the names may not even be bound if the
        # failure happened while starting the process or connecting.
        SendEmailMessage()
        sys.exit()
def Full_refresh(result):
    """Rebuild the campaign extract from scratch with the rows of ``result``.

    Replaces ``Facebook_campaigns.hyper``, creates
    "Extract"."Campaign_data", inserts one row per entry of ``result`` with
    ``Row_ID`` counting from 1 and finally deletes all but the newest row of
    every group of otherwise identical rows.

    Args:
        result: Campaign data, indexed as ``result["Campaign Id"]`` and
            ``result.loc[i, <column>]`` for ``i`` in ``0..len-1``
            (presumably a pandas DataFrame with a default integer index;
            verify against the caller). ``"Date"`` must be a
            ``YYYY-MM-DD`` string.

    On a ``HyperException`` the error is logged, ``SendEmailMessage()`` is
    called and the program exits.
    """
    LogFileWrite("Running Full refresh")
    table_name = TableName('Extract', 'Campaign_data')
    # Rows that agree on all of these columns are considered duplicates.
    dedupe_columns = ('Date', 'Campaign Id', 'Campaign Name', 'Account Id',
                      'Impressions', 'Clicks', 'Account Name', 'Reach',
                      'Spend', 'Frequency')
    connection = None
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper',
                                CreateMode.CREATE_AND_REPLACE) as connection:
                    if connection.is_open:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        DataTable = TableDefinition(table_name, [
                            # Below Columns are extracted from Report data API
                            TableDefinition.Column('Row_ID', SqlType.big_int()),
                            TableDefinition.Column('Inserted Date', SqlType.date()),
                            TableDefinition.Column('Date', SqlType.date()),
                            TableDefinition.Column('Account Id', SqlType.varchar(50)),
                            TableDefinition.Column('Account Name', SqlType.text()),
                            TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                            TableDefinition.Column('Campaign Name', SqlType.text()),
                            TableDefinition.Column('Impressions', SqlType.big_int()),
                            TableDefinition.Column('Clicks', SqlType.big_int()),
                            TableDefinition.Column('Reach', SqlType.big_int()),
                            TableDefinition.Column('Spend', SqlType.double()),
                            TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully Facebook Campaign Table is defined")
                        connection.catalog.create_table(DataTable)

                        # Counted from 0 so the reported figure is the real
                        # number of inserted rows (it used to be one too high).
                        inserted_rows = 0
                        with Inserter(connection, table_name) as inserter:
                            for i in range(len(result["Campaign Id"])):
                                inserter.add_row([
                                    inserted_rows + 1,  # Row_ID, 1-based
                                    datetime.today(),
                                    datetime.strptime(result.loc[i, "Date"], '%Y-%m-%d'),
                                    str(result.loc[i, "Account Id"]),
                                    str(result.loc[i, "Account Name"]),
                                    str(result.loc[i, "Campaign Id"]),
                                    str(result.loc[i, "Campaign Name"]),
                                    int(result.loc[i, "Impressions"]),
                                    int(result.loc[i, "Clicks"]),
                                    int(result.loc[i, "Reach"]),
                                    float(result.loc[i, "Spend"]),
                                    float(result.loc[i, "Frequency"]),
                                ])
                                inserted_rows += 1
                            inserter.execute()
                        print("Instered Rows are " + str(inserted_rows))
                        LogFileWrite("Instered Rows are " + str(inserted_rows))

                        # Keep only the highest Row_ID of every duplicate group.
                        group_by = ",".join(f'"{column}"' for column in dedupe_columns)
                        delete_query = (
                            f'DELETE FROM {table_name} WHERE "Row_ID" NOT IN('
                            f'SELECT MAX("Row_ID") FROM {table_name} '
                            f'GROUP BY {group_by})'
                        )
                        connection.execute_command(delete_query)
                        print("Deleted Duplicate rows")
                        LogFileWrite("Successfully deleted Duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                # Leaving the `with` block has already closed the connection.
                print("Connection to Hyper file closed")
                LogFileWrite("Connection to Hyper file closed")
                print("Connection is open or closed" + str(connection.is_open))
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        # Leaving the `with` block has already shut the process down.
        print("Hyper process is shutted down")
        LogFileWrite("Hyper process is shutted down")
        if connection is not None:
            print("Connection is open or closed" + str(connection.is_open))
        print("process is open or closed" + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        # No explicit close() here: the context managers above have released
        # whatever was opened, and the names may not even be bound if the
        # failure happened while starting the process or connecting.
        SendEmailMessage()
        sys.exit()
import shutil from pathlib import Path from tableauhyperapi import HyperProcess, Telemetry, \ Connection, CreateMode, \ NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \ Inserter, \ escape_name, escape_string_literal, \ TableName, Name, \ HyperException # The table is called "Extract" and will be created in the "Extract" schema. # This has historically been the default table name and schema for extracts created by Tableau extract_table = TableDefinition( table_name=TableName("Extract", "Extract"), columns=[ TableDefinition.Column(name='Name', type=SqlType.text(), nullability=NOT_NULLABLE), TableDefinition.Column(name='Location', type=SqlType.geography(), nullability=NOT_NULLABLE) ]) def run_insert_spatial_data_to_a_hyper_file(): """ An example of how to add spatial data to a Hyper file. """ print("EXAMPLE - Add spatial data to a Hyper file ")