Esempio n. 1
0
def convert_struct_field(column: StructField) -> TableDefinition.Column:
    """Converts a Spark StructField to a Tableau Hyper SqlType"""
    if column.dataType == IntegerType():
        sql_type = SqlType.int()
    elif column.dataType == LongType():
        sql_type = SqlType.big_int()
    elif column.dataType == ShortType():
        sql_type = SqlType.small_int()
    elif column.dataType == DoubleType():
        sql_type = SqlType.double()
    elif column.dataType == FloatType():
        sql_type = SqlType.double()
    elif column.dataType == BooleanType():
        sql_type = SqlType.bool()
    elif column.dataType == DateType():
        sql_type = SqlType.date()
    elif column.dataType == TimestampType():
        sql_type = SqlType.timestamp()
    elif column.dataType == StringType():
        sql_type = SqlType.text()
    else:
        # Trap the DecimalType case
        if str(column.dataType).startswith("DecimalType"):
            # Max precision is only up to 18 decimal places in Tableau Hyper API
            precision = column.dataType.precision if column.dataType.precision <= 18 else 18
            scale = column.dataType.scale
            sql_type = SqlType.numeric(precision, scale)
        else:
            raise ValueError(f'Invalid StructField datatype for column `{column.name}` : {column.dataType}')
    nullable = NULLABLE if column.nullable else NOT_NULLABLE
    return TableDefinition.Column(name=column.name, type=sql_type, nullability=nullable)
Esempio n. 2
0
 def test_insert_data_into_hyper_file(self):
     data = [(1001, "Jane", "Doe"), (1002, "John", "Doe"),
             (2201, "Elonzo", "Smith")]
     name = "output"
     table_def = TableDefinition(
         table_name=TableName("Extract", "Extract"),
         columns=[
             TableDefinition.Column(name=Name("id"),
                                    type=SqlType.big_int(),
                                    nullability=NULLABLE),
             TableDefinition.Column(name=Name("first_name"),
                                    type=SqlType.text(),
                                    nullability=NULLABLE),
             TableDefinition.Column(name=Name("last_name"),
                                    type=SqlType.text(),
                                    nullability=NULLABLE)
         ])
     path = insert_data_into_hyper_file(data, name, table_def)
     print(f'Database Path : {path}')
     tables = TestUtils.get_tables("Extract",
                                   "/tmp/hyperleaup/output/output.hyper")
     assert (len(tables) == 1)
     num_rows = TestUtils.get_row_count(
         "Extract", "Extract", "/tmp/hyperleaup/output/output.hyper")
     assert (num_rows == 3)
def convert_datatype(coldatatype):
    """
    [summary]
        This converts the datatype of the column of a given dataframe.
    
    Args:
        Datatype of the column in string format
    
    Returns:
        The tableau hyper extract compatible datatype after converting the dataframe datatype and a default value for NaN cases
    """

    datatype = SqlType.text()
    def_value = ''

    if 'datetime' in coldatatype.lower():
        datatype = SqlType.timestamp()
    elif 'str' in coldatatype.lower():
        datatype = SqlType.text()
    elif 'boolean' in coldatatype.lower():
        datatype = SqlType.bool()
    elif 'int' in coldatatype.lower():
        datatype = SqlType.int()
        def_value = 0
    elif 'float' in coldatatype.lower():
        datatype = SqlType.double()
        def_value = 0
    elif 'period' in coldatatype.lower():
        datatype = SqlType.interval()
    elif 'object' in coldatatype.lower():
        datatype = SqlType.text()
    else:
        datatype = SqlType.text()

    return (datatype, def_value)
Esempio n. 4
0
def createHyperTable():
    columns = getConfig()
    page_table = TableDefinition(table_name="Page")
    for i in range(0, len(columns)):
        if columns[i]['DataType'] == "String":
            dt = SqlType.text()
        elif columns[i]['DataType'] == "Date":
            dt = SqlType.date()
        else:
            dt = SqlType.big_int()
        page_table.add_column(columns[i]['ColumnName'], dt)
    return page_table
Esempio n. 5
0
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")
        object_name = "mealprep.hyper"
        file_name = os.environ.get('bucket_name')

        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            print(example_table)
            print(type(example_table))
            with Inserter(connection, example_table) as inserter:
                for i in request_data['data']:
                    inserter.add_row([i['breakfast'], i['lunch'], i['dinner']])

                inserter.execute()
                print("The data was added to the table.")

            print("The connection to the Hyper extract file is closed.")
        print("The HyperProcess has shut down.")

        with open('mealprep.hyper', 'rb') as reader:
            if object_name is None:
                object_name = file_name
            s3_client = boto3.client(
                's3',
                aws_access_key_id=os.environ.get('aws_access_key_id'),
                aws_secret_access_key=os.environ.get('aws_secret_access_key'))
            try:
                response = s3_client.upload_fileobj(reader, file_name,
                                                    object_name)
            except ClientError as e:
                logging.error(e)
                return False

    return redirect(url_for('index'))
Esempio n. 6
0
    def test_get_table_def(self):
        data = [
            (1001, 1, "Jane", "Doe", "2000-05-01", 29.0, False),
            (1002, 2, "John", "Doe", "1988-05-03", 33.0, False),
            (2201, 3, "Elonzo", "Smith", "1990-05-03", 21.0, True),
            (None, None, None, None, None, None, None)  # Test Nulls
        ]
        df = get_spark_session()\
            .createDataFrame(data, ["id", "dept_id", "first_name", "last_name", "dob", "age", "is_temp"])\
            .createOrReplaceTempView("employees")
        df = get_spark_session().sql(
            "select id, cast(dept_id as short), first_name, "
            "last_name, dob, age, is_temp from employees")
        table_def = get_table_def(df, "Extract", "Extract")

        # Ensure that the Table Name matches
        assert (table_def.table_name.name == Name("Extract"))

        # Ensure that the the TableDefinition column names match
        assert (table_def.get_column(0).name == Name("id"))
        assert (table_def.get_column(1).name == Name("dept_id"))
        assert (table_def.get_column(2).name == Name("first_name"))
        assert (table_def.get_column(3).name == Name("last_name"))
        assert (table_def.get_column(4).name == Name("dob"))
        assert (table_def.get_column(5).name == Name("age"))
        assert (table_def.get_column(6).name == Name("is_temp"))

        # Ensure that the column data types were converted correctly
        assert (table_def.get_column(0).type == SqlType.big_int())
        assert (table_def.get_column(1).type == SqlType.small_int())
        assert (table_def.get_column(2).type == SqlType.text())
        assert (table_def.get_column(3).type == SqlType.text())
        assert (table_def.get_column(4).type == SqlType.text())
        assert (table_def.get_column(5).type == SqlType.double())
        assert (table_def.get_column(6).type == SqlType.bool())
Esempio n. 7
0
    def test_convert_struct_field(self):
        # ensure strings can be converted correctly
        first_name_col = StructField('first_name', StringType(), False)
        converted_col = convert_struct_field(first_name_col)
        assert (converted_col.name == Name('first_name'))
        assert (converted_col.nullability is NOT_NULLABLE)
        assert (converted_col.type == SqlType.text())

        # ensure dates can be converted correctly
        date_col = StructField('update_date', DateType(), True)
        converted_col = convert_struct_field(date_col)
        assert (converted_col.name == Name('update_date'))
        assert (converted_col.nullability is NULLABLE)
        assert (converted_col.type == SqlType.date())

        # ensure timestamps can be converted correctly
        timestamp_col = StructField('created_at', TimestampType(), False)
        converted_col = convert_struct_field(timestamp_col)
        assert (converted_col.name == Name('created_at'))
        assert (converted_col.nullability is NOT_NULLABLE)
        assert (converted_col.type == SqlType.timestamp())
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    ''' 

    # Replace nan's with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text() 
            }
            table_def = []

            # Get column headers to loop through them
            df_columns = list(df)

            for col_header in df_columns:
                dtype_str = str(df[col_header].dtype)

                # Use dtype_str to lookup appropiate SqlType from dtype_map and append new column to table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))
                
            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                
                inserter.execute() 
def createHyperFile():
    dict = parseData()
    file = "/Users/jharris/Desktop/workbookUsers.hyper"
    cols = dict['cols']
    data = dict['data']

    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, file,
                        CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('Extract')

            table = TableDefinition(
                TableName('Extract', 'Extract'),
                [TableDefinition.Column(col, SqlType.text()) for col in cols])

            connection.catalog.create_table(table)

            with Inserter(connection, table) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
Esempio n. 10
0
    def hyper_value_to_dss(self, value, tag=SqlType.text().tag):
        """
        Convert the value `value` stored in a Hyper File under the storage type
        `hyper_type_tag` to the apropriate DSS type.

        >>> from tableauhyperapi import TypeTag, SqlType
        >>> assert TypeTag.INT == SqlType.int().tag

        :param value: Value from the Hyper Dataset
        :param tag: Storage type under which the value is stored in Hyper
        :return: output_value : Value compliant to Hyper
        """
        try:  # try to retrieve the conversion function
            conversion_function = self.mapping_hyper_to_dss[tag][1]
        except Exception as err:
            logger.warning("Failed to retrieve the conversion function {}: {}".format(tag, err))
            raise err
        try:
            output_value = conversion_function(value)
        except Exception as err:
            logger.warning("Failed to convert value {} of type {} stored as {}".format(value, type(value), tag))
            raise err
        return output_value
Esempio n. 11
0
    def run(self, args):
        """ Runs the command
        :param args: Arguments from argparse.Namespace
        """
        input_file = Path(args.input_file)
        print("Listing tables with spatial columns")

        # Starts the Hyper Process with telemetry enabled to send data to Tableau.
        # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
        with HyperProcess(
                telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
            # Connects to existing Hyper file
            with Connection(endpoint=hyper.endpoint,
                            database=input_file) as connection:
                catalog = connection.catalog
                # Iterates over all schemas in the input file
                for schema_name in catalog.get_schema_names():
                    # Iterates over all tables in the current schema
                    for table in catalog.get_table_names(schema=schema_name):
                        table_definition = catalog.get_table_definition(
                            name=table)
                        rows_in_table = connection.execute_scalar_query(
                            query=f"SELECT COUNT(*) FROM {table}")
                        spatial_columns = [
                            c.name for c in table_definition.columns
                            if c.type == SqlType.geography()
                        ]
                        if spatial_columns:
                            print(
                                f"Table {table} with {rows_in_table} rows has"
                                f" {len(spatial_columns)} spatial columns: {spatial_columns}"
                            )
                        else:
                            print(
                                f"Table {table} with {rows_in_table} rows has no spatial columns"
                            )
Esempio n. 12
0
import numpy as np
import pandas as pd
from tqdm import tqdm
from tableauhyperapi import (
    HyperProcess,
    Telemetry,
    Connection,
    SqlType,
    TableDefinition,
    CreateMode,
    TableName,
    Inserter,
)

dtype_mapper = {
    "string": SqlType.text(),
    "str": SqlType.text(),
    "object": SqlType.text(),
    "O": SqlType.text(),
    "int64": SqlType.big_int(),
    "float64": SqlType.double(),
    "bool": SqlType.bool(),
    "datetime64[ns]": SqlType.timestamp(),
    "timedelta[ns]": SqlType.interval(),
    "category": SqlType.text(),
}


def read_hyper(path_to_hyper_file, custom_schema="Extract"):
    """Read a Tableau Hyper file and turn it into a Pandas DataFrame.
 def fn_convert_to_hyper_types(given_type):
     switcher = {
         'empty': SqlType.text(),
         'bool': SqlType.bool(),
         'int': SqlType.big_int(),
         'float-dot': SqlType.double(),
         'date-YMD': SqlType.date(),
         'date-MDY': SqlType.date(),
         'date-DMY': SqlType.date(),
         'time-24': SqlType.time(),
         'time-12': SqlType.time(),
         'datetime-24-YMD': SqlType.timestamp(),
         'datetime-12-MDY': SqlType.timestamp(),
         'datetime-24-DMY': SqlType.timestamp(),
         'str': SqlType.text()
     }
     identified_type = switcher.get(given_type)
     if identified_type is None:
         identified_type = SqlType.text()
     return identified_type
def insert_box_events():
    # Hyper file instantiation
    path_to_database = Path(box_hyper_file)
    hyper_file_exists = Path.exists(path_to_database)

    # Start the Hyper API pricess
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Check if the Hyper file exists or not. CreateMode.NONE will append. CreateMode.CREATE_AND_REPLACE will create a net new file
        create_mode = None
        if hyper_file_exists:
            create_mode = CreateMode.NONE
        else:
            create_mode = CreateMode.CREATE_AND_REPLACE

        # Open a new connection
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=create_mode) as connection:
            # Check a new schema if it does not exist
            connection.catalog.create_schema_if_not_exists(schema=box_schema)

            # Instantiate the table schema
            box_events_table_def = TableDefinition(
                table_name=TableName(box_schema, box_events_table),
                columns=[
                    TableDefinition.Column(name='event_id',
                                           type=SqlType.text(),
                                           nullability=NULLABLE),
                    TableDefinition.Column(name='event_type',
                                           type=SqlType.text(),
                                           nullability=NULLABLE),
                    TableDefinition.Column(name='created_at',
                                           type=SqlType.timestamp_tz(),
                                           nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_id',
                                           type=SqlType.text(),
                                           nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_name',
                                           type=SqlType.text(),
                                           nullability=NULLABLE),
                    TableDefinition.Column(name='created_by_login',
                                           type=SqlType.text(),
                                           nullability=NULLABLE),
                    TableDefinition.Column(name='source',
                                           type=SqlType.json(),
                                           nullability=NULLABLE),
                    TableDefinition.Column(name='ip_address',
                                           type=SqlType.text(),
                                           nullability=NULLABLE),
                    TableDefinition.Column(name='additional_details',
                                           type=SqlType.json(),
                                           nullability=NULLABLE)
                ])
            print('Found schema: {0} and table def: {1}'.format(
                box_events_table_def.table_name.schema_name,
                box_events_table_def.table_name))
            # Create the table if it does not exist and get the Box events table
            connection.catalog.create_table_if_not_exists(
                table_definition=box_events_table_def)
            table_name = TableName(box_schema, box_events_table)

            # Get the MAX row by created_at
            last_event_created_at = connection.execute_scalar_query(
                query=
                f"SELECT MAX(created_at) FROM {box_events_table_def.table_name}"
            )
            if last_event_created_at is not None:
                print('Found last event in hyper file: {0}'.format(
                    last_event_created_at.to_datetime()))

            # Get the Box service account client
            auth = JWTAuth.from_settings_file(box_config)
            box_client = Client(auth)
            service_account = box_client.user().get()
            print(
                'Found Service Account with name: {0}, id: {1}, and login: {2}'
                .format(service_account.name, service_account.id,
                        service_account.login))

            # Get the current date and the date for one month ago if there is not lastest event
            today = datetime.utcnow()
            if last_event_created_at is None:
                last_event_created_at = today - relativedelta.relativedelta(
                    months=month_lookback)
            else:
                last_event_created_at = last_event_created_at.to_datetime(
                ).replace(tzinfo=timezone.utc).astimezone(tz=None)

            # Get the Box enterprise events for a given date range
            print(
                'Using date range for events  today: {0} and starting datetime: {1}'
                .format(today, last_event_created_at))
            get_box_events(box_client, 0, last_event_created_at, today)

            # Insert the Box enteprise events into the Hyper file
            with Inserter(connection, box_events_table_def) as inserter:
                inserter.add_rows(rows=box_events)
                inserter.execute()

            # Number of rows in the "Box"."Events" table.
            row_count = connection.execute_scalar_query(
                query=f"SELECT COUNT(*) FROM {table_name}")
            print(f"The number of rows in table {table_name} is {row_count}.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
Esempio n. 15
0
query_res['column_ex'] = query_res['column_ex'].astype(str)
query_res['column_ex2'] = query_res['column_ex2'].astype(str)
query_res['column_ex3'] = query_res['column_ex3'].astype(str)
query_res['column_ex4'] = query_res['column_ex4'].astype(str)
query_res['column_ex5'] = query_res['column_ex5'].astype(str)
query_res['column_ex6'] = query_res['column_ex6'].astype(str)

with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with Connection(endpoint=hyper.endpoint,
                    database=path_to_database,
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
        connection.catalog.create_schema('Extract')
        # Define the hyper table (Same definition as the full refresh)
        example_table = TableDefinition(TableName('Extract', 'Extract'), [
            TableDefinition.Column('column_ex', SqlType.varchar(500)),
            TableDefinition.Column('column_ex2', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int', SqlType.int()),
            TableDefinition.Column('column_ex3', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int2', SqlType.int()),
            TableDefinition.Column('column_ex4', SqlType.varchar(500)),
            TableDefinition.Column('column_ex5', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int3', SqlType.int()),
            TableDefinition.Column('column_ex6', SqlType.varchar(500)),
        ])
        print("The table is defined.")
        connection.catalog.create_table(table_definition=example_table)
        # Insert data from dataframe to hyper table
        with Inserter(connection, example_table) as inserter:
            for i in range(len(query_res)):
                inserter.add_row(
def Full_refresh(result):
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open==True:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper', CreateMode.CREATE_AND_REPLACE,) as connection: 
                    if connection.is_open==True:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        DataTable = TableDefinition(TableName('Extract','Campaign_data'),[
                        ############Below Columns are extracted from Report data API
                        TableDefinition.Column('Row_ID', SqlType.big_int()),
                        TableDefinition.Column('Inserted Date', SqlType.date()),
                        TableDefinition.Column('Date', SqlType.date()),
                        TableDefinition.Column('Account Id', SqlType.varchar(50)),
                        TableDefinition.Column('Account Name', SqlType.text()),
                        TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                        TableDefinition.Column('Campaign Name', SqlType.text()),
                        TableDefinition.Column('Impressions', SqlType.big_int()),
                        TableDefinition.Column('Clicks', SqlType.big_int()),
                        TableDefinition.Column('Reach', SqlType.big_int()),
                        TableDefinition.Column('Spend', SqlType.double()),
                        TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully Facebook Campaign Table is defined")
                        connection.catalog.create_table(DataTable)
                       # print(Campaign_df["Id"].dtype)
                        #print(range(len(Campaign_df["Id"])))
                        
                        with Inserter(connection, TableName('Extract','Campaign_data')) as inserter:
                            inserted_rows=1
                            row_id=1
                            for i in range(0,len(result["Campaign Id"])):
                                #print(str(result.loc[i,"CampaignId"]))
                                #print(result.loc[i,"Date"])
                                inserter.add_row([
                                int(row_id),
                                datetime.today(),
                                (datetime.strptime(result.loc[i,"Date"], '%Y-%m-%d')),
                                #(datetime.date(result.loc[i,"Date"])),#, "%Y-%m-%d")),
                                str(result.loc[i,"Account Id"]),
                                str(result.loc[i,"Account Name"]),
                                str(result.loc[i,"Campaign Id"]),
                                str(result.loc[i,"Campaign Name"]),
                                int(result.loc[i,"Impressions"]),
                                int(result.loc[i,"Clicks"]),
                                int(result.loc[i,"Reach"]),
                                float(result.loc[i,"Spend"]),
                                float(result.loc[i,"Frequency"])
                                ])
                                #print("instered")
                                row_id=row_id+1
                                inserted_rows=inserted_rows+1
                            inserter.execute()
                            print("Instered Rows are " +str(inserted_rows))
                            LogFileWrite("Instered Rows are " +str(inserted_rows))
                        table_name=TableName('Extract','Campaign_data')
                        Delet_query=f"DELETE FROM {table_name} WHERE " +'"'+ 'Row_ID'+'"'+" NOT IN("
                        Delet_query+="SELECT MAX("+'"'+'Row_ID'+'"'+f") FROM {table_name} "
                        Delet_query+="GROUP BY " +'"'+'Date'+'",'+'"'+'Campaign Id'+'",'+'"'+'Campaign Name'+'",'
                        Delet_query+='"'+'Account Id'+'",'+'"'+'Impressions'+'",'
                        Delet_query+='"'+'Clicks'+'",'+'"'+'Account Name'+'",'+'"'+'Reach'+'",'+'"'+'Spend'+'",'
                        Delet_query+='"'+'Frequency'+'")'
                        #print(Delet_query)
                        
                        connection.execute_command(Delet_query)
                        print("Deleted Duplicate rows")
                        LogFileWrite("Successfully deleted Duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                if connection.is_open==True:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                    print("Connection is open or closed" + str(connection.is_open))
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        if hyperprocess.is_open==True:
            hyperprocess.close()
            print("Forcefully shutted down the Hyper Process")
            LogFileWrite("Forcefully shutted down the Hyper Process")
        else:
            print("Hyper process is shutted down")
            LogFileWrite("Hyper process is shutted down")
            print("Connection is open or closed" + str(connection.is_open))
            print("process is open or closed" + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
Esempio n. 17
0
 def __init__(self):
     self.location = "./output/meli_challenge_result.hyper"
     self.test_location = "../output/meli_challenge_result.hyper"
     self.searchResult_table = TableDefinition('results', [
         TableDefinition.Column('id', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('site_id', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('title', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('seller', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('price', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('prices', SqlType.json(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('sale_price', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('currency_id', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('available_quantity', SqlType.int(), Nullability.NULLABLE),
         TableDefinition.Column('sold_quantity', SqlType.int(), Nullability.NULLABLE),
         TableDefinition.Column('buying_mode', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('listing_type_id', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('stop_time', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('condition', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('permalink', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('thumbnail', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('accepts_mercadopago', SqlType.bool(), Nullability.NULLABLE),
         TableDefinition.Column('installments', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('address', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('shipping', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('seller_address', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('attributes', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('original_price', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('category_id', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('official_store_id', SqlType.int(), Nullability.NULLABLE),
         TableDefinition.Column('domain_id', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('catalog_product_id', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('tags', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('catalog_listing', SqlType.bool(), Nullability.NULLABLE),
         TableDefinition.Column('order_backend', SqlType.int(), Nullability.NULLABLE),
     ])
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, Name, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Name',
                               type=SqlType.text(),
                               nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Location',
                               type=SqlType.geography(),
                               nullability=NOT_NULLABLE)
    ])


def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
Esempio n. 19
0
import csv
import tableauserverclient as TSC
import yaml
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

#Create table definition to be used in Hyper file
workbook_table = TableDefinition(
    table_name="Workbooks",
    columns=[
        TableDefinition.Column("Project Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Content URL", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Created At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Project Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("size", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Updated At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE)
    ])
#Name of hyper file that will be generated locally and deployed to Tableau
path_to_database = Path("workbooks.hyper")
#Name of project where Hyper file will be deployed
#This project must exist on your server
PROJECT = 'HyperTest'

Esempio n. 20
0
    def run(self, args):
        """ Runs the command
        :param args: Arguments from argparse.Namespace
        """
        input_file = Path(args.input_file)
        output_file = Path(args.output_file)
        if args.mode == AdjustVertexOrderMode.INVERT:
            print("Reversing vertex order of polygons in spatial columns")
        else:
            print(
                "Adjusting vertex order of polygons assuming data source with "
                "flat - earth topology in spatial columns")

        # Starts the Hyper Process with telemetry enabled to send data to Tableau.
        # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
        with HyperProcess(
                telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

            with Connection(endpoint=hyper.endpoint) as connection:
                catalog = connection.catalog

                # Create output database file, fails if the file already exists
                catalog.create_database(output_file)

                # Attach input and output database files
                catalog.attach_database(input_file, "input")
                catalog.attach_database(output_file, "output")

                # Iterate over all schemas in the input file
                for schema_name in catalog.get_schema_names("input"):

                    # Create the schema in the output file
                    catalog.create_schema_if_not_exists(
                        SchemaName("output", schema_name.name))

                    # Iterate over all tables in the input schema
                    for in_table in catalog.get_table_names(schema_name):
                        table_definition = catalog.get_table_definition(
                            name=in_table)
                        columns = table_definition.columns

                        out_table = TableName("output", schema_name.name,
                                              in_table.name)
                        out_table_definition = TableDefinition(
                            out_table, columns)

                        # Create the table in the output file with the same table definition.
                        # Note that any constraints on the table in the input file
                        # will not be present in the table in the output file
                        catalog.create_table(out_table_definition)

                        spatial_columns = [
                            c.name for c in columns
                            if c.type == SqlType.geography()
                        ]
                        in_column_modifiers = [
                            f"geo_{args.mode.value}_vertex_order({c.name})"
                            if c.type == SqlType.geography() else f"{c.name}"
                            for c in columns
                        ]

                        if len(spatial_columns) > 0:
                            print(
                                f"Copying table {in_table} with {len(spatial_columns)} "
                                f"spatial columns: {spatial_columns}...")
                        else:
                            print(
                                f"Copying table {in_table} with no spatial columns..."
                            )

                        row_count = connection.execute_command(
                            f"INSERT INTO {out_table} SELECT {','.join(in_column_modifiers)} FROM {in_table}"
                        )
                        print(f"   {row_count} rows copied")
Esempio n. 21
0
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  2 21:35:47 2020

@author: Manish Chauhan || +91-9774083186
"""
from tableauhyperapi import HyperProcess, Inserter, Telemetry, Connection, CreateMode, NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, escape_name, escape_string_literal, HyperException, TableName

# Starting local hyper instance

extract_table = TableDefinition(
    table_name=TableName('extract', 'extract'),
    columns=[
        TableDefinition.Column("Sno", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Sale", SqlType.big_int()),
        TableDefinition.Column("Profit", SqlType.big_int()),
        #TableDefinition.Column("Date", SqlType.date())
    ])

data_to_insert = [["1", "Vishakha", 99999, 80]]


def run_create_hyper_file_from_csv():

    print("Inside Fucntion to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(
                endpoint=hyper.endpoint,
        x.append(workbook.name) 
        y.append(workbook.owner_id) 

    for users in y:
        z.append(server.users.get_by_id(users))

with open('workbook_info.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(zip(x, y, z))

# HYPER API TO CREATE HYPER FILE

Workbook_Info_table = TableDefinition(
    table_name="Workbook_Info",
    columns=[
        TableDefinition.Column("Workbook Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Workbook LUID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Owner Info", SqlType.text(), NOT_NULLABLE)
    ]
)

def run_create_hyper_file_from_csv():

    path_to_database = Path("Workbook_Info.hyper")

    process_parameters = {
        "log_file_max_count": "2",
        "log_file_size_limit": "100M"
    }

    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, parameters=process_parameters) as hyper:
Esempio n. 23
0
"""

# Change these to match your use case.
hyper_file_path = "hyper_for_csv.hyper"
table_name = TableName("Extract", "Extract")
output_name = "output.csv"

path_to_database = Path(hyper_file_path)

# The table is called "Extract" and will be created in the "Extract" schema
# and contains four columns.
extract_table = TableDefinition(
    table_name=table_name,
    columns=[
        TableDefinition.Column(name='Customer ID',
                               type=SqlType.text(),
                               nullability=NULLABLE),
        TableDefinition.Column(name='Customer Name',
                               type=SqlType.text(),
                               nullability=NULLABLE),
        TableDefinition.Column(name='Loyalty Reward Points',
                               type=SqlType.big_int(),
                               nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Segment',
                               type=SqlType.text(),
                               nullability=NULLABLE)
    ])


def insert_data():
    """
# Step 1: Start Hyper instance
with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    #step 2: Start hyper process
    with Connection(endpoint=hyper.endpoint,
                    create_mode=CreateMode.CREATE_AND_REPLACE,
                    database=PATH_TO_HYPER) as connection:
        print(
            connection.execute_scalar_query(
                query="SELECT'securing connections with endpoint!'"))

        #create table def with Table name
        empdetails = TableDefinition(
            table_name=TableName('Extract', 'empdetails'),
            columns=[
                TableDefinition.Column('EMP_ID', SqlType.int()),
                TableDefinition.Column('EMP_NAME', SqlType.text()),
                TableDefinition.Column('MONTHY_SALARY', SqlType.int()),
                TableDefinition.Column('UNI_FLAG', SqlType.int())
            ])

        #Create the table in the connection catalog
        connection.catalog.create_schema('Extract')
        connection.catalog.create_table(empdetails)

        print('Validating the data...')
        cmd = f"COPY{empdetails.table_name} from {escape_string_literal(PATH_TO_CSV)}\
        with " "(format csv , NULL 'NULL', delimiter ',' , HEADER)"
        table_count = connection.execute_command(cmd)
        total_time = int((time.time() - start))
        total_time = "{:.2f}".format(total_time)
def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Creates new Hyper file "spatial_data.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

            connection.catalog.create_schema(
                schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # Hyper API's Inserter allows users to transform data during insertion.
            # To make use of data transformation during insertion, the inserter requires the following inputs
            #   1. The connection to the Hyper instance containing the table.
            #   2. The table name or table defintion into which data is inserted.
            #   3. List of Inserter.ColumnMapping.
            #       This list informs the inserter how each column in the target table is tranformed.
            #       The list must contain all the columns into which data is inserted.
            #       "Inserter.ColumnMapping" maps a valid SQL expression (if any) to a column in the target table.
            #       For example Inserter.ColumnMapping('target_column_name', f'{escape_name("colA")}*{escape_name("colB")}')
            #       The column "target_column" contains the product of "colA" and "colB" after successful insertion.
            #       SQL expression string is optional in Inserter.ColumnMapping.
            #       For a column without any transformation only the column name is required.
            #       For example Inserter.ColumnMapping('no_data_transformation_column')
            #   4. The Column Definition of all input values provided to the Inserter

            # Inserter definition contains the column definition for the values that are inserted
            # The data input has two text values Name and Location_as_text
            inserter_definition = [
                TableDefinition.Column(name='Name',
                                       type=SqlType.text(),
                                       nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Location_as_text',
                                       type=SqlType.text(),
                                       nullability=NOT_NULLABLE)
            ]

            # Column 'Name' is inserted into "Extract"."Extract" as-is.
            # Column 'Location' in "Extract"."Extract" of geography type is computed from Column 'Location_as_text' of text type
            # using the expression 'CAST("Location_as_text") AS GEOGRAPHY'.
            # Inserter.ColumnMapping is used for mapping the CAST expression to Column 'Location'.
            column_mappings = [
                'Name',
                Inserter.ColumnMapping(
                    'Location',
                    f'CAST({escape_name("Location_as_text")} AS GEOGRAPHY)')
            ]

            # Data to be inserted.
            data_to_insert = [['Seattle', "point(-122.338083 47.647528)"],
                              ['Munich', "point(11.584329 48.139257)"]]

            # Insert data into "Extract"."Extract" table with CAST expression.
            with Inserter(connection,
                          extract_table,
                          column_mappings,
                          inserter_definition=inserter_definition) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()
            print("The data was added to the table.")

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
Esempio n. 26
0
# -----------------------------------------------------------------------------
from pathlib import Path

from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException


customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="test02",
    columns=[
        TableDefinition.Column("ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Mathematics", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Science", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("English", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Japanese", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("History", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Geography", SqlType.text(), NOT_NULLABLE),
    ]
)


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")
Esempio n. 27
0
with HyperProcess(
        telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:

    # Creates a new Hyper file or
    # replaces the file if it already exists.
    with Connection(endpoint=hyper.endpoint,
                    database="drinks.hyper",
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

        # default schema name is "public"
        # define table
        drinks_table = TableDefinition(
            table_name="drinks",
            columns=[
                TableDefinition.Column("country", SqlType.text(),
                                       NOT_NULLABLE),
                TableDefinition.Column("beer_servings", SqlType.big_int(),
                                       NOT_NULLABLE),
                TableDefinition.Column("spirit_servings", SqlType.big_int(),
                                       NOT_NULLABLE),
                TableDefinition.Column("wine_servings", SqlType.big_int(),
                                       NOT_NULLABLE),
                TableDefinition.Column("total_litres_of_pure_alcohol",
                                       SqlType.double(), NOT_NULLABLE),
                TableDefinition.Column("continent", SqlType.text(),
                                       NOT_NULLABLE)
            ])

        # create tables
        connection.catalog.create_table(drinks_table)
Esempio n. 28
0
#
# -----------------------------------------------------------------------------
from pathlib import Path

from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="Customer",
    columns=[
        TableDefinition.Column("Customer ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Customer Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Loyalty Reward Points", SqlType.big_int(),
                               NOT_NULLABLE),
        TableDefinition.Column("Segment", SqlType.text(), NOT_NULLABLE)
    ])


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")

    path_to_database = Path("customer.hyper")
Esempio n. 29
0
    def _hyper_sql_type(self, source_column):
        """
        Finds the correct Hyper column type for source_column

        source_column (obj): Source column (Instance of google.cloud.bigquery.schema.SchemaField)

        Returns a tableauhyperapi.SqlType Object
        """

        source_column_type = source_column.field_type
        return_sql_type = {
            "BOOL": SqlType.bool(),
            "BYTES": SqlType.bytes(),
            "DATE": SqlType.date(),
            "DATETIME": SqlType.timestamp(),
            "INT64": SqlType.big_int(),
            "INTEGER": SqlType.int(),
            "NUMERIC": SqlType.numeric(18, 9),
            "FLOAT64": SqlType.double(),
            "STRING": SqlType.text(),
            "TIME": SqlType.time(),
            "TIMESTAMP": SqlType.timestamp_tz(),
        }.get(source_column_type)

        if return_sql_type is None:
            error_message = "No Hyper SqlType defined for BigQuery source type: {}".format(
                source_column_type
            )
            logger.error(error_message)
            raise LookupError(error_message)

        logger.debug(
            "Translated source column type {} to Hyper SqlType {}".format(
                source_column_type, return_sql_type
            )
        )
        return return_sql_type
Esempio n. 30
0
import sys

from pathlib import Path

from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

product_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="Products",
    columns=[
        TableDefinition.Column("category", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("title", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("price", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("available_quantity", SqlType.big_int(),
                               NOT_NULLABLE),
        TableDefinition.Column("sold_quantity", SqlType.big_int(),
                               NOT_NULLABLE),
        TableDefinition.Column("permalink", SqlType.text(), NOT_NULLABLE)
    ])


def call_mlapi_to_dict(SearchCategory):

    print("Start call_mlapi_to_dict.")

    try: