Esempio n. 1
0
def createTabTable(tableName, columnHeading, dataTypes):
    extract_table = TableDefinition(table_name=TableName("Extract", tableName))

    for head in columnHeading:
        extract_table.add_column(head, eval(mapper[dataTypes.pop(0)]))

    return extract_table
Esempio n. 2
0
def to_hyper(df,
             hyper_file_name,
             custom_schema="Extract",
             custom_table_name="Extract"):
    """
    Write a Tableau Hyper file from a Pandas DataFrame.

    Currently can only write single table extracts, which is Tableau's
    default way of creating an extract.

    Args:
        df: Specify which DataFrame you want to output
        hyper_file_name: Specify the file name such as "Example.hyper"
        custom_schema: If you need to change the schema name. Defaults to "Extract"
        custom_table_name: If you need to change the schema name. Defaults to "Extract"

    Returns:
        Tableau Hyper file
    """

    # Starts the Hyper Process
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
            parameters={"log_config": ""},
    ) as hyper:

        # Creates a .hyper file to put the data into
        with Connection(hyper.endpoint, hyper_file_name,
                        CreateMode.CREATE_AND_REPLACE) as connection:

            connection.catalog.create_schema(custom_schema)

            # create a .hyper compatible column definition
            # from pd DataFrame column names and dtypes
            # using 3 list comprehensions to loop through
            # all the columns in the DataFrame

            column_names = [column for column in df.columns]

            column_dtype = [dtype for dtype in df.dtypes]

            hyper_table = TableDefinition(
                TableName(custom_schema, custom_table_name),
                [
                    TableDefinition.Column(
                        column_names[column], dtype_mapper[str(
                            column_dtype[column])])
                    for column in range(len(column_names))
                ],
            )
            connection.catalog.create_table(hyper_table)

            # Repace NaN with None, otherwise it will not be Null in Tableau
            df.replace({np.nan: None}, inplace=True)

            # Insert the data values into the hyper file
            data_to_insert = df.to_numpy()
            with Inserter(connection, hyper_table) as inserter:
                inserter.add_rows(tqdm((row for row in data_to_insert)))
                inserter.execute()
Esempio n. 3
0
 def test_insert_data_into_hyper_file(self):
     data = [(1001, "Jane", "Doe"), (1002, "John", "Doe"),
             (2201, "Elonzo", "Smith")]
     name = "output"
     table_def = TableDefinition(
         table_name=TableName("Extract", "Extract"),
         columns=[
             TableDefinition.Column(name=Name("id"),
                                    type=SqlType.big_int(),
                                    nullability=NULLABLE),
             TableDefinition.Column(name=Name("first_name"),
                                    type=SqlType.text(),
                                    nullability=NULLABLE),
             TableDefinition.Column(name=Name("last_name"),
                                    type=SqlType.text(),
                                    nullability=NULLABLE)
         ])
     path = insert_data_into_hyper_file(data, name, table_def)
     print(f'Database Path : {path}')
     tables = TestUtils.get_tables("Extract",
                                   "/tmp/hyperleaup/output/output.hyper")
     assert (len(tables) == 1)
     num_rows = TestUtils.get_row_count(
         "Extract", "Extract", "/tmp/hyperleaup/output/output.hyper")
     assert (num_rows == 3)
def importTextSchema(
    schemaJson
):
    try:
        if 'name' not in schemaJson:
            print('[ERROR] No table name defined in the schema json. Key \'name\' required of type STRING:\nExiting now\n.')
            exit( -1 )

        if 'columns' not in schemaJson:
            print('[ERROR] No columns defined in the schema json. Key \'columns\' required of type [OBJECT]:\nExiting now\n.')
            exit( -1 )

        print('[INFO] Creating temp landing table.')
        table_def = TableDefinition('temptable');

        for c in schemaJson['columns']:
            colFunc = typeObj['TEXT']
            colType = colFunc()
            table_def.add_column(c['name'], colType)

        if ( table_def == None ):
            print('[ERROR] A fatal error occurred while creating the temp table:\nExiting now\n.')
            exit( -1 )

    except HyperException as e:
        print('[ERROR] A fatal error occurred while reading the schema definition:\n', e, '\nExiting now.')
        exit( -1 )

    return table_def
Esempio n. 5
0
    def create_schema(self, schema_dss, destination_file_path):
        """
        Read the Tableau Hyper file an.

        :param schema_dss: DSS schema from the DSS dataset to export
            example: [{"columns": [{"name": "customer_id", "type": "bigint"}, ...]}, ...]

        :param destination_file_path:
        :return:
        """
        # Read the destination file of the dss
        self.output_file = destination_file_path
        logger.info(
            "Writing the Tableau Hyper file to the following location: {}".
            format(destination_file_path))
        logger.info(
            "The dataset to export has the following schema: {}".format(
                schema_dss))

        dss_columns = schema_dss['columns']
        dss_storage_types = [
            column_descriptor['type'] for column_descriptor in dss_columns
        ]
        self.schema_converter.set_dss_storage_types(dss_storage_types)

        self.is_geo_table = dss_is_geo(schema_dss)
        logger.info("The input dataset contains a geo column: {}".format(
            self.is_geo_table))

        if not self.schema_name or not self.table_name:
            logger.warning("Did not received the table or schema name.")
            raise ValueError("No valid schema or table name received.")

        logger.info("Received target schema {} and table {}".format(
            self.schema_name, self.table_name))

        # Create the Tableau Hyper schema from the DSS schema
        self.output_table_definition = TableDefinition(
            TableName(self.schema_name, self.table_name),
            self.schema_converter.dss_columns_to_hyper_columns(dss_columns))

        # Open connection to file
        self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
        self.connection = Connection(self.hyper.endpoint, self.output_file,
                                     CreateMode.CREATE_AND_REPLACE)
        assert self.connection is not None
        self.connection.catalog.create_schema(self.schema_name)
        self.connection.catalog.create_table(self.output_table_definition)

        # Handle the geo case
        if self.is_geo_table:
            logger.info("Detected geo column. Creating a temporary table...")
            dss_tmp_schema = geo_to_text(schema_dss)
            dss_tmp_columns = dss_tmp_schema['columns']
            self.tmp_table_definition = TableDefinition(
                TableName(self.schema_name, "tmp_" + self.table_name),
                self.schema_converter.dss_columns_to_hyper_columns(
                    dss_tmp_columns))
            self.connection.catalog.create_table(self.tmp_table_definition)
            logger.info("Created temporary table")
Esempio n. 6
0
def createHyperTable():
    columns = getConfig()
    page_table = TableDefinition(table_name="Page")
    for i in range(0, len(columns)):
        if columns[i]['DataType'] == "String":
            dt = SqlType.text()
        elif columns[i]['DataType'] == "Date":
            dt = SqlType.date()
        else:
            dt = SqlType.big_int()
        page_table.add_column(columns[i]['ColumnName'], dt)
    return page_table
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    ''' 

    # Replace nan's with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text() 
            }
            table_def = []

            # Get column headers to loop through them
            df_columns = list(df)

            for col_header in df_columns:
                dtype_str = str(df[col_header].dtype)

                # Use dtype_str to lookup appropiate SqlType from dtype_map and append new column to table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))
                
            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                
                inserter.execute() 
Esempio n. 8
0
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")
        object_name = "mealprep.hyper"
        file_name = os.environ.get('bucket_name')

        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            print(example_table)
            print(type(example_table))
            with Inserter(connection, example_table) as inserter:
                for i in request_data['data']:
                    inserter.add_row([i['breakfast'], i['lunch'], i['dinner']])

                inserter.execute()
                print("The data was added to the table.")

            print("The connection to the Hyper extract file is closed.")
        print("The HyperProcess has shut down.")

        with open('mealprep.hyper', 'rb') as reader:
            if object_name is None:
                object_name = file_name
            s3_client = boto3.client(
                's3',
                aws_access_key_id=os.environ.get('aws_access_key_id'),
                aws_secret_access_key=os.environ.get('aws_secret_access_key'))
            try:
                response = s3_client.upload_fileobj(reader, file_name,
                                                    object_name)
            except ClientError as e:
                logging.error(e)
                return False

    return redirect(url_for('index'))
 def fn_build_hyper_columns(self, logger, timer, in_data_frame_structure):
     timer.start()
     list_to_return = []
     for current_field_structure in in_data_frame_structure:
         list_to_return.append(current_field_structure['order'])
         current_column_type = self.fn_convert_to_hyper_types(
             current_field_structure['type'])
         logger.debug(
             self.locale.gettext(
                 'Column {column_order} having name [{column_name}] and type "{column_type}" '
                 + 'will become "{column_type_new}"').replace(
                     '{column_order}',
                     str(current_field_structure['order'])).replace(
                         '{column_name}',
                         current_field_structure['name']).replace(
                             '{column_type}',
                             str(current_field_structure['type'])).replace(
                                 '{column_type_new}',
                                 str(current_column_type)))
         nullability_value = NULLABLE
         if current_field_structure['nulls'] == 0:
             nullability_value = NOT_NULLABLE
         list_to_return[
             current_field_structure['order']] = TableDefinition.Column(
                 name=current_field_structure['name'],
                 type=current_column_type,
                 nullability=nullability_value)
     logger.info(self.locale.gettext('Building Hyper columns completed'))
     timer.stop()
     return list_to_return
Esempio n. 10
0
def convert_struct_field(column: StructField) -> TableDefinition.Column:
    """Converts a Spark StructField to a Tableau Hyper SqlType"""
    if column.dataType == IntegerType():
        sql_type = SqlType.int()
    elif column.dataType == LongType():
        sql_type = SqlType.big_int()
    elif column.dataType == ShortType():
        sql_type = SqlType.small_int()
    elif column.dataType == DoubleType():
        sql_type = SqlType.double()
    elif column.dataType == FloatType():
        sql_type = SqlType.double()
    elif column.dataType == BooleanType():
        sql_type = SqlType.bool()
    elif column.dataType == DateType():
        sql_type = SqlType.date()
    elif column.dataType == TimestampType():
        sql_type = SqlType.timestamp()
    elif column.dataType == StringType():
        sql_type = SqlType.text()
    else:
        # Trap the DecimalType case
        if str(column.dataType).startswith("DecimalType"):
            # Max precision is only up to 18 decimal places in Tableau Hyper API
            precision = column.dataType.precision if column.dataType.precision <= 18 else 18
            scale = column.dataType.scale
            sql_type = SqlType.numeric(precision, scale)
        else:
            raise ValueError(f'Invalid StructField datatype for column `{column.name}` : {column.dataType}')
    nullable = NULLABLE if column.nullable else NOT_NULLABLE
    return TableDefinition.Column(name=column.name, type=sql_type, nullability=nullable)
Esempio n. 11
0
def get_table_def(df: DataFrame, schema_name: str,
                  table_name: str) -> TableDefinition:
    """Returns a Tableau TableDefintion given a Spark DataFrame"""
    schema = df.schema
    cols = list(map(convert_struct_field, schema))
    return TableDefinition(table_name=TableName("Extract", "Extract"),
                           columns=cols)
Esempio n. 12
0
 def get_table_def(df: DataFrame,
                   schema_name: str = 'Extract',
                   table_name: str = 'Extract') -> TableDefinition:
     """Returns a Tableau TableDefintion given a Spark DataFrame"""
     schema = df.schema
     cols = list(map(HyperUtils.convert_struct_field, schema))
     return TableDefinition(table_name=TableName(schema_name, table_name),
                            columns=cols)
Esempio n. 13
0
def create_column_def(table_def_dict: dict):

    column_def = []
    for key in table_def_dict.keys():
        column_def.append(
            TableDefinition.Column(
                key, sql_type_dict[table_def_dict[key]['type']],
                nullable_dict[table_def_dict[key]['nullable']]))

    return column_def
 def fn_create_hyper_table(self, local_logger, timer, in_dict):
     timer.start()
     out_hyper_table = TableDefinition(
         TableName(in_dict['schema name'], in_dict['table name']),
         columns=in_dict['columns'],
     )
     in_dict['connection'].catalog.create_table(table_definition=out_hyper_table)
     local_logger.info(self.locale.gettext(
         'Hyper table "{hyper_table_name}" has been created')
                       .replace('{hyper_table_name}', in_dict['table name']))
     timer.stop()
     return out_hyper_table
def createHyperFile():
    dict = parseData()
    file = "/Users/jharris/Desktop/workbookUsers.hyper"
    cols = dict['cols']
    data = dict['data']

    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, file,
                        CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('Extract')

            table = TableDefinition(
                TableName('Extract', 'Extract'),
                [TableDefinition.Column(col, SqlType.text()) for col in cols])

            connection.catalog.create_table(table)

            with Inserter(connection, table) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
Esempio n. 16
0
    def _hyper_table_definition(self, source_table, hyper_table_name="Extract"):
        """
        Build a hyper table definition from source_schema

        source_table (obj): Source table (Instance of google.cloud.bigquery.table.Table)
        hyper_table_name (string): Name of the target Hyper table, default="Extract"

        Returns a tableauhyperapi.TableDefinition Object
        """

        logger.debug(
            "Building Hyper TableDefinition for table {}".format(source_table.reference)
        )
        target_cols = []
        for source_field in source_table.schema:
            this_name = source_field.name
            this_type = self._hyper_sql_type(source_field)
            this_col = TableDefinition.Column(name=this_name, type=this_type)

            # Check for Nullability
            this_mode = source_field.mode
            if this_mode == "REPEATED":
                raise (
                    HyperSQLTypeMappingError(
                        "Field mode REPEATED is not implemented in Hyper"
                    )
                )
            if this_mode == "REQUIRED":
                this_col = TableDefinition.Column(
                    this_name, this_type, Nullability.NOT_NULLABLE
                )

            target_cols.append(this_col)
            logger.debug("..Column {} - Type {}".format(this_name, this_type))

        target_schema = TableDefinition(
            table_name=TableName("Extract", hyper_table_name), columns=target_cols
        )

        return target_schema
def importSchema(
    schemaJson
):
    try:
        if 'name' not in schemaJson:
            print('[ERROR] No table name defined in the schema json. Key \'name\' required of type STRING:\nExiting now\n.')
            exit( -1 )

        if 'columns' not in schemaJson:
            print('[ERROR] No columns defined in the schema json. Key \'columns\' required of type [OBJECT]:\nExiting now\n.')
            exit( -1 )

        table_def = TableDefinition(schemaJson['name']);

        for c in schemaJson['columns']:
            if ( 'name' not in c ):
                print('No column name defined in the schema json. Key \'name\' required of type STRING:\nExiting now\n.')
                exit( -1 )
            colFunc = typeObj['TEXT']
            if( 'type' in c and c['type'] in typeObj):
                colFunc = typeObj[c['type']]
                if (c['type'] == 'CHAR'):
                    if ( 'length' not in c ):
                        print('No length defined for CHAR column', c['name'],'\nKey \'length\' required of type INTEGER:\nExiting now\n.')
                        exit( -1 )
                    colType = colFunc(c['length'])
                elif (c['type'] == 'VARCHAR'):
                    if ( 'length' not in c ):
                        print('No length defined for VARCHAR column', c['name'],'\nKey \'length\' required of type INTEGER:\nExiting now\n.')
                        exit( -1 )
                    colType = colFunc(c['length'])
                elif (c['type'] == 'NUMERIC'):
                    if ( 'precision' not in c or 'scale' not in c ):
                        print('No precision and/or scale defined for NUMERIC column', c['name'],'\nKeys \'precision\' and \'scale\' required of type INTEGER:\nExiting now\n.')
                        exit( -1 )
                    colType = colFunc(c['precision'], c['scale'])
                else:
                    colType = colFunc()
            else:
                colType = colFunc()
            if( 'collation' in c and c['collation'] in collationObj):
                colCollation = collationObj[c['collation']]
                table_def.add_column(c['name'], colType, colCollation)
            else:
                table_def.add_column(c['name'], colType)

        if ( table_def == None ):
            print('[ERROR] A fatal error occurred while creating the table:\nExiting now\n.')
            exit( -1 )

    except HyperException as e:
        print('[ERROR] A fatal error occurred while reading the schema definition:\n', e, '\nExiting now.')
        exit( -1 )

    return table_def
Esempio n. 18
0
    def dss_columns_to_hyper_columns(self, dss_columns):
        """
        Convert the columns of the DSS dataset to Tableau Hyper columns

        :param dss_columns: columns of the DSS dataset
        :example: [{"name": "customer_id", "type": "bigint"}, ...]
        >>> dss_columns = [{"name": "customer_id", "type": "bigint"}]
        :return: hyper_columns: tableau hyper columns object
        """
        hyper_columns = []
        for dss_column in dss_columns:
            dss_column_name, dss_column_type = dss_column['name'], dss_column[
                'type']
            hyper_type = self.type_converter.dss_type_to_hyper(dss_column_type)
            hyper_columns.append(
                TableDefinition.Column(dss_column_name, hyper_type))
        return hyper_columns
Esempio n. 19
0
def create_hyper(dist_path: str, table_def_dict: dict, file_name: str,
                 src_path: str):
    # Hyperファイルを扱うためのセッションを作成
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(
                endpoint=hyper.endpoint,
                database=dist_path,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

            column_def = create_column_def(table_def_dict)
            # テーブルを定義
            table_def = TableDefinition(table_name=file_name,
                                        columns=column_def)
            # テーブル定義をもとに仮想的にテーブル作成(default schemaはpublic)
            connection.catalog.create_table(table_def)
            # PostgreSQLライクにCOPYコマンドを実行
            record_count = connection.execute_command(command=f'''
                COPY {table_def.table_name} from {escape_string_literal(src_path)} with (format csv, delimiter ',', header)
                ''')
            print(
                f"The number of rows in table {table_def.table_name} is {record_count}."
            )
def create_extract_schema(raw_data=None, raw_data_path=None):
    """
    [summary]
    This function generates the tableau hyper file columns to be used for generation of the hyper extract file

    Args:
        raw_data ([dataframe], optional): Pass the raw pandas dataframe here. If this is not found, the function will check for raw_file_path. Defaults to None.
        
        raw_data_path ([string], optional): Pass path of the csv file for the dataframe if raw dataframe is not available. Defaults to None.

    Returns:
        [object list]: Returns a list of objects representing tableau hyper extract columns
    """
    if raw_data is None:
        try:
            raw_data = pd.read_csv(raw_data_path)
        except:
            return consolelog('ERROR: No filepath found')
    elif raw_data is not None:
        pass
    else:
        return consolelog('ERROR: No data found')

    columns = raw_data.columns.tolist()
    tableau_extract_columns = []

    for col in columns:
        conversion_values = convert_datatype(str(raw_data[[col]].dtypes[0]))
        converted_type = conversion_values[0]
        def_value = conversion_values[1]
        consolelog(
            f"Column: [{col}] with datatype ~{str(raw_data[[col]].dtypes[0])}~ converted to {converted_type}"
        )
        tableau_extract_columns.append(
            TableDefinition.Column(col, converted_type))
        raw_data[col] = raw_data[col].fillna(def_value)

    return (tableau_extract_columns, raw_data)
Esempio n. 21
0
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, escape_name, escape_string_literal, HyperException
import config as cf
from spark_connect import sparkConnect

# test = sparkConnect()
# schema = test[1]
# print(schema)

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name=cf.table_name,
    columns=sparkConnect())

print('\n', cf.input_file_path, '\n')


def run_create_hyper_file_from_csv():
    """
    Loading data from a csv into a new Hyper file
    """
    print("Load data from CSV into table in new Hyper file")
    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        with Connection(
                endpoint=hyper.endpoint,
def Full_refresh(result):
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open==True:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper', CreateMode.CREATE_AND_REPLACE,) as connection: 
                    if connection.is_open==True:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        DataTable = TableDefinition(TableName('Extract','Campaign_data'),[
                        ############Below Columns are extracted from Report data API
                        TableDefinition.Column('Row_ID', SqlType.big_int()),
                        TableDefinition.Column('Inserted Date', SqlType.date()),
                        TableDefinition.Column('Date', SqlType.date()),
                        TableDefinition.Column('Account Id', SqlType.varchar(50)),
                        TableDefinition.Column('Account Name', SqlType.text()),
                        TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                        TableDefinition.Column('Campaign Name', SqlType.text()),
                        TableDefinition.Column('Impressions', SqlType.big_int()),
                        TableDefinition.Column('Clicks', SqlType.big_int()),
                        TableDefinition.Column('Reach', SqlType.big_int()),
                        TableDefinition.Column('Spend', SqlType.double()),
                        TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully Facebook Campaign Table is defined")
                        connection.catalog.create_table(DataTable)
                       # print(Campaign_df["Id"].dtype)
                        #print(range(len(Campaign_df["Id"])))
                        
                        with Inserter(connection, TableName('Extract','Campaign_data')) as inserter:
                            inserted_rows=1
                            row_id=1
                            for i in range(0,len(result["Campaign Id"])):
                                #print(str(result.loc[i,"CampaignId"]))
                                #print(result.loc[i,"Date"])
                                inserter.add_row([
                                int(row_id),
                                datetime.today(),
                                (datetime.strptime(result.loc[i,"Date"], '%Y-%m-%d')),
                                #(datetime.date(result.loc[i,"Date"])),#, "%Y-%m-%d")),
                                str(result.loc[i,"Account Id"]),
                                str(result.loc[i,"Account Name"]),
                                str(result.loc[i,"Campaign Id"]),
                                str(result.loc[i,"Campaign Name"]),
                                int(result.loc[i,"Impressions"]),
                                int(result.loc[i,"Clicks"]),
                                int(result.loc[i,"Reach"]),
                                float(result.loc[i,"Spend"]),
                                float(result.loc[i,"Frequency"])
                                ])
                                #print("instered")
                                row_id=row_id+1
                                inserted_rows=inserted_rows+1
                            inserter.execute()
                            print("Instered Rows are " +str(inserted_rows))
                            LogFileWrite("Instered Rows are " +str(inserted_rows))
                        table_name=TableName('Extract','Campaign_data')
                        Delet_query=f"DELETE FROM {table_name} WHERE " +'"'+ 'Row_ID'+'"'+" NOT IN("
                        Delet_query+="SELECT MAX("+'"'+'Row_ID'+'"'+f") FROM {table_name} "
                        Delet_query+="GROUP BY " +'"'+'Date'+'",'+'"'+'Campaign Id'+'",'+'"'+'Campaign Name'+'",'
                        Delet_query+='"'+'Account Id'+'",'+'"'+'Impressions'+'",'
                        Delet_query+='"'+'Clicks'+'",'+'"'+'Account Name'+'",'+'"'+'Reach'+'",'+'"'+'Spend'+'",'
                        Delet_query+='"'+'Frequency'+'")'
                        #print(Delet_query)
                        
                        connection.execute_command(Delet_query)
                        print("Deleted Duplicate rows")
                        LogFileWrite("Successfully deleted Duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                if connection.is_open==True:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                    print("Connection is open or closed" + str(connection.is_open))
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        if hyperprocess.is_open==True:
            hyperprocess.close()
            print("Forcefully shutted down the Hyper Process")
            LogFileWrite("Forcefully shutted down the Hyper Process")
        else:
            print("Hyper process is shutted down")
            LogFileWrite("Hyper process is shutted down")
            print("Connection is open or closed" + str(connection.is_open))
            print("process is open or closed" + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
Esempio n. 23
0
    # Creates a new Hyper file or
    # replaces the file if it already exists.
    with Connection(endpoint=hyper.endpoint,
                    database="drinks.hyper",
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

        # default schema name is "public"
        # define table
        drinks_table = TableDefinition(
            table_name="drinks",
            columns=[
                TableDefinition.Column("country", SqlType.text(),
                                       NOT_NULLABLE),
                TableDefinition.Column("beer_servings", SqlType.big_int(),
                                       NOT_NULLABLE),
                TableDefinition.Column("spirit_servings", SqlType.big_int(),
                                       NOT_NULLABLE),
                TableDefinition.Column("wine_servings", SqlType.big_int(),
                                       NOT_NULLABLE),
                TableDefinition.Column("total_litres_of_pure_alcohol",
                                       SqlType.double(), NOT_NULLABLE),
                TableDefinition.Column("continent", SqlType.text(),
                                       NOT_NULLABLE)
            ])

        # create tables
        connection.catalog.create_table(drinks_table)

        path_to_csv = "drinks.csv"
        print(drinks_table.table_name)

        count_in_drinks_table = connection.execute_command(
Esempio n. 24
0
 def __init__(self):
     self.location = "./output/meli_challenge_result.hyper"
     self.test_location = "../output/meli_challenge_result.hyper"
     self.searchResult_table = TableDefinition('results', [
         TableDefinition.Column('id', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('site_id', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('title', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('seller', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('price', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('prices', SqlType.json(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('sale_price', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('currency_id', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('available_quantity', SqlType.int(), Nullability.NULLABLE),
         TableDefinition.Column('sold_quantity', SqlType.int(), Nullability.NULLABLE),
         TableDefinition.Column('buying_mode', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('listing_type_id', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('stop_time', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('condition', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('permalink', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('thumbnail', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('accepts_mercadopago', SqlType.bool(), Nullability.NULLABLE),
         TableDefinition.Column('installments', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('address', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('shipping', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('seller_address', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('attributes', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('original_price', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('category_id', SqlType.text(), Nullability.NOT_NULLABLE),
         TableDefinition.Column('official_store_id', SqlType.int(), Nullability.NULLABLE),
         TableDefinition.Column('domain_id', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('catalog_product_id', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('tags', SqlType.text(), Nullability.NULLABLE),
         TableDefinition.Column('catalog_listing', SqlType.bool(), Nullability.NULLABLE),
         TableDefinition.Column('order_backend', SqlType.int(), Nullability.NULLABLE),
     ])
def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Creates new Hyper file "spatial_data.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

            connection.catalog.create_schema(
                schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # Hyper API's Inserter allows users to transform data during insertion.
            # To make use of data transformation during insertion, the inserter requires the following inputs
            #   1. The connection to the Hyper instance containing the table.
            #   2. The table name or table defintion into which data is inserted.
            #   3. List of Inserter.ColumnMapping.
            #       This list informs the inserter how each column in the target table is tranformed.
            #       The list must contain all the columns into which data is inserted.
            #       "Inserter.ColumnMapping" maps a valid SQL expression (if any) to a column in the target table.
            #       For example Inserter.ColumnMapping('target_column_name', f'{escape_name("colA")}*{escape_name("colB")}')
            #       The column "target_column" contains the product of "colA" and "colB" after successful insertion.
            #       SQL expression string is optional in Inserter.ColumnMapping.
            #       For a column without any transformation only the column name is required.
            #       For example Inserter.ColumnMapping('no_data_transformation_column')
            #   4. The Column Definition of all input values provided to the Inserter

            # Inserter definition contains the column definition for the values that are inserted
            # The data input has two text values Name and Location_as_text
            inserter_definition = [
                TableDefinition.Column(name='Name',
                                       type=SqlType.text(),
                                       nullability=NOT_NULLABLE),
                TableDefinition.Column(name='Location_as_text',
                                       type=SqlType.text(),
                                       nullability=NOT_NULLABLE)
            ]

            # Column 'Name' is inserted into "Extract"."Extract" as-is.
            # Column 'Location' in "Extract"."Extract" of geography type is computed from Column 'Location_as_text' of text type
            # using the expression 'CAST("Location_as_text") AS GEOGRAPHY'.
            # Inserter.ColumnMapping is used for mapping the CAST expression to Column 'Location'.
            column_mappings = [
                'Name',
                Inserter.ColumnMapping(
                    'Location',
                    f'CAST({escape_name("Location_as_text")} AS GEOGRAPHY)')
            ]

            # Data to be inserted.
            data_to_insert = [['Seattle', "point(-122.338083 47.647528)"],
                              ['Munich', "point(11.584329 48.139257)"]]

            # Insert data into "Extract"."Extract" table with CAST expression.
            with Inserter(connection,
                          extract_table,
                          column_mappings,
                          inserter_definition=inserter_definition) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()
            print("The data was added to the table.")

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, Name, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Name',
                               type=SqlType.text(),
                               nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Location',
                               type=SqlType.geography(),
                               nullability=NOT_NULLABLE)
    ])


def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
Esempio n. 27
0
hyper_file_path = "hyper_for_csv.hyper"
table_name = TableName("Extract", "Extract")
output_name = "output.csv"

path_to_database = Path(hyper_file_path)

# The table is called "Extract" and will be created in the "Extract" schema
# and contains four columns.
extract_table = TableDefinition(
    table_name=table_name,
    columns=[
        TableDefinition.Column(name='Customer ID',
                               type=SqlType.text(),
                               nullability=NULLABLE),
        TableDefinition.Column(name='Customer Name',
                               type=SqlType.text(),
                               nullability=NULLABLE),
        TableDefinition.Column(name='Loyalty Reward Points',
                               type=SqlType.big_int(),
                               nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Segment',
                               type=SqlType.text(),
                               nullability=NULLABLE)
    ])


def insert_data():
    """
    Creates a simple .hyper file. For more on this, see the below example:
    https://github.com/tableau/hyper-api-samples/blob/main/Tableau-Supported/Python/insert_data_into_single_table.py
    """
    print("Creating single table for conversion.")
Esempio n. 28
0
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  2 21:35:47 2020

@author: Manish Chauhan || +91-9774083186
"""
from tableauhyperapi import HyperProcess, Inserter, Telemetry, Connection, CreateMode, NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, escape_name, escape_string_literal, HyperException, TableName

# Starting local hyper instance

extract_table = TableDefinition(
    table_name=TableName('extract', 'extract'),
    columns=[
        TableDefinition.Column("Sno", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Sale", SqlType.big_int()),
        TableDefinition.Column("Profit", SqlType.big_int()),
        #TableDefinition.Column("Date", SqlType.date())
    ])

data_to_insert = [["1", "Vishakha", 99999, 80]]


def run_create_hyper_file_from_csv():

    print("Inside Fucntion to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(
                endpoint=hyper.endpoint,
Esempio n. 29
0
from pathlib import Path

from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

product_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="Products",
    columns=[
        TableDefinition.Column("category", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("title", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("price", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("available_quantity", SqlType.big_int(),
                               NOT_NULLABLE),
        TableDefinition.Column("sold_quantity", SqlType.big_int(),
                               NOT_NULLABLE),
        TableDefinition.Column("permalink", SqlType.text(), NOT_NULLABLE)
    ])


def call_mlapi_to_dict(SearchCategory):

    print("Start call_mlapi_to_dict.")

    try:
        query = {'1': SearchCategory, 'limit': '50'}
        response = requests.get(
Esempio n. 30
0
query_res['column_ex4'] = query_res['column_ex4'].astype(str)
query_res['column_ex5'] = query_res['column_ex5'].astype(str)
query_res['column_ex6'] = query_res['column_ex6'].astype(str)

with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with Connection(endpoint=hyper.endpoint,
                    database=path_to_database,
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
        connection.catalog.create_schema('Extract')
        # Define the hyper table (Same definition as the full refresh)
        example_table = TableDefinition(TableName('Extract', 'Extract'), [
            TableDefinition.Column('column_ex', SqlType.varchar(500)),
            TableDefinition.Column('column_ex2', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int', SqlType.int()),
            TableDefinition.Column('column_ex3', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int2', SqlType.int()),
            TableDefinition.Column('column_ex4', SqlType.varchar(500)),
            TableDefinition.Column('column_ex5', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int3', SqlType.int()),
            TableDefinition.Column('column_ex6', SqlType.varchar(500)),
        ])
        print("The table is defined.")
        connection.catalog.create_table(table_definition=example_table)
        # Insert data from dataframe to hyper table
        with Inserter(connection, example_table) as inserter:
            for i in range(len(query_res)):
                inserter.add_row(
                    [ query_res['column_ex'][i],  \
                     query_res['column_ex2'][i],  \
                     int(query_res['column_ex_int'][i]), \
                     query_res['column_ex3'][i], \