Example #1
def test_multiple_tables(df, tmp_hyper, table_name, table_mode):
    # Write twice; depending on mode this should either overwrite or duplicate entries
    pantab.frames_to_hyper({
        table_name: df,
        "table2": df
    },
                           tmp_hyper,
                           table_mode=table_mode)
    pantab.frames_to_hyper({
        table_name: df,
        "table2": df
    },
                           tmp_hyper,
                           table_mode=table_mode)
    result = pantab.frames_from_hyper(tmp_hyper)

    expected = df.copy()
    if table_mode == "a":
        expected = pd.concat([expected, expected]).reset_index(drop=True)

    # some test trickery here
    if not isinstance(table_name, TableName) or table_name.schema_name is None:
        table_name = TableName("public", table_name)

    assert set(result.keys()) == set(
        (table_name, TableName("public", "table2")))
    for val in result.values():
        assert_roundtrip_equal(val, expected)
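
Several of the tests in this collection call an assert_roundtrip_equal helper that is not shown. A minimal sketch of such a helper, assuming only that the index is not preserved through the Hyper round trip, could be:

import pandas as pd
import pandas.testing as tm


def assert_roundtrip_equal(result: pd.DataFrame, expected: pd.DataFrame) -> None:
    # Hypothetical helper: compare the frames after resetting the index,
    # since the index is not preserved through the Hyper round trip.
    tm.assert_frame_equal(result.reset_index(drop=True),
                          expected.reset_index(drop=True))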
Example #2
    def create_schema(self, schema_dss, destination_file_path):
        """
        Create the Tableau Hyper table matching the schema of the DSS dataset to export.

        :param schema_dss: DSS schema from the DSS dataset to export
            example: {"columns": [{"name": "customer_id", "type": "bigint"}, ...]}

        :param destination_file_path: Path of the Tableau Hyper output file
        :return: None
        """
        # Store the path of the destination Hyper file
        self.output_file = destination_file_path
        logger.info(
            "Writing the Tableau Hyper file to the following location: {}".
            format(destination_file_path))
        logger.info(
            "The dataset to export has the following schema: {}".format(
                schema_dss))

        dss_columns = schema_dss['columns']
        dss_storage_types = [
            column_descriptor['type'] for column_descriptor in dss_columns
        ]
        self.schema_converter.set_dss_storage_types(dss_storage_types)

        self.is_geo_table = dss_is_geo(schema_dss)
        logger.info("The input dataset contains a geo column: {}".format(
            self.is_geo_table))

        if not self.schema_name or not self.table_name:
            logger.warning("Did not received the table or schema name.")
            raise ValueError("No valid schema or table name received.")

        logger.info("Received target schema {} and table {}".format(
            self.schema_name, self.table_name))

        # Create the Tableau Hyper schema from the DSS schema
        self.output_table_definition = TableDefinition(
            TableName(self.schema_name, self.table_name),
            self.schema_converter.dss_columns_to_hyper_columns(dss_columns))

        # Open connection to file
        self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
        self.connection = Connection(self.hyper.endpoint, self.output_file,
                                     CreateMode.CREATE_AND_REPLACE)
        assert self.connection is not None
        self.connection.catalog.create_schema(self.schema_name)
        self.connection.catalog.create_table(self.output_table_definition)

        # Handle the geo case
        if self.is_geo_table:
            logger.info("Detected geo column. Creating a temporary table...")
            dss_tmp_schema = geo_to_text(schema_dss)
            dss_tmp_columns = dss_tmp_schema['columns']
            self.tmp_table_definition = TableDefinition(
                TableName(self.schema_name, "tmp_" + self.table_name),
                self.schema_converter.dss_columns_to_hyper_columns(
                    dss_tmp_columns))
            self.connection.catalog.create_table(self.tmp_table_definition)
            logger.info("Created temporary table")
    def read_hyper_columns(self):
        """
        Read the columns and storage types of the target table from the Tableau Hyper file

        :return: self.hyper_storage_types
        """
        logger.info("Trying to read Tableau Hyper table {}.{} ...".format(
            self.schema_name, self.table_name))
        hyper_table = TableName(self.schema_name, self.table_name)
        self.hyper_table = hyper_table

        try:
            table_def = self.connection.catalog.get_table_definition(
                hyper_table)
        except HyperException as e:
            logger.warning(
                "The target table does not exist in this hyper file. Requested table: {}.{}"
                .format(self.schema_name, self.table_name))
            raise Exception("Table does not exist: {}.{}".format(
                self.schema_name, self.table_name))

        self.hyper_columns = table_def.columns
        self.hyper_storage_types = [
            column.type.tag for column in self.hyper_columns
        ]

        self.dss_columns = self.schema_converter.hyper_columns_to_dss_columns(
            self.hyper_columns)
        self.dss_storage_types = [
            column['type'] for column in self.dss_columns
        ]

        self.schema_converter.set_dss_storage_types(self.dss_storage_types)
        self.schema_converter.set_hyper_storage_types(self.hyper_storage_types)
 def fn_write_data_into_hyper_file(self, in_logger, timer, in_dict):
     if in_dict['action'] == 'append':
         self.fn_get_records_count_from_table(
             in_logger, timer, {
                 'connection': in_dict['connection'],
                 'schema name': in_dict['schema name'],
                 'table name': in_dict['table name'],
             })
         hyper_table = in_dict['connection'].catalog.get_table_definition(
             TableName('Extract', 'Extract'))
     elif in_dict['action'] == 'overwrite':
         self.fn_create_hyper_schema(in_logger, timer, in_dict)
         hyper_table = self.fn_create_hyper_table(
             in_logger, timer, {
                 'columns': in_dict['hyper table columns'],
                 'connection': in_dict['connection'],
                 'schema name': in_dict['schema name'],
                 'table name': in_dict['table name'],
             })
     self.fn_insert_data_into_hyper_table(
         in_logger, timer, {
             'connection': in_dict['connection'],
             'data': in_dict['data'],
             'table': hyper_table,
         })
     self.fn_get_records_count_from_table(
         in_logger, timer, {
             'connection': in_dict['connection'],
             'schema name': in_dict['schema name'],
             'table name': in_dict['table name'],
         })
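
A sketch of how fn_write_data_into_hyper_file might be invoked (the writer instance, logger, timer and row data are assumptions; the dictionary keys are the ones the method actually reads):

# Hypothetical invocation; `writer`, `logger` and `timer` come from the surrounding application.
writer.fn_write_data_into_hyper_file(logger, timer, {
    'action': 'overwrite',                        # or 'append' to add to an existing extract
    'connection': connection,                     # an open tableauhyperapi Connection
    'data': [[1, 'alpha'], [2, 'beta']],          # rows to insert
    'hyper table columns': hyper_table_columns,   # list of TableDefinition.Column objects
    'schema name': 'Extract',
    'table name': 'Extract',
})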
Example #5
def run_create_hyper_file_from_csv():

    print("Inside Fucntion to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(
                endpoint=hyper.endpoint,
                database='C:/Users/admin/Desktop/extrp1.hyper',
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('extract')
            connection.catalog.create_table(table_definition=extract_table)
            # create a path that locates CSV file to be used
            path_to_csv = 'C:/Users/admin/Desktop/testing1.csv'

            # `execute_command` executes a SQL statement and returns the impacted row count.
            count_in_table = connection.execute_command(
                command=
                f"COPY {extract_table.table_name} from {escape_string_literal(path_to_csv)} with "
                f"(format csv, NULL 'NULL', delimiter ',', header)")
            print(count_in_table)
            with Inserter(connection, TableName('extract',
                                                'extract')) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()
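
The CSV example above references an extract_table definition and a data_to_insert list that are created elsewhere in the original script; a minimal sketch of what they might look like (the column names and types are assumptions) is:

from tableauhyperapi import TableDefinition, TableName, SqlType, NOT_NULLABLE

# Assumed table definition matching the layout of the CSV file.
extract_table = TableDefinition(
    table_name=TableName('extract', 'extract'),
    columns=[
        TableDefinition.Column('Customer ID', SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column('Sales', SqlType.double(), NOT_NULLABLE),
    ])

# Assumed extra rows appended after the COPY command.
data_to_insert = [
    ['DK-13375', 1002.5],
    ['EB-13705', 939.09],
]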
 def test_insert_data_into_hyper_file(self):
     data = [(1001, "Jane", "Doe"), (1002, "John", "Doe"),
             (2201, "Elonzo", "Smith")]
     name = "output"
     table_def = TableDefinition(
         table_name=TableName("Extract", "Extract"),
         columns=[
             TableDefinition.Column(name=Name("id"),
                                    type=SqlType.big_int(),
                                    nullability=NULLABLE),
             TableDefinition.Column(name=Name("first_name"),
                                    type=SqlType.text(),
                                    nullability=NULLABLE),
             TableDefinition.Column(name=Name("last_name"),
                                    type=SqlType.text(),
                                    nullability=NULLABLE)
         ])
     path = insert_data_into_hyper_file(data, name, table_def)
     print(f'Database Path : {path}')
     tables = TestUtils.get_tables("Extract",
                                   "/tmp/hyperleaup/output/output.hyper")
     assert (len(tables) == 1)
     num_rows = TestUtils.get_row_count(
         "Extract", "Extract", "/tmp/hyperleaup/output/output.hyper")
     assert (num_rows == 3)
Example #7
def get_table_def(df: DataFrame, schema_name: str,
                  table_name: str) -> TableDefinition:
    """Returns a Tableau TableDefintion given a Spark DataFrame"""
    schema = df.schema
    cols = list(map(convert_struct_field, schema))
    return TableDefinition(table_name=TableName("Extract", "Extract"),
                           columns=cols)
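
Both get_table_def variants in this collection call a convert_struct_field helper that maps a Spark StructField to a Hyper column; a rough sketch of such a mapper, covering only a few common Spark types (the mapping itself is an assumption), might be:

from pyspark.sql.types import StructField
from tableauhyperapi import TableDefinition, SqlType, NULLABLE, NOT_NULLABLE

# Hypothetical mapping from Spark simpleString() type names to Hyper SqlTypes.
_SPARK_TO_HYPER = {
    'string': SqlType.text(),
    'bigint': SqlType.big_int(),
    'int': SqlType.int(),
    'double': SqlType.double(),
    'boolean': SqlType.bool(),
    'date': SqlType.date(),
    'timestamp': SqlType.timestamp(),
}


def convert_struct_field(field: StructField) -> TableDefinition.Column:
    """Convert a Spark StructField into a Hyper column definition (sketch)."""
    sql_type = _SPARK_TO_HYPER.get(field.dataType.simpleString(), SqlType.text())
    nullability = NULLABLE if field.nullable else NOT_NULLABLE
    return TableDefinition.Column(field.name, sql_type, nullability)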
def to_hyper(df,
             hyper_file_name,
             custom_schema="Extract",
             custom_table_name="Extract"):
    """
    Write a Tableau Hyper file from a Pandas DataFrame.

    Currently can only write single table extracts, which is Tableau's
    default way of creating an extract.

    Args:
        df: Specify which DataFrame you want to output
        hyper_file_name: Specify the file name such as "Example.hyper"
        custom_schema: If you need to change the schema name. Defaults to "Extract"
        custom_table_name: If you need to change the table name. Defaults to "Extract"

    Returns:
        Tableau Hyper file
    """

    # Starts the Hyper Process
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
            parameters={"log_config": ""},
    ) as hyper:

        # Creates a .hyper file to put the data into
        with Connection(hyper.endpoint, hyper_file_name,
                        CreateMode.CREATE_AND_REPLACE) as connection:

            connection.catalog.create_schema(custom_schema)

            # Build a .hyper-compatible column definition from the
            # DataFrame's column names and dtypes.
            column_names = list(df.columns)
            column_dtype = list(df.dtypes)

            hyper_table = TableDefinition(
                TableName(custom_schema, custom_table_name),
                [
                    TableDefinition.Column(
                        column_names[column], dtype_mapper[str(
                            column_dtype[column])])
                    for column in range(len(column_names))
                ],
            )
            connection.catalog.create_table(hyper_table)

            # Replace NaN with None, otherwise it will not be Null in Tableau
            df.replace({np.nan: None}, inplace=True)

            # Insert the data values into the hyper file
            data_to_insert = df.to_numpy()
            with Inserter(connection, hyper_table) as inserter:
                inserter.add_rows(tqdm((row for row in data_to_insert)))
                inserter.execute()
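
to_hyper looks Hyper column types up in a module-level dtype_mapper dictionary that is not shown here; a plausible sketch keyed by pandas dtype strings (the exact mapping is an assumption) is:

from tableauhyperapi import SqlType

# Assumed mapping from pandas dtype strings to Hyper SqlTypes.
dtype_mapper = {
    'int64': SqlType.big_int(),
    'int32': SqlType.int(),
    'float64': SqlType.double(),
    'bool': SqlType.bool(),
    'datetime64[ns]': SqlType.timestamp(),
    'object': SqlType.text(),
}

With such a mapping in place, the function is called simply as to_hyper(df, "Example.hyper").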
Example #9
 def __init__(self, path="superstore.hyper"):
     super().__init__()
     self._path = Path(__file__).parent / path
     self._table_name = TableName("Extract", "Extract")
     self._hyper = HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU)
     self._connection = Connection(endpoint=self._hyper.endpoint,
                                   database=self._path)
Example #10
def createTabTable(tableName, columnHeading, dataTypes):
    extract_table = TableDefinition(table_name=TableName("Extract", tableName))

    for head in columnHeading:
        extract_table.add_column(head, eval(mapper[dataTypes.pop(0)]))

    return extract_table
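
The eval(mapper[...]) call above assumes a module-level mapper dict whose values are strings such as "SqlType.text()". A safer alternative sketch stores the SqlType objects directly and avoids eval entirely (the type labels used as keys are assumptions):

from tableauhyperapi import TableDefinition, TableName, SqlType

# Hypothetical mapping from source type labels straight to SqlType objects.
sql_type_mapper = {
    'string': SqlType.text(),
    'int': SqlType.big_int(),
    'float': SqlType.double(),
    'date': SqlType.date(),
}


def createTabTable(tableName, columnHeading, dataTypes):
    extract_table = TableDefinition(table_name=TableName("Extract", tableName))
    # zip() avoids mutating the caller's dataTypes list with pop(0).
    for head, data_type in zip(columnHeading, dataTypes):
        extract_table.add_column(head, sql_type_mapper[data_type])
    return extract_table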
Example #11
 def get_table_def(df: DataFrame,
                   schema_name: str = 'Extract',
                   table_name: str = 'Extract') -> TableDefinition:
     """Returns a Tableau TableDefintion given a Spark DataFrame"""
     schema = df.schema
     cols = list(map(HyperUtils.convert_struct_field, schema))
     return TableDefinition(table_name=TableName(schema_name, table_name),
                            columns=cols)
Example #12
def test_read_non_roundtrippable(datapath):
    result = pantab.frame_from_hyper(datapath / "dates.hyper",
                                     table=TableName("Extract", "Extract"))
    expected = pd.DataFrame(
        [["1900-01-01", "2000-01-01"], [pd.NaT, "2050-01-01"]],
        columns=["Date1", "Date2"],
        dtype="datetime64[ns]",
    )
    tm.assert_frame_equal(result, expected)
Example #13
def test_reads_non_writeable_strings(datapath):
    result = pantab.frame_from_hyper(
        datapath / "non_pantab_writeable.hyper", table=TableName("public", "table")
    )

    expected = pd.DataFrame([["row1"], ["row2"]], columns=["Non-Nullable String"])
    if compat.PANDAS_100:
        expected = expected.astype("string")

    tm.assert_frame_equal(result, expected)
Example #14
 def test_hyper_columns_to_dss_columns(self):
     schema_converter = SchemaConversion()
     path_to_hyper = "data/superstore_sample.hyper"
     hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
     connection = Connection(hyper.endpoint, path_to_hyper)
     hyper_table = connection.catalog.get_table_definition(
         TableName('public', 'Customer'))
     connection.close()
     hyper.close()
     dss_columns = schema_converter.hyper_columns_to_dss_columns(
         hyper_table.columns)
     return True
 def fn_create_hyper_table(self, local_logger, timer, in_dict):
     timer.start()
     out_hyper_table = TableDefinition(
         TableName(in_dict['schema name'], in_dict['table name']),
         columns=in_dict['columns'],
     )
     in_dict['connection'].catalog.create_table(table_definition=out_hyper_table)
     local_logger.info(self.locale.gettext(
         'Hyper table "{hyper_table_name}" has been created')
                       .replace('{hyper_table_name}', in_dict['table name']))
     timer.stop()
     return out_hyper_table
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    ''' 

    # Replace nan's with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text() 
            }
            table_def = []

            # Get column headers to loop through them
            df_columns = list(df)

            for col_header in df_columns:
                dtype_str = str(df[col_header].dtype)

                # Use dtype_str to look up the appropriate SqlType from dtype_map and append a new column to the table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))
                
            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                
                inserter.execute() 
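
A minimal usage sketch for df_to_extract (the sample frame and output path are hypothetical):

import pandas as pd

sample_df = pd.DataFrame({
    'region': ['East', 'West'],
    'sales': [1250.0, 980.5],
})
df_to_extract(sample_df, 'sales_extract.hyper')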
Example #17
def test_roundtrip_with_external_hyper_connection(df, tmp_hyper):
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        pantab.frames_to_hyper({
            "test": df,
            "test2": df
        },
                               tmp_hyper,
                               hyper_process=hyper)

        with Connection(hyper.endpoint, tmp_hyper,
                        CreateMode.NONE) as connection:
            result = pantab.frame_from_hyper(connection, table="test")
            assert_roundtrip_equal(result, df)

            result = pantab.frame_from_hyper_query(connection,
                                                   "SELECT * FROM test")
            assert result.size == 63

            result = pantab.frames_from_hyper(connection)
            assert set(result.keys()) == set(
                (TableName("public", "test"), TableName("public", "test2")))
            for val in result.values():
                assert_roundtrip_equal(val, df)
Example #18
    def run(self, args):
        """ Runs the command
        :param args: Arguments from argparse.Namespace
        """
        input_file = args.input_file
        output_file = args.output_file
        wkt_file = Path(args.wkt_path)
        role_name = args.role_name
        id_field = args.id_field
        # Grab the CSV
        csv_query = CsvQueryClass()
        csv_query.open_csv(wkt_file)

        # if the output file already exists, delete
        if os.path.exists(output_file):
            if os.name == "nt":  # deal with Windows
                os.remove(output_file)
            else:
                subprocess.call(["rm", "-rf", output_file])
        shutil.copyfile(input_file, output_file)

        # Starts the Hyper Process with telemetry enabled to send data to Tableau.
        # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
        with HyperProcess(
                telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
            with Connection(endpoint=hyper.endpoint,
                            database=output_file) as connection:
                table_name = TableName(
                    "public", "LocalData" + role_name
                )  #TODO: Make this dynamic based on an input parameter.
                geo_name = Name('Geometry')
                map_code_name = Name('MapCode')
                latitude_name = Name('Latitude')
                longitude_name = Name('Longitude')

                connection.execute_query(
                    f"ALTER TABLE {table_name} ADD COLUMN {geo_name} TEXT,"
                    f" ADD COLUMN {map_code_name} INTEGER").close()
                for mrow in csv_query.rows:
                    lat = mrow['Latitude']
                    lng = mrow['Longitude']
                    wkt = mrow['WKT']
                    with connection.execute_query(
                            f"UPDATE {table_name}" +
                            f" SET {geo_name}={escape_string_literal(wkt)}, {map_code_name}=0"
                            + f" WHERE {latitude_name}={lat}"
                            f" AND {longitude_name}={lng}") as result:
                        print(f"{result.affected_row_count} rows changed")
        print('done')
def hyper_prepare(hyper_path, functional_ordered_column, column_value):
    """Function that prepares the given hyper file: based on the hyper's path, the functional ordered column and its value,
     the hyper file is cleaned from the latest set of data by deleting all data with the given column value or greater values
     """
    path_to_database = Path(hyper_path).expanduser().resolve()
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
                      user_agent=os.path.basename(__file__)) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:
            table_name = TableName("Extract", "Extract")
            rows_affected = connection.execute_command(
                command=
                f'DELETE FROM {table_name} WHERE "{functional_ordered_column}" >= {column_value}'
            )
            return rows_affected
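
For example, assuming the extract carries a numeric "load_id" column (a hypothetical name) that increases with every load, the most recent load could be removed like this:

# Hypothetical invocation: delete every row whose load_id is 42 or greater.
deleted = hyper_prepare('my_extract.hyper', 'load_id', 42)
print(f'{deleted} rows deleted')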
Example #20
def test_roundtrip_with_external_hyper_process(df, tmp_hyper):
    default_log_path = Path.cwd() / "hyperd.log"
    if default_log_path.exists():
        default_log_path.unlink()

    # By passing in a pre-spawned HyperProcess, one can e.g. avoid creating a log file
    parameters = {"log_config": ""}
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
                      parameters=parameters) as hyper:
        # test frame_to_hyper/frame_from_hyper
        pantab.frame_to_hyper(df, tmp_hyper, table="test", hyper_process=hyper)
        result = pantab.frame_from_hyper(tmp_hyper,
                                         table="test",
                                         hyper_process=hyper)
        assert_roundtrip_equal(result, df)

        # test frame_from_hyper_query
        result = pantab.frame_from_hyper_query(tmp_hyper,
                                               "SELECT * FROM test",
                                               hyper_process=hyper)
        assert result.size == 63

        # test frames_to_hyper/frames_from_hyper
        pantab.frames_to_hyper({
            "test2": df,
            "test": df
        },
                               tmp_hyper,
                               hyper_process=hyper)
        result = pantab.frames_from_hyper(tmp_hyper, hyper_process=hyper)
        assert set(result.keys()) == set(
            (TableName("public", "test"), TableName("public", "test2")))
        for val in result.values():
            assert_roundtrip_equal(val, df)

    assert not default_log_path.exists()
Example #21
 def print_table_def(self, schema: str = "Extract", table: str = "Extract"):
     """Prints the table definition for a table in a Hyper file."""
     with HyperProcess(
             telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp:
         with Connection(endpoint=hp.endpoint,
                         database=self.path) as connection:
             table_name = TableName(schema, table)
             table_definition = connection.catalog.get_table_definition(
                 name=table_name)
             # Print all column information
             print("root")
             for column in table_definition.columns:
                 print(
                     f"|-- {column.name}: {column.type} (nullable = {column.nullability})"
                 )
Example #22
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")
        object_name = "mealprep.hyper"
        file_name = os.environ.get('bucket_name')

        with Connection(
                endpoint=hyper.endpoint,
                database=object_name,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            print(example_table)
            print(type(example_table))
            with Inserter(connection, example_table) as inserter:
                for i in request_data['data']:
                    inserter.add_row([i['breakfast'], i['lunch'], i['dinner']])

                inserter.execute()
                print("The data was added to the table.")

            print("The connection to the Hyper extract file is closed.")
        print("The HyperProcess has shut down.")

        with open('mealprep.hyper', 'rb') as reader:
            if object_name is None:
                object_name = file_name
            s3_client = boto3.client(
                's3',
                aws_access_key_id=os.environ.get('aws_access_key_id'),
                aws_secret_access_key=os.environ.get('aws_secret_access_key'))
            try:
                response = s3_client.upload_fileobj(reader, file_name,
                                                    object_name)
            except ClientError as e:
                logging.error(e)
                return False

    return redirect(url_for('index'))
Example #23
def run_read_data_from_existing_hyper_file():
    """
    An example of how to read and print data from an existing Hyper file.
    """
    print("EXAMPLE - Read data from an existing Hyper file")

    # Path to a Hyper file containing all data inserted into Customer, Product, Orders and LineItems table.
    # See "insert_data_into_multiple_tables.py" for an example that works with the complete schema.
    path_to_source_database = Path(
        __file__).parent / "data" / "superstore_sample_denormalized.hyper"

    # Make a copy of the superstore denormalized sample Hyper file
    path_to_database = Path(
        shutil.copy(
            src=path_to_source_database,
            dst="superstore_sample_denormalized_read.hyper")).resolve()

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Connect to existing Hyper file "superstore_sample_denormalized_read.hyper".
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database) as connection:
            # The table names in the "Extract" schema (the default schema).
            table_names = connection.catalog.get_table_names(schema="Extract")

            for table in table_names:
                table_definition = connection.catalog.get_table_definition(
                    name=table)
                print(f"Table {table.name} has qualified name: {table}")
                for column in table_definition.columns:
                    print(
                        f"Column {column.name} has type={column.type} and nullability={column.nullability}"
                    )
                print("")

            # Print all rows from the "Extract"."Extract" table.
            table_name = TableName("Extract", "Extract")
            print(f"These are all rows in the table {table_name}:")
            # `execute_list_query` executes a SQL query and returns the result as list of rows of data,
            # each represented by a list of objects.
            rows_in_table = connection.execute_list_query(
                query=f"SELECT * FROM {table_name}")
            print(rows_in_table)

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")
Example #24
 def test_to_dss_date(self):
     schema_converter = SchemaConversion()
     path_to_hyper = "data/superstore_sample.hyper"
     hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
     connection = Connection(hyper.endpoint, path_to_hyper)
     hyper_table = TableName('public', 'Orders')
     hyper_table_def = connection.catalog.get_table_definition(hyper_table)
     result = connection.execute_query(f'SELECT * FROM {hyper_table}')
     for row in result:
         pass
     sample_date = row[2].to_date()
     dss_date = datetime.datetime(sample_date.year, sample_date.month,
                                  sample_date.day)
     connection.close()
     hyper.close()
     dss_columns = schema_converter.hyper_columns_to_dss_columns(
         hyper_table_def.columns)
     return True
 def fn_hyper_read(self, in_logger, timer, in_dict):
     timer.start()
     # once Hyper is opened we can get data out
     query_to_run = f"SELECT * FROM {TableName('Extract', 'Extract')}"
     in_logger.debug(self.locale.gettext(
         'Hyper SQL about to be executed is: {hyper_sql}')
                     .replace('{hyper_sql}', str(query_to_run)))
     result_set = in_dict['connection'].execute_list_query(query=query_to_run)
     out_data_frame = pd.DataFrame(result_set)
     in_logger.debug(self.locale.gettext(
         'Hyper SQL executed with success and {rows_counted} have been retrieved')
                     .replace('{rows_counted}', str(len(out_data_frame))))
     table_definition = in_dict['connection'].catalog.get_table_definition(
         name=TableName('Extract', 'Extract'))
     table_columns = self.fn_get_column_names_from_table(in_logger, {
         'table definition': table_definition,
     })
     out_data_frame.set_axis(table_columns, axis='columns', inplace=True)
     timer.stop()
     return out_data_frame
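
fn_hyper_read also relies on a fn_get_column_names_from_table helper that is not shown; a minimal sketch of such a method, which just pulls the unescaped column names out of a table definition, could be:

 def fn_get_column_names_from_table(self, in_logger, in_dict):
     # Sketch: return the plain column names of the given Hyper table definition.
     return [column.name.unescaped
             for column in in_dict['table definition'].columns]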
def createHyperFile():
    parsed = parseData()
    file = "/Users/jharris/Desktop/workbookUsers.hyper"
    cols = parsed['cols']
    data = parsed['data']

    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, file,
                        CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('Extract')

            table = TableDefinition(
                TableName('Extract', 'Extract'),
                [TableDefinition.Column(col, SqlType.text()) for col in cols])

            connection.catalog.create_table(table)

            with Inserter(connection, table) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
Example #27
    def _hyper_table_definition(self, source_table, hyper_table_name="Extract"):
        """
        Build a hyper table definition from source_schema

        source_table (obj): Source table (Instance of google.cloud.bigquery.table.Table)
        hyper_table_name (string): Name of the target Hyper table, default="Extract"

        Returns a tableauhyperapi.TableDefinition Object
        """

        logger.debug(
            "Building Hyper TableDefinition for table {}".format(source_table.reference)
        )
        target_cols = []
        for source_field in source_table.schema:
            this_name = source_field.name
            this_type = self._hyper_sql_type(source_field)
            this_col = TableDefinition.Column(name=this_name, type=this_type)

            # Check for Nullability
            this_mode = source_field.mode
            if this_mode == "REPEATED":
                raise (
                    HyperSQLTypeMappingError(
                        "Field mode REPEATED is not implemented in Hyper"
                    )
                )
            if this_mode == "REQUIRED":
                this_col = TableDefinition.Column(
                    this_name, this_type, Nullability.NOT_NULLABLE
                )

            target_cols.append(this_col)
            logger.debug("..Column {} - Type {}".format(this_name, this_type))

        target_schema = TableDefinition(
            table_name=TableName("Extract", hyper_table_name), columns=target_cols
        )

        return target_schema
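
_hyper_table_definition delegates the BigQuery-to-Hyper type mapping to self._hyper_sql_type, which is not shown. A rough standalone sketch of such a mapper (the covered field types and the fallback behaviour are assumptions) might be:

from tableauhyperapi import SqlType


class HyperSQLTypeMappingError(Exception):
    """Assumed custom exception, referenced by the code above."""


# Assumed subset of BigQuery field types and their Hyper equivalents.
_BQ_TO_HYPER = {
    "STRING": SqlType.text(),
    "INTEGER": SqlType.big_int(),
    "FLOAT": SqlType.double(),
    "BOOLEAN": SqlType.bool(),
    "DATE": SqlType.date(),
    "TIMESTAMP": SqlType.timestamp(),
}


def _hyper_sql_type(source_field):
    """Map a google.cloud.bigquery SchemaField to a Hyper SqlType (standalone sketch)."""
    try:
        return _BQ_TO_HYPER[source_field.field_type]
    except KeyError:
        raise HyperSQLTypeMappingError(
            "Field type {} is not implemented in Hyper".format(source_field.field_type))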
def Incremental_refresh(result):
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            #print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open==True:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper', CreateMode.NONE,) as connection: 
                    if connection.is_open==True:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        
                        LogFileWrite("Successfully connected to Facebook Campaign data Table ")
                       # print(Campaign_df["Id"].dtype)
                        #print(range(len(result["Id"])))
                        table_name=TableName('Extract','Campaign_data')
                        max_rowid_query="SELECT MAX("+'"'+'Row_ID'+'"'+f") FROM {table_name}"
                        row_id=connection.execute_scalar_query(max_rowid_query)
                        row_id=row_id+1
                        #print(row_id)
                        with Inserter(connection, TableName('Extract','Campaign_data')) as inserter:
                            inserted_rows=1
                            for i in range(0,len(result["Campaign Id"])):
                                #print(result.loc[i,"Date"])
                                inserter.add_row([
                                int(row_id),
                                datetime.today(),
                                (datetime.strptime(result.loc[i,"Date"], '%Y-%m-%d')),
                                
                                str(result.loc[i,"Account Id"]),
                                str(result.loc[i,"Account Name"]),
                                str(result.loc[i,"Campaign Id"]),
                                str(result.loc[i,"Campaign Name"]),
                                int(result.loc[i,"Impressions"]),
                                int(result.loc[i,"Clicks"]),
                                int(result.loc[i,"Reach"]),
                                float(result.loc[i,"Spend"]),
                                float(result.loc[i,"Frequency"])
                                ])
                                #print("instered")
                                #i=i+1
                                inserted_rows=inserted_rows+1
                                row_id=row_id+1
                            inserter.execute()
                            #print("Instered Rows are " +str(inserted_rows))
                            LogFileWrite("Successfully rows are Instered")
                        table_name=TableName('Extract','Campaign_data')
                        Delet_query=f"DELETE FROM {table_name} WHERE " +'"'+ 'Row_ID'+'"'+" NOT IN("
                        Delet_query+="SELECT MAX("+'"'+'Row_ID'+'"'+f") FROM {table_name} "
                        Delet_query+="GROUP BY " +'"'+'Date'+'",'+'"'+'Campaign Id'+'",'+'"'+'Campaign Name'+'",'
                        Delet_query+='"'+'Account Id'+'",'+'"'+'Impressions'+'",'
                        Delet_query+='"'+'Clicks'+'",'+'"'+'Account Name'+'",'+'"'+'Reach'+'",'+'"'+'Spend'+'",'
                        Delet_query+='"'+'Frequency'+'")'
                        #print(Delet_query)
                        connection.execute_command(Delet_query)
                        print("Deleted Duplicate rows")
                        LogFileWrite("Successfully deleted Duplicate rows")                            
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                if connection.is_open==True:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                    #print("Connection is open or closed" + str(connection.is_open))
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        if hyperprocess.is_open==True:
            hyperprocess.close()
            print("Forcefully shutted down the Hyper Process")
            LogFileWrite("Forcefully shutted down the Hyper Process")
        else:
            print("Hyper process is shutted down")
            LogFileWrite("Hyper process is shutted down")
            #print("Connection is open or closed" + str(connection.is_open))
            #print("process is open or closed" + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
def Full_refresh(result):
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open==True:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper', CreateMode.CREATE_AND_REPLACE,) as connection: 
                    if connection.is_open==True:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        DataTable = TableDefinition(TableName('Extract','Campaign_data'),[
                        ############Below Columns are extracted from Report data API
                        TableDefinition.Column('Row_ID', SqlType.big_int()),
                        TableDefinition.Column('Inserted Date', SqlType.date()),
                        TableDefinition.Column('Date', SqlType.date()),
                        TableDefinition.Column('Account Id', SqlType.varchar(50)),
                        TableDefinition.Column('Account Name', SqlType.text()),
                        TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                        TableDefinition.Column('Campaign Name', SqlType.text()),
                        TableDefinition.Column('Impressions', SqlType.big_int()),
                        TableDefinition.Column('Clicks', SqlType.big_int()),
                        TableDefinition.Column('Reach', SqlType.big_int()),
                        TableDefinition.Column('Spend', SqlType.double()),
                        TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully Facebook Campaign Table is defined")
                        connection.catalog.create_table(DataTable)
                       # print(Campaign_df["Id"].dtype)
                        #print(range(len(Campaign_df["Id"])))
                        
                        with Inserter(connection, TableName('Extract','Campaign_data')) as inserter:
                            inserted_rows=1
                            row_id=1
                            for i in range(0,len(result["Campaign Id"])):
                                #print(str(result.loc[i,"CampaignId"]))
                                #print(result.loc[i,"Date"])
                                inserter.add_row([
                                int(row_id),
                                datetime.today(),
                                (datetime.strptime(result.loc[i,"Date"], '%Y-%m-%d')),
                                #(datetime.date(result.loc[i,"Date"])),#, "%Y-%m-%d")),
                                str(result.loc[i,"Account Id"]),
                                str(result.loc[i,"Account Name"]),
                                str(result.loc[i,"Campaign Id"]),
                                str(result.loc[i,"Campaign Name"]),
                                int(result.loc[i,"Impressions"]),
                                int(result.loc[i,"Clicks"]),
                                int(result.loc[i,"Reach"]),
                                float(result.loc[i,"Spend"]),
                                float(result.loc[i,"Frequency"])
                                ])
                                #print("instered")
                                row_id=row_id+1
                                inserted_rows=inserted_rows+1
                            inserter.execute()
                            print("Instered Rows are " +str(inserted_rows))
                            LogFileWrite("Instered Rows are " +str(inserted_rows))
                        table_name=TableName('Extract','Campaign_data')
                        Delet_query=f"DELETE FROM {table_name} WHERE " +'"'+ 'Row_ID'+'"'+" NOT IN("
                        Delet_query+="SELECT MAX("+'"'+'Row_ID'+'"'+f") FROM {table_name} "
                        Delet_query+="GROUP BY " +'"'+'Date'+'",'+'"'+'Campaign Id'+'",'+'"'+'Campaign Name'+'",'
                        Delet_query+='"'+'Account Id'+'",'+'"'+'Impressions'+'",'
                        Delet_query+='"'+'Clicks'+'",'+'"'+'Account Name'+'",'+'"'+'Reach'+'",'+'"'+'Spend'+'",'
                        Delet_query+='"'+'Frequency'+'")'
                        #print(Delet_query)
                        
                        connection.execute_command(Delet_query)
                        print("Deleted Duplicate rows")
                        LogFileWrite("Successfully deleted Duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                if connection.is_open==True:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                    print("Connection is open or closed" + str(connection.is_open))
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        if hyperprocess.is_open==True:
            hyperprocess.close()
            print("Forcefully shutted down the Hyper Process")
            LogFileWrite("Forcefully shutted down the Hyper Process")
        else:
            print("Hyper process is shutted down")
            LogFileWrite("Hyper process is shutted down")
            print("Connection is open or closed" + str(connection.is_open))
            print("process is open or closed" + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
import shutil

from pathlib import Path

from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, Name, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Name',
                               type=SqlType.text(),
                               nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Location',
                               type=SqlType.geography(),
                               nullability=NOT_NULLABLE)
    ])


def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")