Example #1
def test_multiple_tables(df, tmp_hyper, table_name, table_mode):
    # Write twice; depending on mode this should either overwrite or duplicate entries
    pantab.frames_to_hyper(
        {table_name: df, "table2": df},
        tmp_hyper,
        table_mode=table_mode,
    )
    pantab.frames_to_hyper(
        {table_name: df, "table2": df},
        tmp_hyper,
        table_mode=table_mode,
    )
    result = pantab.frames_from_hyper(tmp_hyper)

    expected = df.copy()
    if table_mode == "a":
        expected = pd.concat([expected, expected]).reset_index(drop=True)

    expected["float32"] = expected["float32"].astype(np.float64)

    # Normalize table_name into a fully qualified TableName so it matches
    # the keys returned by frames_from_hyper
    if not isinstance(table_name, tab_api.TableName) or table_name.schema_name is None:
        table_name = tab_api.TableName("public", table_name)

    assert set(result.keys()) == {table_name, tab_api.TableName("public", "table2")}
    for val in result.values():
        tm.assert_frame_equal(val, expected)
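
For orientation, here is a minimal sketch of the round trip this test exercises; the DataFrame contents and the file name are made up:

import pandas as pd
import pantab

df = pd.DataFrame({"int64": [1, 2]})

# In append mode ("a") the second write duplicates each table's rows;
# in write mode ("w") it simply overwrites the first write
pantab.frames_to_hyper({"table": df, "table2": df}, "example.hyper", table_mode="a")
pantab.frames_to_hyper({"table": df, "table2": df}, "example.hyper", table_mode="a")
result = pantab.frames_from_hyper("example.hyper")  # dict keyed by TableName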
Example #2
def _read_table(*, connection: tab_api.Connection,
                table: TableType) -> pd.DataFrame:
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)
    columns = table_def.columns

    dtypes: Dict[str, str] = {}
    for column in columns:
        column_type = pantab_types._ColumnType(column.type, column.nullability)
        dtypes[column.name.unescaped] = _tableau_to_pandas_type(column_type)

    with connection.execute_query(f"SELECT * from {table}") as result:
        df = pd.DataFrame(result)

    df.columns = dtypes.keys()
    # The tableauhyperapi.Timestamp class is not implicitly convertible to a datetime
    # so we need to run an apply against applicable types
    for key, val in dtypes.items():
        if val == "datetime64[ns]":
            df[key] = df[key].apply(lambda x: x._to_datetime())
        elif val == "datetime64[ns, UTC]":
            df[key] = df[key].apply(lambda x: x._to_datetime()).dt.tz_localize("UTC")
        elif val == "timedelta64[ns]":
            df[key] = df[key].apply(_interval_to_timedelta)

    df = df.astype(dtypes)
    df = df.fillna(value=np.nan)  # Replace any appearances of None

    return df
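
A sketch of how a private helper like _read_table might be driven, assuming the module-level tableauhyperapi import and an existing example.hyper file:

import tableauhyperapi as tab_api

with tab_api.HyperProcess(
    telemetry=tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
) as hyper:
    with tab_api.Connection(hyper.endpoint, "example.hyper") as connection:
        df = _read_table(connection=connection, table="table")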
Example #3
def _read_table(*, connection: tab_api.Connection,
                table: TableType) -> pd.DataFrame:
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)
    columns = table_def.columns

    dtypes: Dict[str, str] = {}
    for column in columns:
        column_type = pantab_types._ColumnType(column.type, column.nullability)
        try:
            dtypes[column.name.unescaped] = pantab_types._pandas_types[column_type]
        except KeyError as e:
            raise TypeError(
                f"Column {column.name} has unsupported datatype {column.type} "
                f"with nullability {column.nullability}") from e

    query = f"SELECT * from {table}"
    dtype_strs = tuple(dtypes.values())

    df = pd.DataFrame(
        libreader.read_hyper_query(connection._cdata, query, dtype_strs))

    df.columns = dtypes.keys()

    # TODO: remove this hackery...
    for k in dtypes:
        dtypes[k] = "object"

    df = df.astype(dtypes)
    df = df.fillna(value=np.nan)  # Replace any appearances of None

    return df
Example #4
def workbook_owners_publish():
    workbook_owners_dict = workbook_owners()

    with hyp.HyperProcess(hyp.Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        print("The HyperProcess has started.")

        with hyp.Connection(hyper.endpoint, '../data/workbook_owners.hyper',
                            hyp.CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")

            connection.catalog.create_schema('Extract')

            table = hyp.TableDefinition(hyp.TableName('Extract', 'Extract'), [
                hyp.TableDefinition.Column('workbook_name',
                                           hyp.SqlType.text()),
                hyp.TableDefinition.Column('owner', hyp.SqlType.text())
            ])

            print("The table is defined.")

            connection.catalog.create_table(table)

            with hyp.Inserter(connection, table) as inserter:
                for i in workbook_owners_dict:
                    inserter.add_row([i['workbook_name'], i['owner']])
                inserter.execute()

            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")
Example #5
def _insert_frame(
    df: pd.DataFrame,
    *,
    connection: tab_api.Connection,
    table: pantab_types.TableType,
    table_mode: str,
) -> None:
    _validate_table_mode(table_mode)

    if isinstance(table, str):
        table = tab_api.TableName(table)

    # Populate insertion mechanisms dependent on column types
    column_types: List[pantab_types._ColumnType] = []
    columns: List[tab_api.TableDefinition.Column] = []
    for col_name, dtype in df.dtypes.items():
        column_type = _pandas_to_tableau_type(dtype.name)
        column_types.append(column_type)
        columns.append(
            tab_api.TableDefinition.Column(
                name=col_name,
                type=column_type.type_,
                nullability=column_type.nullability,
            ))

    # Sanity check for existing table structures
    if table_mode == "a" and connection.catalog.has_table(table):
        table_def = connection.catalog.get_table_definition(table)
        _assert_columns_equal(columns, table_def.columns)
    else:  # New table, potentially new schema
        table_def = tab_api.TableDefinition(table)

        for column, column_type in zip(columns, column_types):
            table_def.add_column(column)

        if isinstance(table, tab_api.TableName) and table.schema_name:
            connection.catalog.create_schema_if_not_exists(table.schema_name)

        connection.catalog.create_table_if_not_exists(table_def)

    null_mask = np.ascontiguousarray(pd.isnull(df))
    # Special handling for conversions
    df, dtypes = _maybe_convert_timedelta(df)

    with tab_api.Inserter(connection, table_def) as inserter:
        # This is a terrible hack but I couldn't find any other way to expose
        # the memory address of the cdata object at runtime in the Python runtime
        # take something like <cdata 'hyper_inserter_buffer_t *' 0x7f815192ec60>
        # and extract just 0x7f815192ec60
        # ffi.addressof did not work because this is an opaque pointer
        address = int(str(inserter._buffer)[:-1].split()[-1], base=16)
        libwriter.write_to_hyper(
            df.itertuples(index=False, name=None),
            null_mask,
            address,
            df.shape[1],
            dtypes,
        )
        inserter.execute()
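
The pointer-parsing trick can be shown in isolation; the repr string below is the one quoted in the comment, not a live object:

# cffi renders the opaque pointer as a string like this
s = "<cdata 'hyper_inserter_buffer_t *' 0x7f815192ec60>"
# drop the trailing ">" and take the last whitespace-separated token
address = int(s[:-1].split()[-1], base=16)
assert address == 0x7F815192EC60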
Example #6
def _insert_frame(
    df: pd.DataFrame,
    *,
    connection: tab_api.Connection,
    table: pantab_types.TableType,
    table_mode: str,
) -> None:
    _validate_table_mode(table_mode)

    if isinstance(table, str):
        table = tab_api.TableName(table)

    # Populate insertion mechanisms dependent on column types
    column_types: List[pantab_types._ColumnType] = []
    columns: List[tab_api.TableDefinition.Column] = []
    for col_name, dtype in df.dtypes.items():
        column_type = _pandas_to_tableau_type(dtype.name)
        column_types.append(column_type)
        columns.append(
            tab_api.TableDefinition.Column(
                name=col_name,
                type=column_type.type_,
                nullability=column_type.nullability,
            ))

    # Sanity check for existing table structures
    if table_mode == "a" and connection.catalog.has_table(table):
        table_def = connection.catalog.get_table_definition(table)
        _assert_columns_equal(columns, table_def.columns)
    else:  # New table, potentially new schema
        table_def = tab_api.TableDefinition(table)

        for column, column_type in zip(columns, column_types):
            table_def.add_column(column)

        if isinstance(table, tab_api.TableName) and table.schema_name:
            connection.catalog.create_schema_if_not_exists(table.schema_name)

        connection.catalog.create_table_if_not_exists(table_def)

    null_mask = np.ascontiguousarray(pd.isnull(df))
    # Special handling for conversions
    df, dtypes = _maybe_convert_timedelta(df)

    with tab_api.Inserter(connection, table_def) as inserter:
        libwriter.write_to_hyper(
            df.itertuples(index=False, name=None),
            null_mask,
            inserter._buffer,
            df.shape[1],
            dtypes,
        )
        inserter.execute()
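
Unlike Example #5, this variant hands inserter._buffer straight to libwriter.write_to_hyper rather than parsing the pointer address out of its string repr, presumably reflecting a revision of the C extension that accepts the cdata object directly.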
Example #7
def _read_table(*, connection: tab_api.Connection,
                table: TableType) -> pd.DataFrame:
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)
    columns = table_def.columns

    dtypes: Dict[str, str] = {}
    for column in columns:
        column_type = pantab_types._ColumnType(column.type, column.nullability)
        try:
            dtypes[column.name.unescaped] = pantab_types._pandas_types[column_type]
        except KeyError as e:
            raise TypeError(
                f"Column {column.name} has unsupported datatype {column.type} "
                f"with nullability {column.nullability}") from e

    query = f"SELECT * from {table}"
    with connection.execute_query(query) as result:
        return _read_query_result(result, dtypes)
Example #8
def _read_table(*, connection: tab_api.Connection, table: TableType) -> pd.DataFrame:
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)
    columns = table_def.columns

    dtypes: Dict[str, str] = {}
    for column in columns:
        column_type = pantab_types._ColumnType(column.type, column.nullability)
        dtypes[column.name.unescaped] = _tableau_to_pandas_type(column_type)

    address = int(str(connection._cdata)[:-1].split()[-1], base=16)  # HACK :-X
    query = f"SELECT * from {table}"
    dtype_strs = tuple(dtypes.values())

    df = pd.DataFrame(libreader.read_hyper_query(address, query, dtype_strs))

    df.columns = dtypes.keys()
    df = df.astype(dtypes)
    df = df.fillna(value=np.nan)  # Replace any appearances of None

    return df
Example #9
        "datetime64_utc": "datetime64[ns, UTC]",
        "timedelta64": "timedelta64[ns]",
        "object": "object",
    })

    return df


@pytest.fixture
def tmp_hyper(tmp_path):
    """A temporary file name to write / read a Hyper extract from."""
    return tmp_path / "test.hyper"


@pytest.fixture(params=["w", "a"])
def table_mode(request):
    """Write or append markers for table handling."""
    return request.param


@pytest.fixture(params=[
    "table",
    tab_api.Name("table"),
    tab_api.TableName("table"),
    tab_api.TableName("public", "table"),
    tab_api.TableName("nonpublic", "table"),
])
def table_name(request):
    """Various ways to represent a table in Tableau."""
    return request.param
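
Together these fixtures parameterize a test such as test_multiple_tables(df, tmp_hyper, table_name, table_mode) from Example #1, running it once per combination of table name and table mode.
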
# Not all types are writable by pantab, but they should probably be readable.
# This utility script helps generate those files, which can then be
# incorporated into testing.

import tableauhyperapi as tab_api

if __name__ == "__main__":

    table = tab_api.TableDefinition(
        table_name=tab_api.TableName("public", "table"),
        columns=[
            tab_api.TableDefinition.Column(
                name="Non-Nullable String",
                type=tab_api.SqlType.text(),
                nullability=tab_api.NOT_NULLABLE,
            )
        ],
    )

    with tab_api.HyperProcess(
        telemetry=tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
    ) as hyper:
        with tab_api.Connection(
                endpoint=hyper.endpoint,
                database="non_pantab_writeable.hyper",
                create_mode=tab_api.CreateMode.CREATE_AND_REPLACE,
        ) as connection:
            connection.catalog.create_table(table_definition=table)

            with tab_api.Inserter(connection, table) as inserter:
                inserter.add_rows([["row1"], ["row2"]])
                inserter.execute()
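
Once generated, the file should be readable even though pantab could not have written the NOT_NULLABLE text column itself; a sketch of the read-back, assuming pantab is importable alongside this script:

import pantab

result = pantab.frames_from_hyper("non_pantab_writeable.hyper")
print(result[tab_api.TableName("public", "table")])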