def test_multiple_tables(df, tmp_hyper, table_name, table_mode):
    """Write two tables twice; mode 'w' overwrites while 'a' duplicates rows."""
    for _ in range(2):
        pantab.frames_to_hyper(
            {table_name: df, "table2": df}, tmp_hyper, table_mode=table_mode
        )

    result = pantab.frames_from_hyper(tmp_hyper)

    expected = df.copy()
    if table_mode == "a":
        expected = pd.concat([expected, expected]).reset_index(drop=True)
    # some test trickery here
    expected["float32"] = expected["float32"].astype(np.float64)

    # Normalize the parametrized name to a fully-qualified TableName so it
    # compares equal to the keys pantab returns
    if not isinstance(table_name, tab_api.TableName) or table_name.schema_name is None:
        table_name = tab_api.TableName("public", table_name)

    assert set(result.keys()) == {table_name, tab_api.TableName("public", "table2")}
    for frame in result.values():
        tm.assert_frame_equal(frame, expected)
def _read_table(*, connection: tab_api.Connection, table: TableType) -> pd.DataFrame:
    """Read a Hyper table into a DataFrame, mapping Tableau column types to pandas dtypes."""
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)

    dtypes: Dict[str, str] = {}
    for column in table_def.columns:
        col_type = pantab_types._ColumnType(column.type, column.nullability)
        dtypes[column.name.unescaped] = _tableau_to_pandas_type(col_type)

    with connection.execute_query(f"SELECT * from {table}") as result:
        df = pd.DataFrame(result)

    df.columns = dtypes.keys()

    # The tableauhyperapi.Timestamp class is not implicitly convertible to a
    # datetime, so run an element-wise conversion on the applicable columns
    for name, dtype in dtypes.items():
        if dtype == "datetime64[ns]":
            df[name] = df[name].apply(lambda x: x._to_datetime())
        elif dtype == "datetime64[ns, UTC]":
            df[name] = df[name].apply(lambda x: x._to_datetime()).dt.tz_localize("UTC")
        elif dtype == "timedelta64[ns]":
            df[name] = df[name].apply(_interval_to_timedelta)

    df = df.astype(dtypes)
    df = df.fillna(value=np.nan)  # Replace any appearances of None
    return df
def _read_table(*, connection: tab_api.Connection, table: TableType) -> pd.DataFrame:
    """Read a Hyper table into a DataFrame via the libreader C extension."""
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)

    dtypes: Dict[str, str] = {}
    for column in table_def.columns:
        col_type = pantab_types._ColumnType(column.type, column.nullability)
        try:
            dtypes[column.name.unescaped] = pantab_types._pandas_types[col_type]
        except KeyError as e:
            raise TypeError(
                f"Column {column.name} has unsupported datatype {column.type} "
                f"with nullability {column.nullability}") from e

    query = f"SELECT * from {table}"
    records = libreader.read_hyper_query(
        connection._cdata, query, tuple(dtypes.values())
    )
    df = pd.DataFrame(records)
    df.columns = dtypes.keys()

    # TODO: remove this hackery...
    dtypes = {key: "object" for key in dtypes}

    df = df.astype(dtypes)
    df = df.fillna(value=np.nan)  # Replace any appearances of None
    return df
def workbook_owners_publish():
    """Publish the workbook/owner pairs into a local Hyper extract file."""
    rows = workbook_owners()

    with hyp.HyperProcess(hyp.Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        print("The HyperProcess has started.")

        with hyp.Connection(hyper.endpoint, '../data/workbook_owners.hyper',
                            hyp.CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")

            connection.catalog.create_schema('Extract')
            table = hyp.TableDefinition(
                hyp.TableName('Extract', 'Extract'),
                [
                    hyp.TableDefinition.Column('workbook_name', hyp.SqlType.text()),
                    hyp.TableDefinition.Column('owner', hyp.SqlType.text()),
                ],
            )
            print("The table is defined.")

            connection.catalog.create_table(table)
            with hyp.Inserter(connection, table) as inserter:
                for entry in rows:
                    inserter.add_row([entry['workbook_name'], entry['owner']])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")
def _insert_frame(
    df: pd.DataFrame,
    *,
    connection: tab_api.Connection,
    table: pantab_types.TableType,
    table_mode: str,
) -> None:
    """Write ``df`` to ``table``, creating it or appending per ``table_mode``."""
    _validate_table_mode(table_mode)

    if isinstance(table, str):
        table = tab_api.TableName(table)

    # Populate insertion mechanisms dependent on column types
    column_types: List[pantab_types._ColumnType] = []
    columns: List[tab_api.TableDefinition.Column] = []
    for col_name, dtype in df.dtypes.items():
        col_type = _pandas_to_tableau_type(dtype.name)
        column_types.append(col_type)
        columns.append(
            tab_api.TableDefinition.Column(
                name=col_name,
                type=col_type.type_,
                nullability=col_type.nullability,
            ))

    # Sanity check for existing table structures
    if table_mode == "a" and connection.catalog.has_table(table):
        table_def = connection.catalog.get_table_definition(table)
        _assert_columns_equal(columns, table_def.columns)
    else:
        # New table, potentially new schema
        table_def = tab_api.TableDefinition(table)
        for column in columns:
            table_def.add_column(column)

        if isinstance(table, tab_api.TableName) and table.schema_name:
            connection.catalog.create_schema_if_not_exists(table.schema_name)

        connection.catalog.create_table_if_not_exists(table_def)

    null_mask = np.ascontiguousarray(pd.isnull(df))
    # Special handling for conversions
    df, dtypes = _maybe_convert_timedelta(df)

    with tab_api.Inserter(connection, table_def) as inserter:
        # HACK: libwriter needs the raw memory address of the inserter buffer,
        # but there is no supported way to get it at runtime. The cdata repr
        # looks like "<cdata 'hyper_inserter_buffer_t *' 0x7f815192ec60>", so
        # strip the trailing ">" and parse the final hex token. ffi.addressof
        # did not work because this is an opaque pointer.
        address = int(str(inserter._buffer)[:-1].split()[-1], base=16)
        libwriter.write_to_hyper(
            df.itertuples(index=False, name=None),
            null_mask,
            address,
            df.shape[1],
            dtypes,
        )
        inserter.execute()
def _insert_frame(
    df: pd.DataFrame,
    *,
    connection: tab_api.Connection,
    table: pantab_types.TableType,
    table_mode: str,
) -> None:
    """Write ``df`` to ``table``, creating it or appending per ``table_mode``."""
    _validate_table_mode(table_mode)

    if isinstance(table, str):
        table = tab_api.TableName(table)

    # Build Tableau column definitions from the frame's pandas dtypes
    column_types: List[pantab_types._ColumnType] = []
    columns: List[tab_api.TableDefinition.Column] = []
    for col_name, dtype in df.dtypes.items():
        col_type = _pandas_to_tableau_type(dtype.name)
        column_types.append(col_type)
        columns.append(
            tab_api.TableDefinition.Column(
                name=col_name,
                type=col_type.type_,
                nullability=col_type.nullability,
            ))

    # Appending requires the existing structure to match the frame's columns
    if table_mode == "a" and connection.catalog.has_table(table):
        table_def = connection.catalog.get_table_definition(table)
        _assert_columns_equal(columns, table_def.columns)
    else:
        # New table, potentially new schema
        table_def = tab_api.TableDefinition(table)
        for column in columns:
            table_def.add_column(column)

        if isinstance(table, tab_api.TableName) and table.schema_name:
            connection.catalog.create_schema_if_not_exists(table.schema_name)

        connection.catalog.create_table_if_not_exists(table_def)

    null_mask = np.ascontiguousarray(pd.isnull(df))
    # Special handling for conversions
    df, dtypes = _maybe_convert_timedelta(df)

    with tab_api.Inserter(connection, table_def) as inserter:
        libwriter.write_to_hyper(
            df.itertuples(index=False, name=None),
            null_mask,
            inserter._buffer,
            df.shape[1],
            dtypes,
        )
        inserter.execute()
def _read_table(*, connection: tab_api.Connection, table: TableType) -> pd.DataFrame:
    """Read a Hyper table into a DataFrame, delegating row handling to _read_query_result."""
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)

    dtypes: Dict[str, str] = {}
    for column in table_def.columns:
        col_type = pantab_types._ColumnType(column.type, column.nullability)
        try:
            dtypes[column.name.unescaped] = pantab_types._pandas_types[col_type]
        except KeyError as e:
            raise TypeError(
                f"Column {column.name} has unsupported datatype {column.type} "
                f"with nullability {column.nullability}") from e

    query = f"SELECT * from {table}"
    with connection.execute_query(query) as result:
        return _read_query_result(result, dtypes)
def _read_table(*, connection: tab_api.Connection, table: TableType) -> pd.DataFrame:
    """Read a Hyper table into a DataFrame via libreader using the raw connection pointer."""
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)

    dtypes: Dict[str, str] = {}
    for column in table_def.columns:
        col_type = pantab_types._ColumnType(column.type, column.nullability)
        dtypes[column.name.unescaped] = _tableau_to_pandas_type(col_type)

    # HACK :-X  libreader needs the raw pointer of the connection cdata; parse
    # the trailing hex address out of its repr string
    address = int(str(connection._cdata)[:-1].split()[-1], base=16)

    query = f"SELECT * from {table}"
    records = libreader.read_hyper_query(address, query, tuple(dtypes.values()))
    df = pd.DataFrame(records)
    df.columns = dtypes.keys()

    df = df.astype(dtypes)
    df = df.fillna(value=np.nan)  # Replace any appearances of None
    return df
"datetime64_utc": "datetime64[ns, UTC]", "timedelta64": "timedelta64[ns]", "object": "object", }) return df @pytest.fixture def tmp_hyper(tmp_path): """A temporary file name to write / read a Hyper extract from.""" return tmp_path / "test.hyper" @pytest.fixture(params=["w", "a"]) def table_mode(request): """Write or append markers for table handling.""" return request.param @pytest.fixture(params=[ "table", tab_api.Name("table"), tab_api.TableName("table"), tab_api.TableName("public", "table"), tab_api.TableName("nonpublic", "table"), ]) def table_name(request): """Various ways to represent a table in Tableau.""" return request.param
# Not all types are writeable by pantab but should probably be readable.
# This utility script will help generate those files, which can be
# incorporated into testing.
import tableauhyperapi as tab_api

if __name__ == "__main__":
    # A table pantab itself cannot write: a NOT NULL text column
    table_def = tab_api.TableDefinition(
        table_name=tab_api.TableName("public", "table"),
        columns=[
            tab_api.TableDefinition.Column(
                name="Non-Nullable String",
                type=tab_api.SqlType.text(),
                nullability=tab_api.NOT_NULLABLE,
            )
        ],
    )

    telemetry = tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
    with tab_api.HyperProcess(telemetry=telemetry) as hyper:
        with tab_api.Connection(
                endpoint=hyper.endpoint,
                database="non_pantab_writeable.hyper",
                create_mode=tab_api.CreateMode.CREATE_AND_REPLACE,
        ) as connection:
            connection.catalog.create_table(table_definition=table_def)
            with tab_api.Inserter(connection, table_def) as inserter:
                inserter.add_rows([["row1"], ["row2"]])
                inserter.execute()