Example #1
0
def _read_table(*, connection: tab_api.Connection,
                table: TableType) -> pd.DataFrame:
    """Read every row of a Hyper table into a DataFrame.

    Args:
        connection: Connection used both to look up the table definition
            and to run the ``SELECT *`` query.
        table: Table to read. A plain string is wrapped in
            ``tab_api.TableName``; anything else is passed through as-is.

    Returns:
        A DataFrame with one column per table column (named after the
        unescaped column name), cast to the pandas dtype reported by
        ``_tableau_to_pandas_type``.
    """
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)
    columns = table_def.columns

    dtypes: Dict[str, str] = {}
    for column in columns:
        column_type = pantab_types._ColumnType(column.type, column.nullability)
        dtypes[column.name.unescaped] = _tableau_to_pandas_type(column_type)

    with connection.execute_query(f"SELECT * from {table}") as result:
        # Name the columns at construction time: a result with zero rows would
        # otherwise yield a zero-column frame and a later column assignment
        # would fail with a length mismatch.
        df = pd.DataFrame(result, columns=list(dtypes))

    def _timestamp_to_datetime(value):
        # NULL cells show up as None, which has no _to_datetime(); map them to
        # NaT so nullable timestamp columns can be converted.
        return pd.NaT if value is None else value._to_datetime()

    # The tableauhyperapi.Timestamp class is not implicitly convertible to a datetime
    # so we need to run an apply against applicable types
    for key, val in dtypes.items():
        if val == "datetime64[ns]":
            df[key] = df[key].apply(_timestamp_to_datetime)
        elif val == "datetime64[ns, UTC]":
            # Cast before touching `.dt`: an empty column is still object dtype
            # after apply() and the accessor would not be available on it.
            naive = df[key].apply(_timestamp_to_datetime).astype(
                "datetime64[ns]")
            df[key] = naive.dt.tz_localize("UTC")
        elif val == "timedelta64[ns]":
            df[key] = df[key].apply(_interval_to_timedelta)

    df = df.astype(dtypes)
    df = df.fillna(value=np.nan)  # Replace any appearances of None

    return df
Example #2
0
def _read_table(*, connection: tab_api.Connection,
                table: TableType) -> pd.DataFrame:
    """Read a Hyper table through the native reader into a DataFrame.

    Args:
        connection: Connection whose catalog supplies the table definition
            and whose ``_cdata`` handle is handed to ``libreader``.
        table: Table to read. A plain string is wrapped in
            ``tab_api.TableName``.

    Returns:
        A DataFrame with one column per table column. All columns are
        currently cast to ``object`` (see the TODO below).

    Raises:
        TypeError: If a column's (type, nullability) pair has no entry in
            ``pantab_types._pandas_types``.
    """
    if isinstance(table, str):
        table = tab_api.TableName(table)

    # Map each unescaped column name to the dtype string the reader expects.
    dtypes: Dict[str, str] = {}
    for col in connection.catalog.get_table_definition(table).columns:
        lookup_key = pantab_types._ColumnType(col.type, col.nullability)
        try:
            dtypes[col.name.unescaped] = pantab_types._pandas_types[lookup_key]
        except KeyError as e:
            raise TypeError(
                f"Column {col.name} has unsupported datatype {col.type} "
                f"with nullability {col.nullability}") from e

    # Snapshot the real dtype strings for the reader before they are
    # overridden for the astype() call further down.
    reader_dtypes = tuple(dtypes.values())
    raw = libreader.read_hyper_query(
        connection._cdata, f"SELECT * from {table}", reader_dtypes)

    frame = pd.DataFrame(raw)
    frame.columns = dtypes.keys()

    # TODO: remove this hackery...
    as_object = dict.fromkeys(dtypes, "object")

    # fillna replaces any appearances of None
    return frame.astype(as_object).fillna(value=np.nan)
Example #3
0
def _read_table(*, connection: tab_api.Connection,
                table: TableType) -> pd.DataFrame:
    """Read a Hyper table by querying it and delegating to `_read_query_result`.

    Args:
        connection: Connection used to fetch the table definition and to
            execute the ``SELECT *`` query.
        table: Table to read. A plain string is wrapped in
            ``tab_api.TableName``.

    Returns:
        Whatever ``_read_query_result`` builds from the query result and the
        dtype mapping derived from the table definition.

    Raises:
        TypeError: If a column's (type, nullability) pair has no entry in
            ``pantab_types._pandas_types``.
    """
    if isinstance(table, str):
        table = tab_api.TableName(table)

    # The table definition carries nullability, so the dtype mapping is built
    # here and passed along with the result.
    dtypes: Dict[str, str] = {}
    for col in connection.catalog.get_table_definition(table).columns:
        lookup_key = pantab_types._ColumnType(col.type, col.nullability)
        try:
            dtypes[col.name.unescaped] = pantab_types._pandas_types[lookup_key]
        except KeyError as e:
            raise TypeError(
                f"Column {col.name} has unsupported datatype {col.type} "
                f"with nullability {col.nullability}") from e

    with connection.execute_query(f"SELECT * from {table}") as result:
        frame = _read_query_result(result, dtypes)
    return frame
Example #4
0
def _read_table(*, connection: tab_api.Connection, table: TableType) -> pd.DataFrame:
    """Read a Hyper table through the native reader, addressed by raw pointer.

    Args:
        connection: Connection whose catalog supplies the table definition and
            whose ``_cdata`` handle is turned into an integer address.
        table: Table to read. A plain string is wrapped in
            ``tab_api.TableName``.

    Returns:
        A DataFrame with one column per table column, cast to the dtype
        reported by ``_tableau_to_pandas_type``.
    """
    if isinstance(table, str):
        table = tab_api.TableName(table)

    table_def = connection.catalog.get_table_definition(table)
    dtypes: Dict[str, str] = {
        col.name.unescaped: _tableau_to_pandas_type(
            pantab_types._ColumnType(col.type, col.nullability)
        )
        for col in table_def.columns
    }

    # HACK :-X  The address is scraped from the text form of the handle: drop
    # the final character, take the last whitespace-separated token and parse
    # it as hexadecimal.
    # NOTE(review): presumably relies on a "<... 0xADDR>"-style repr - confirm.
    handle_text = str(connection._cdata)
    hex_token = handle_text[:-1].split()[-1]
    address = int(hex_token, base=16)

    raw = libreader.read_hyper_query(
        address, f"SELECT * from {table}", tuple(dtypes.values())
    )

    frame = pd.DataFrame(raw)
    frame.columns = dtypes.keys()

    # fillna replaces any appearances of None
    return frame.astype(dtypes).fillna(value=np.nan)
Example #5
0
def _read_query_result(
    result: tab_api.Result,
    dtypes: Optional[Dict[str, str]],
) -> pd.DataFrame:
    """Turn an executed query result into a DataFrame via the native reader.

    Args:
        result: Query result to read. Its schema is consulted only when
            ``dtypes`` is None.
        dtypes: Mapping of column name to dtype string, in column order. When
            None, it is derived from ``result.schema`` with every column
            treated as nullable. The mapping passed in is not modified.

    Returns:
        A DataFrame whose columns are named after the keys of ``dtypes`` and
        cast to the mapped dtypes ("date" columns become ``datetime64[ns]``).

    Raises:
        TypeError: If a result column's type has no entry in
            ``pantab_types._pandas_types``.
    """
    if dtypes is None:
        dtypes = {}
        # `result.schema` does not provide nullability information.
        # Let's err on the safe side and always assume columns are nullable.
        nullability = tab_api.Nullability.NULLABLE
        # Construct data types from result
        for column in result.schema.columns:
            column_type = pantab_types._ColumnType(column.type, nullability)
            try:
                dtypes[column.name.
                       unescaped] = pantab_types._pandas_types[column_type]
            except KeyError as e:
                # Report the assumed nullability: schema columns do not carry
                # one, so reading it off `column` here would hide this error.
                raise TypeError(
                    f"Column {column.name} has unsupported datatype {column.type} "
                    f"with nullability {nullability}") from e

    # Call native library to read tuples from result set
    dtype_strs = tuple(dtypes.values())
    df = pd.DataFrame(
        libreader.read_hyper_query(result._Result__cdata, dtype_strs))

    df.columns = dtypes.keys()

    # TODO: remove this hackery...
    # The reader is given "date", but the frame is cast to datetime64[ns].
    # A separate mapping is built so a caller-supplied dict is left untouched.
    astype_map = {
        name: "datetime64[ns]" if dtype == "date" else dtype
        for name, dtype in dtypes.items()
    }

    df = df.astype(astype_map)
    df = df.fillna(value=np.nan)  # Replace any appearances of None

    return df