Example #1
def test_read_doesnt_modify_existing_file(df, tmp_hyper):
    pantab.frame_to_hyper(df, tmp_hyper, table="test")
    last_modified = tmp_hyper.stat().st_mtime

    # Try out our read methods
    pantab.frame_from_hyper(tmp_hyper, table="test")
    pantab.frames_from_hyper(tmp_hyper)

    # Neither read should update the file's stats
    assert last_modified == tmp_hyper.stat().st_mtime
Example #2
def test_reports_unsupported_type(datapath):
    """
    Test that we report an error if we encounter an unsupported column type.
    Previously, we did not do so but instead assumed that all unsupported columns
    would be string columns. This led to very fascinating failures.
    """
    db_path = datapath / "geography.hyper"
    with pytest.raises(
        TypeError, match=r"Column \"x\" has unsupported datatype GEOGRAPHY"
    ):
        pantab.frame_from_hyper(db_path, table="test")
Example #3
def test_months_in_interval_raises(df, tmp_hyper, monkeypatch):
    # Monkeypatch a new constructor that hard codes months
    def __init__(self, months: int, days: int, microseconds: int):
        self.months = 1
        self.days = days
        self.microseconds = microseconds

    monkeypatch.setattr(pantab._writer.tab_api.Interval, "__init__", __init__)
    pantab.frame_to_hyper(df, tmp_hyper, table="test")
    with pytest.raises(ValueError,
                       match=r"Cannot read Intervals with month components\."):
        pantab.frame_from_hyper(tmp_hyper, table="test")

    with pytest.raises(ValueError,
                       match=r"Cannot read Intervals with month components\."):
        pantab.frames_from_hyper(tmp_hyper)
Example #4
def test_read_non_roundtrippable(datapath):
    result = pantab.frame_from_hyper(datapath / "dates.hyper",
                                     table=TableName("Extract", "Extract"))
    expected = pd.DataFrame(
        [["1900-01-01", "2000-01-01"], [pd.NaT, "2050-01-01"]],
        columns=["Date1", "Date2"],
        dtype="datetime64[ns]",
    )
    tm.assert_frame_equal(result, expected)
Example #5
def test_external_hyper_connection_and_process_error(df, tmp_hyper):
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, tmp_hyper,
                        CreateMode.CREATE) as connection:
            expected_msg = (
                "hyper_process parameter is useless because `Connection` is provided"
            )
            with pytest.raises(ValueError, match=expected_msg):
                pantab.frame_from_hyper(connection,
                                        table="test",
                                        hyper_process=hyper)

            with pytest.raises(ValueError, match=expected_msg):
                pantab.frame_from_hyper_query(connection,
                                              "SELECT * FROM test",
                                              hyper_process=hyper)

            with pytest.raises(ValueError, match=expected_msg):
                pantab.frames_from_hyper(connection, hyper_process=hyper)
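As the test shows, pantab treats an explicit hyper_process argument as redundant once a Connection is supplied. A minimal sketch of the intended call (mirroring Example #11, and reusing the df/tmp_hyper fixtures above) simply drops that argument:

with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    # Writing with an explicit process is fine; only combining a Connection
    # with hyper_process on the read side is rejected
    pantab.frame_to_hyper(df, tmp_hyper, table="test", hyper_process=hyper)
    with Connection(hyper.endpoint, tmp_hyper, CreateMode.NONE) as connection:
        # The Connection already carries its HyperProcess
        result = pantab.frame_from_hyper(connection, table="test")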
Example #6
def test_reads_non_writeable_strings(datapath):
    result = pantab.frame_from_hyper(
        datapath / "non_pantab_writeable.hyper", table=TableName("public", "table")
    )

    expected = pd.DataFrame([["row1"], ["row2"]], columns=["Non-Nullable String"])
    if compat.PANDAS_100:
        expected = expected.astype("string")

    tm.assert_frame_equal(result, expected)
Example #7
def convert_to_csv():
    """
    Leverages pantab and pandas to convert a .hyper file to a df, and then
    converts the df to a csv file.
    """

    # Uses pantab to convert the hyper file to a df.
    df = pantab.frame_from_hyper(hyper_file_path, table=table_name)
    print("Converting to CSV...")

    # Simple pandas->csv operation.
    df.to_csv(output_name)
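The function above relies on module-level names (hyper_file_path, table_name, output_name) that are expected to be defined elsewhere in that script. A minimal self-contained variant, with those values passed in as parameters (the function and parameter names here are illustrative, not part of pantab), could look like:

import pantab


def convert_hyper_to_csv(hyper_file_path, table_name, output_name):
    """Read one table from a .hyper file and write it out as a CSV file."""
    df = pantab.frame_from_hyper(hyper_file_path, table=table_name)
    df.to_csv(output_name)
    return df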
Example #8
def test_error_on_first_column(df, tmp_hyper, monkeypatch):
    """
    We had a defect that caused pantab to segfault when an error occurred in one
    of the first two columns. This test case is a regression test against that.
    """
    # Monkeypatch a new constructor that hard codes months
    def __init__(self, months: int, days: int, microseconds: int):
        self.months = 1
        self.days = days
        self.microseconds = microseconds

    monkeypatch.setattr(pantab._writer.tab_api.Interval, "__init__", __init__)

    df = pd.DataFrame(
        [[pd.Timedelta("1 days 2 hours 3 minutes 4 seconds")]],
        columns=["timedelta64"],
    ).astype({"timedelta64": "timedelta64[ns]"})
    pantab.frame_to_hyper(df, tmp_hyper, table="test")

    with pytest.raises(
        ValueError, match=r"Cannot read Intervals with month components\."
    ):
        pantab.frame_from_hyper(tmp_hyper, table="test")
Example #9
def test_basic(df, tmp_hyper, table_name, table_mode):
    # Write twice; depending on mode this should either overwrite or duplicate entries
    pantab.frame_to_hyper(df,
                          tmp_hyper,
                          table=table_name,
                          table_mode=table_mode)
    pantab.frame_to_hyper(df,
                          tmp_hyper,
                          table=table_name,
                          table_mode=table_mode)
    result = pantab.frame_from_hyper(tmp_hyper, table=table_name)

    expected = df.copy()

    if table_mode == "a":
        expected = pd.concat([expected, expected]).reset_index(drop=True)

    assert_roundtrip_equal(result, expected)
Example #10
def test_missing_data(tmp_hyper, table_name, table_mode):
    df = pd.DataFrame([[np.nan], [1]], columns=list("a"))
    df["b"] = pd.Series([None, np.nan], dtype=object)  # no inference
    df["c"] = pd.Series([np.nan, "c"])

    pantab.frame_to_hyper(df,
                          tmp_hyper,
                          table=table_name,
                          table_mode=table_mode)
    pantab.frame_to_hyper(df,
                          tmp_hyper,
                          table=table_name,
                          table_mode=table_mode)

    result = pantab.frame_from_hyper(tmp_hyper, table=table_name)
    expected = pd.DataFrame([[np.nan, np.nan, np.nan], [1, np.nan, "c"]],
                            columns=list("abc"))
    if table_mode == "a":
        expected = pd.concat([expected, expected]).reset_index(drop=True)

    tm.assert_frame_equal(result, expected)
Example #11
def test_roundtrip_with_external_hyper_connection(df, tmp_hyper):
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        pantab.frames_to_hyper({
            "test": df,
            "test2": df
        },
                               tmp_hyper,
                               hyper_process=hyper)

        with Connection(hyper.endpoint, tmp_hyper,
                        CreateMode.NONE) as connection:
            result = pantab.frame_from_hyper(connection, table="test")
            assert_roundtrip_equal(result, df)

            result = pantab.frame_from_hyper_query(connection,
                                                   "SELECT * FROM test")
            assert result.size == 63

            result = pantab.frames_from_hyper(connection)
            assert set(result.keys()) == set(
                (TableName("public", "test"), TableName("public", "test2")))
            for val in result.values():
                assert_roundtrip_equal(val, df)
Example #12
    def test_roundtrip(self, df):
        test_data = os.path.join(self.data_dir, 'test.hyper')
        with open(test_data, 'rb') as infile:
            data = infile.read()

        # Ideally we could just use a buffer, but the Tableau SDK
        # requires a physical string to be passed to the Extract object.
        # Because it creates more than just the .hyper file, we need to
        # create a temporary directory for it to write to.
        with tempfile.TemporaryDirectory() as tmp:
            fn = os.path.join(tmp, 'test.hyper')
            pantab.frame_to_hyper(df, fn)
            comp = pantab.frame_from_hyper(fn)

        # Because Tableau only supports the 64 bit variants, upcast the
        # particular df dtypes that are lower bit
        df = df.astype({
            'foo': np.int64,
            'bar': np.int64,
            'qux': np.float64,
        })

        tm.assert_frame_equal(df, comp)
Example #13
def test_roundtrip_with_external_hyper_process(df, tmp_hyper):
    default_log_path = Path.cwd() / "hyperd.log"
    if default_log_path.exists():
        default_log_path.unlink()

    # By passing in a pre-spawned HyperProcess, one can e.g. avoid creating a log file
    parameters = {"log_config": ""}
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
                      parameters=parameters) as hyper:
        # test frame_to_hyper/frame_from_hyper
        pantab.frame_to_hyper(df, tmp_hyper, table="test", hyper_process=hyper)
        result = pantab.frame_from_hyper(tmp_hyper,
                                         table="test",
                                         hyper_process=hyper)
        assert_roundtrip_equal(result, df)

        # test frame_from_hyper_query
        result = pantab.frame_from_hyper_query(tmp_hyper,
                                               "SELECT * FROM test",
                                               hyper_process=hyper)
        assert result.size == 63

        # test frames_to_hyper/frames_from_hyper
        pantab.frames_to_hyper({
            "test2": df,
            "test": df
        },
                               tmp_hyper,
                               hyper_process=hyper)
        result = pantab.frames_from_hyper(tmp_hyper, hyper_process=hyper)
        assert set(result.keys()) == set(
            (TableName("public", "test"), TableName("public", "test2")))
        for val in result.values():
            assert_roundtrip_equal(val, df)

    assert not default_log_path.exists()
Example #14
# %%
import pandas as pd
import pantab
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
csv = 'nyt-covid19-us-counties.csv'
# !curl  {url} > {csv}
nyt_df = pd.read_csv(csv)
nyt_df.head()

# %%
nyt_df.dtypes

# %%
url = 'https://data.world/covid-19-data-resource-hub/covid-19-case-counts/workspace/file?filename=COVID-19+Cases.hyper'
hyper = 'tableau-covid19.hyper'
# !curl {url} > {hyper}
tableau_df = pantab.frame_from_hyper(hyper, table='Extract')
tableau_df.head()

# %%
pantab.frame_from_hyper(hyper, table='Extract')

# %%
# !pwd

# %% [markdown]
# ### FAO Stat database

# %%
# get database catalogue
catalog_url = 'http://fenixservices.fao.org/faostat/static/bulkdownloads/datasets_E.json'
catalog = pd.DataFrame.from_records(
Example #15
    def test_frame_from_file_raises(self, df):
        # Reading from a .hyper file is not possible with the current SDK
        with pytest.raises(NotImplementedError):
            pantab.frame_from_hyper('foo.hyper')
Example #16
    def test_frame_from_file_raises_extract(self, df):
        # The Tableau SDK currently only supports a table name of 'Extract'
        with pytest.raises(ValueError):
            pantab.frame_from_hyper('foo.hyper', table='foo')
Example #17
    def time_read_frame(self, _):
        pantab.frame_from_hyper("test.hyper", table="test")