Example 1
def test_read_doesnt_modify_existing_file(df, tmp_hyper):
    pantab.frame_to_hyper(df, tmp_hyper, table="test")
    last_modified = tmp_hyper.stat().st_mtime

    # Try out our read methods
    pantab.frame_from_hyper(tmp_hyper, table="test")
    pantab.frames_from_hyper(tmp_hyper)

    # Neither should update the file's stats
    assert last_modified == tmp_hyper.stat().st_mtime
Example 2
def test_multiple_tables(df, tmp_hyper, table_name, table_mode):
    # Write twice; depending on mode this should either overwrite or duplicate entries
    pantab.frames_to_hyper(
        {table_name: df, "table2": df},
        tmp_hyper,
        table_mode=table_mode,
    )
    pantab.frames_to_hyper(
        {table_name: df, "table2": df},
        tmp_hyper,
        table_mode=table_mode,
    )
    result = pantab.frames_from_hyper(tmp_hyper)

    expected = df.copy()
    if table_mode == "a":
        expected = pd.concat([expected, expected]).reset_index(drop=True)

    expected["float32"] = expected["float32"].astype(np.float64)

    # Normalize table_name to a schema-qualified TableName for the comparison
    if not isinstance(table_name,
                      tab_api.TableName) or table_name.schema_name is None:
        table_name = tab_api.TableName("public", table_name)

    assert set(result.keys()) == set(
        (table_name, tab_api.TableName("public", "table2")))
    for val in result.values():
        tm.assert_frame_equal(val, expected)
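
For context, table_mode follows pantab's write modes: "w" (the default) replaces an existing table, while "a" appends to it. A minimal standalone sketch, using a hypothetical scratch file name:

import pandas as pd
import pantab

df = pd.DataFrame({"x": [1, 2]})
pantab.frame_to_hyper(df, "scratch.hyper", table="t", table_mode="w")
pantab.frame_to_hyper(df, "scratch.hyper", table="t", table_mode="a")
# The second write appended, so four rows come back
assert len(pantab.frame_from_hyper("scratch.hyper", table="t")) == 4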
Example 3
def test_months_in_interval_raises(df, tmp_hyper, monkeypatch):
    # Monkeypatch a new constructor that hard-codes months
    def __init__(self, months: int, days: int, microseconds: int):
        self.months = 1
        self.days = days
        self.microseconds = microseconds

    monkeypatch.setattr(pantab._writer.tab_api.Interval, "__init__", __init__)
    pantab.frame_to_hyper(df, tmp_hyper, table="test")
    with pytest.raises(ValueError,
                       match=r"Cannot read Intervals with month components\."):
        pantab.frame_from_hyper(tmp_hyper, table="test")

    with pytest.raises(ValueError,
                       match=r"Cannot read Intervals with month components\."):
        pantab.frames_from_hyper(tmp_hyper)
Example 4
def test_external_hyper_connection_and_process_error(df, tmp_hyper):
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, tmp_hyper,
                        CreateMode.CREATE) as connection:
            expected_msg = (
                "hyper_process parameter is useless because `Connection` is provided"
            )
            with pytest.raises(ValueError, match=expected_msg):
                pantab.frame_from_hyper(connection,
                                        table="test",
                                        hyper_process=hyper)

            with pytest.raises(ValueError, match=expected_msg):
                pantab.frame_from_hyper_query(connection,
                                              "SELECT * FROM test",
                                              hyper_process=hyper)

            with pytest.raises(ValueError, match=expected_msg):
                pantab.frames_from_hyper(connection, hyper_process=hyper)
Example 5
def hyper_to_csv(hyper_file, csv_export):
    """Export the tables of a Tableau .hyper file to CSV via a pandas DataFrame.

    Parameters:
        hyper_file:
            File path of the .hyper file to import.
        csv_export:
            File path of the CSV file to export.

    Steps:
        1. Load the hyper tables in aggregate via pantab's frames_from_hyper,
           bypassing the need to know individual table names.
        2. Iterate over the resulting dictionary's items() and keep the
           DataFrame.

    Returns:
        df.to_csv(csv_export, index=None)
            Writes the output to the file path with no index column;
            adjust as needed.
    """
    hyperdb = pt.frames_from_hyper(hyper_file)
    # Note: if the file holds several tables, only the last one iterated
    # here ends up in the CSV
    for key, pandas_df in hyperdb.items():
        df = pandas_df
    return df.to_csv(csv_export, index=None)
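
A quick usage sketch (the file names are hypothetical, and pantab is assumed to be imported as pt, as the function body implies):

import pantab as pt

hyper_to_csv("extract.hyper", "extract.csv")  # writes the table's rows to extract.csv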
Example 6
def test_roundtrip_with_external_hyper_connection(df, tmp_hyper):
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        pantab.frames_to_hyper(
            {"test": df, "test2": df},
            tmp_hyper,
            hyper_process=hyper,
        )

        with Connection(hyper.endpoint, tmp_hyper,
                        CreateMode.NONE) as connection:
            result = pantab.frame_from_hyper(connection, table="test")
            assert_roundtrip_equal(result, df)

            result = pantab.frame_from_hyper_query(connection,
                                                   "SELECT * FROM test")
            assert result.size == 63

            result = pantab.frames_from_hyper(connection)
            assert set(result.keys()) == set(
                (TableName("public", "test"), TableName("public", "test2")))
            for val in result.values():
                assert_roundtrip_equal(val, df)
Example 7
def test_roundtrip_with_external_hyper_process(df, tmp_hyper):
    default_log_path = Path.cwd() / "hyperd.log"
    if default_log_path.exists():
        default_log_path.unlink()

    # By passing in a pre-spawned HyperProcess, one can e.g. avoid creating a log file
    parameters = {"log_config": ""}
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
                      parameters=parameters) as hyper:
        # test frame_to_hyper/frame_from_hyper
        pantab.frame_to_hyper(df, tmp_hyper, table="test", hyper_process=hyper)
        result = pantab.frame_from_hyper(tmp_hyper,
                                         table="test",
                                         hyper_process=hyper)
        assert_roundtrip_equal(result, df)

        # test frame_from_hyper_query
        result = pantab.frame_from_hyper_query(tmp_hyper,
                                               "SELECT * FROM test",
                                               hyper_process=hyper)
        assert result.size == 63

        # test frames_to_hyper/frames_from_hyper
        pantab.frames_to_hyper(
            {"test2": df, "test": df},
            tmp_hyper,
            hyper_process=hyper,
        )
        result = pantab.frames_from_hyper(tmp_hyper, hyper_process=hyper)
        assert set(result.keys()) == set(
            (TableName("public", "test"), TableName("public", "test2")))
        for val in result.values():
            assert_roundtrip_equal(val, df)

    assert not default_log_path.exists()
Example 8

import io
import tempfile
import zipfile

import numpy as np
import requests

import pantab


def model_uncertainty(function, x, params, covariance):
    # Standard error propagation:
    # sigma_f(x)^2 = sum_ij (df/dp_i)(df/dp_j) Cov_ij
    u_params = [np.sqrt(abs(covariance[i, i])) for i in range(len(params))]
    derivs = partial_derivatives(function, x, params, u_params)
    squared_model_uncertainty = sum(derivs[i] * derivs[j] * covariance[i, j]
                                    for i in range(len(params))
                                    for j in range(len(params)))
    return np.sqrt(squared_model_uncertainty)
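
partial_derivatives is not defined in this excerpt. A minimal sketch consistent with how it is called above, assuming it central-differences function with respect to each parameter, using the 1-sigma uncertainties as step sizes:

def partial_derivatives(function, x, params, u_params):
    # df/dp_i ~ [f(..., p_i + h) - f(..., p_i - h)] / 2h with h = u_param / 2;
    # assumes nonzero parameter uncertainties
    params = np.asarray(params, dtype=float)
    derivs = []
    for i, u_param in enumerate(u_params):
        step = np.zeros(len(params))
        step[i] = u_param / 2
        upper = function(x, *(params + step))
        lower = function(x, *(params - step))
        derivs.append((upper - lower) / u_param)
    return derivs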


url = "https://public.tableau.com/workbooks/Cases_15982342702770.twb"
dbname = "Data/Extracts/federated_12gagec10ajljj1457q361.hyper"
workbook_data = requests.get(url).content
workbook = zipfile.ZipFile(io.BytesIO(workbook_data))
with tempfile.TemporaryDirectory() as tempdir:
    dbpath = workbook.extract(dbname, path=tempdir)
    name, df = pantab.frames_from_hyper(dbpath).popitem()

data = []
for cases, date in zip(df['Cases'], df['Date']):
    try:
        cases = float(cases)
    except TypeError:
        # Non-numeric entries (e.g. None) count as zero cases
        cases = 0
    # Hour-resolution timestamp, shifted forward by 24 hours
    date = np.datetime64(date, 'h') + 24
    data.append((date, cases))

data.sort()
dates, new = [np.array(a) for a in zip(*data)]

# Fill in missing dates when there were zero cases:
for date in np.arange(dates[0], dates[-1], 24):