def test_read_doesnt_modify_existing_file(df, tmp_hyper):
    """Reading from a hyper file must leave its modification time untouched."""
    pantab.frame_to_hyper(df, tmp_hyper, table="test")
    mtime_before = tmp_hyper.stat().st_mtime

    # Exercise both read entry points
    pantab.frame_from_hyper(tmp_hyper, table="test")
    pantab.frames_from_hyper(tmp_hyper)

    # Neither read should have updated the file stats
    assert tmp_hyper.stat().st_mtime == mtime_before
def test_multiple_tables(df, tmp_hyper, table_name, table_mode):
    """Writing two tables twice either overwrites or appends, per ``table_mode``."""
    # Write twice; depending on mode this should either overwrite or duplicate entries
    for _ in range(2):
        pantab.frames_to_hyper(
            {table_name: df, "table2": df}, tmp_hyper, table_mode=table_mode
        )

    result = pantab.frames_from_hyper(tmp_hyper)

    expected = df.copy()
    if table_mode == "a":
        expected = pd.concat([expected, expected]).reset_index(drop=True)
    expected["float32"] = expected["float32"].astype(np.float64)

    # some test trickery here
    if not isinstance(table_name, tab_api.TableName) or table_name.schema_name is None:
        table_name = tab_api.TableName("public", table_name)

    assert set(result.keys()) == {table_name, tab_api.TableName("public", "table2")}
    for frame in result.values():
        tm.assert_frame_equal(frame, expected)
def test_months_in_interval_raises(df, tmp_hyper, monkeypatch):
    """Intervals carrying a month component cannot be read back and must raise."""

    # Monkeypatch a constructor that always hard codes a month component
    def patched_init(self, months: int, days: int, microseconds: int):
        self.months = 1
        self.days = days
        self.microseconds = microseconds

    monkeypatch.setattr(pantab._writer.tab_api.Interval, "__init__", patched_init)

    pantab.frame_to_hyper(df, tmp_hyper, table="test")

    msg = r"Cannot read Intervals with month components\."
    with pytest.raises(ValueError, match=msg):
        pantab.frame_from_hyper(tmp_hyper, table="test")
    with pytest.raises(ValueError, match=msg):
        pantab.frames_from_hyper(tmp_hyper)
def test_external_hyper_connection_and_process_error(df, tmp_hyper):
    """Supplying both a Connection and a hyper_process must raise ValueError."""
    expected_msg = (
        "hyper_process parameter is useless because `Connection` is provided"
    )
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, tmp_hyper, CreateMode.CREATE) as connection:
            with pytest.raises(ValueError, match=expected_msg):
                pantab.frame_from_hyper(connection, table="test", hyper_process=hyper)
            with pytest.raises(ValueError, match=expected_msg):
                pantab.frame_from_hyper_query(
                    connection, "SELECT * FROM test", hyper_process=hyper
                )
            with pytest.raises(ValueError, match=expected_msg):
                pantab.frames_from_hyper(connection, hyper_process=hyper)
def hyper_to_csv(hyper_file, csv_export):
    """Export one table of a Tableau ``.hyper`` file to CSV.

    All tables are loaded via pantab's ``frames_from_hyper`` (no table names
    need to be known up front), and the *last* table in the file is written
    to ``csv_export`` — this matches the original implementation, which
    looped over every table but only kept the final DataFrame.

    Parameters:
        hyper_file: File path for the hyper file import
        csv_export: File path for the csv file export

    Returns:
        ``None`` — ``DataFrame.to_csv`` returns ``None`` when given a path;
        the CSV is written to ``csv_export`` with no index column.

    Raises:
        ValueError: If the hyper file contains no tables (the original code
        raised an opaque ``UnboundLocalError`` in this case).
    """
    frames = pt.frames_from_hyper(hyper_file)
    if not frames:
        raise ValueError(f"No tables found in hyper file: {hyper_file}")
    # dicts preserve insertion order, so popitem() yields the final
    # (table name, DataFrame) pair — same frame the original loop kept.
    _, df = frames.popitem()
    return df.to_csv(csv_export, index=None)
def test_roundtrip_with_external_hyper_connection(df, tmp_hyper):
    """Reads can go through an externally managed Connection object."""
    with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        pantab.frames_to_hyper(
            {"test": df, "test2": df}, tmp_hyper, hyper_process=hyper
        )

        with Connection(hyper.endpoint, tmp_hyper, CreateMode.NONE) as connection:
            # Single-table read through the open connection
            frame = pantab.frame_from_hyper(connection, table="test")
            assert_roundtrip_equal(frame, df)

            # Ad-hoc query through the open connection
            frame = pantab.frame_from_hyper_query(connection, "SELECT * FROM test")
            assert frame.size == 63

            # Bulk read of every table through the open connection
            frames = pantab.frames_from_hyper(connection)
            assert set(frames.keys()) == {
                TableName("public", "test"),
                TableName("public", "test2"),
            }
            for frame in frames.values():
                assert_roundtrip_equal(frame, df)
def test_roundtrip_with_external_hyper_process(df, tmp_hyper):
    """A pre-spawned HyperProcess is honored, e.g. to suppress the log file."""
    default_log_path = Path.cwd() / "hyperd.log"
    if default_log_path.exists():
        default_log_path.unlink()

    # By passing in a pre-spawned HyperProcess, one can e.g. avoid creating a log file
    parameters = {"log_config": ""}
    with HyperProcess(
        Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, parameters=parameters
    ) as hyper:
        # frame_to_hyper / frame_from_hyper round-trip
        pantab.frame_to_hyper(df, tmp_hyper, table="test", hyper_process=hyper)
        result = pantab.frame_from_hyper(tmp_hyper, table="test", hyper_process=hyper)
        assert_roundtrip_equal(result, df)

        # frame_from_hyper_query
        result = pantab.frame_from_hyper_query(
            tmp_hyper, "SELECT * FROM test", hyper_process=hyper
        )
        assert result.size == 63

        # frames_to_hyper / frames_from_hyper round-trip
        pantab.frames_to_hyper(
            {"test2": df, "test": df}, tmp_hyper, hyper_process=hyper
        )
        result = pantab.frames_from_hyper(tmp_hyper, hyper_process=hyper)
        assert set(result.keys()) == {
            TableName("public", "test"),
            TableName("public", "test2"),
        }
        for frame in result.values():
            assert_roundtrip_equal(frame, df)

    assert not default_log_path.exists()
def model_uncertainty(function, x, params, covariance): u_params = [np.sqrt(abs(covariance[i, i])) for i in range(len(params))] derivs = partial_derivatives(function, x, params, u_params) squared_model_uncertainty = sum(derivs[i] * derivs[j] * covariance[i, j] for i in range(len(params)) for j in range(len(params))) return np.sqrt(squared_model_uncertainty) url = "https://public.tableau.com/workbooks/Cases_15982342702770.twb" dbname = "Data/Extracts/federated_12gagec10ajljj1457q361.hyper" workbook_data = requests.get(url).content workbook = zipfile.ZipFile(io.BytesIO(workbook_data)) with tempfile.TemporaryDirectory() as tempdir: dbpath = workbook.extract(dbname, path=tempdir) name, df = pantab.frames_from_hyper(dbpath).popitem() data = [] for cases, date in zip(df['Cases'], df['Date']): try: cases = float(cases) except TypeError: cases = 0 date = np.datetime64(date, 'h') + 24 data.append((date, cases)) data.sort() dates, new = [np.array(a) for a in zip(*data)] # Fill in missing dates when there were zero cases: for date in np.arange(dates[0], dates[-1], 24):