def test_historical_table_replayer(self):
    """ Replays two identical historical tables and checks the replayer lifecycle.

    Two tables with the same three timestamped rows are added to one
    TableReplayer. The replayed tables must be equal before the replayer is
    started and again once each has reached three rows. After shutdown(),
    add_table() and start() must both raise DHError. A newly created replayer
    must accept a table that is added after start().
    """
    timestamps = [
        to_datetime("2000-01-01T00:00:01 NY"),
        to_datetime("2000-01-01T00:00:02 NY"),
        to_datetime("2000-01-01T00:00:04 NY"),
    ]

    def build_history():
        # Each call gets its own copy of the timestamp list.
        return new_table([
            datetime_col("DateTime", list(timestamps)),
            int_col("Number", [1, 3, 6]),
        ])

    first_history = build_history()
    second_history = build_history()

    replay_start = to_datetime("2000-01-01T00:00:00 NY")
    replay_end = to_datetime("2000-01-01T00:00:05 NY")

    replayer = TableReplayer(replay_start, replay_end)
    first_replay = replayer.add_table(first_history, "DateTime")
    second_replay = replayer.add_table(second_history, "DateTime")
    replayed_tables = (first_replay, second_replay)
    self.assert_table_equals(first_replay, second_replay)

    replayer.start()
    for replayed in replayed_tables:
        self.assertTrue(replayed.is_refreshing)
    for replayed in replayed_tables:
        self.wait_ticking_table_update(replayed, row_count=3, timeout=60)
    self.assert_table_equals(first_replay, second_replay)
    replayer.shutdown()

    with self.subTest("replayer can't be reused after shutdown."):
        with self.assertRaises(DHError) as raised:
            replayer.add_table(first_history, "DateTime")
        self.assertIn("RuntimeError", raised.exception.root_cause)

    with self.subTest("replayer can't be restarted after shutdown."):
        with self.assertRaises(DHError):
            replayer.start()

    with self.subTest("Add table after replayer is restarted."):
        # A shut-down replayer is not reused; a new instance is created instead.
        fresh_replayer = TableReplayer(replay_start, replay_end)
        fresh_replayer.start()
        late_replay = fresh_replayer.add_table(first_history, "DateTime")
        self.assertTrue(late_replay.is_refreshing)
        self.wait_ticking_table_update(late_replay, row_count=3, timeout=60)
        fresh_replayer.shutdown()
def to_table(df: pandas.DataFrame, cols: List[str] = None) -> Table:
    """ Creates a new table from a pandas.DataFrame.

    Args:
        df (DataFrame): the Pandas DataFrame instance
        cols (List[str]): the dataframe column names to convert; None (the default) or an empty list means all
            columns in the dataframe

    Returns:
        a Deephaven table

    Raises:
        DHError: if any requested column is not in the dataframe, or if the conversion fails
    """
    try:
        if cols:
            # Reject names that the dataframe does not have before converting anything.
            diff_set = set(cols) - set(list(df))
            if diff_set:
                raise DHError(message=f"columns - {list(diff_set)} not found")
        else:
            cols = list(df)

        input_cols = [_make_input_column(name, df.get(name).values) for name in cols]
        return new_table(cols=input_cols)
    except DHError:
        # Already in the package's error type; pass it through unwrapped.
        raise
    except Exception as e:
        raise DHError(
            e, "failed to create a Deephaven Table from a Pandas DataFrame."
        ) from e
def test_new_table(self):
    """ Builds a two-row table from one column of each column-factory kind and checks its size. """
    # Two Java array lists used as the values of the JObj column.
    java_lists = []
    for values in ((1, -1), (2, -2)):
        java_list = JArrayList()
        for value in values:
            java_list.add(value)
        java_lists.append(java_list)

    columns = [
        bool_col(name="Boolean", data=[True, False]),
        byte_col(name="Byte", data=(1, -1)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, -1]),
        int_col(name="Int", data=[1, -1]),
        long_col(name="Long", data=[1, -1]),
        long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
        float_col(name="Float", data=[1.01, -1.01]),
        double_col(name="Double", data=[1.01, -1.01]),
        string_col(name="String", data=["foo", "bar"]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), dtypes.DateTime(-1)]),
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj1", data=[[1, 2, 3], CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj2", data=[False, 'False']),
        jobj_col(name="JObj", data=java_lists),
    ]

    table = new_table(cols=columns)
    self.assertEqual(table.size, 2)
def test_to_numpy_remap(self):
    """ Checks to_numpy on single columns and on long columns remapped to double.

    Each column of self.test_table is converted on its own and must give a
    (2, 1) array. A table of long columns then has Long2 and Long4 rewritten
    by update() formulas; the resulting array must be (2, 2) with a float
    dtype and must convert back to an equal table through to_table().
    """
    for column in self.test_table.columns:
        with self.subTest(f"test single column to numpy - {column.name}"):
            single = to_numpy(self.test_table, [column.name])
            self.assertEqual((2, 1), single.shape)

    every_column = [column.name for column in self.test_table.columns]
    # NOTE(review): if to_numpy() does not raise here, this block passes
    # without checking anything - confirm whether assertRaises was intended.
    try:
        to_numpy(self.test_table, every_column)
    except DHError as e:
        self.assertIn("same data type", e.root_cause)

    with self.subTest("test multi-columns to numpy"):
        long_columns = [
            long_col(name="Long", data=[101, -101]),
            long_col(name="Long1", data=[11011, -11011]),
            long_col(name="Long2", data=[NULL_LONG, -1110111]),
            long_col(name="Long3", data=[111101111, -111101111]),
            long_col(name="Long4", data=[11111011111, MAX_LONG]),
        ]
        remap_formulas = [
            "Long2 = isNull(Long2) ? Double.NaN : Long2",
            "Long4 = (double)Long4",
        ]
        remapped = new_table(cols=long_columns).update(formulas=remap_formulas)

        as_array = to_numpy(remapped, ['Long2', 'Long4'])
        self.assertEqual((2, 2), as_array.shape)
        self.assertEqual(as_array.dtype, float)

        rebuilt = to_table(as_array, ['Long2', 'Long4'])
        self.assert_table_equals(rebuilt, remapped.select(['Long2', 'Long4']))
def test_get_constituent(self):
    """ Checks get_constituent() with a list of keys and with a single key.

    The first lookup uses the fixture's partitioned table with a two-value
    key list. The second partitions a small houses table by "HomeType" and
    looks up the "Colonial" constituent by a single string key.
    """
    constituent = self.partitioned_table.get_constituent([917, 167])
    self.assertIsNotNone(constituent)

    from deephaven.column import string_col, int_col, double_col

    home_types = [
        "Colonial", "Contemporary", "Contemporary", "Condo", "Colonial", "Apartment"
    ]
    street_names = [
        "Test Drive", "Test Drive", "Test Drive", "Deephaven Road", "Community Circle", "Community Circle"
    ]
    houses = new_table([
        string_col("HomeType", home_types),
        int_col("HouseNumber", [1, 3, 4, 15, 4, 9]),
        string_col("StreetName", street_names),
        int_col("SquareFeet", [2251, 1914, 4266, 1280, 3433, 981]),
        int_col("Price", [450000, 400000, 1250000, 300000, 600000, 275000]),
        double_col("LotSizeAcres", [0.41, 0.26, 1.88, 0.11, 0.95, 0.10]),
    ])

    by_home_type = houses.partition_by("HomeType")
    self.assertIsNotNone(by_home_type.get_constituent("Colonial"))
def setUp(self):
    """ Creates self.test_table, a two-row table with one column per column-factory kind used by the tests. """
    first_j_list, second_j_list = j_array_list([1, -1]), j_array_list([2, -2])

    columns = [
        bool_col(name="Boolean", data=[True, False]),
        byte_col(name="Byte", data=(1, -1)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, -1]),
        int_col(name="Int_", data=[1, -1]),
        long_col(name="Long_", data=[1, NULL_LONG]),
        long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
        float_col(name="Float_", data=[1.01, -1.01]),
        double_col(name="Double_", data=[1.01, -1.01]),
        string_col(name="String", data=["foo", "bar"]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), dtypes.DateTime(-1)]),
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj1", data=[[1, 2, 3], CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj2", data=[False, 'False']),
        jobj_col(name="JObj", data=[first_j_list, second_j_list]),
    ]
    self.test_table = new_table(cols=columns)
def test_to_numpy(self):
    """ Checks to_numpy on each fixture column and on a table of five float columns.

    Every column of self.test_table is converted on its own and must give a
    (2, 1) array. A table with five float columns must convert to a (2, 5)
    array.
    """
    for column in self.test_table.columns:
        with self.subTest(f"test single column to numpy- {column.name}"):
            single = to_numpy(self.test_table, [column.name])
            self.assertEqual((2, 1), single.shape)
            # NOTE(review): the result of this comparison is discarded, so
            # the line asserts nothing. Kept unchanged; whether the expected
            # arrays match the (2, 1) result needs checking before this is
            # turned into an assertion.
            np.array_equal(single, self.np_array_dict[column.name])

    every_column = [column.name for column in self.test_table.columns]
    # NOTE(review): if to_numpy() does not raise here, this block passes
    # without checking anything - confirm whether assertRaises was intended.
    try:
        to_numpy(self.test_table, every_column)
    except DHError as e:
        self.assertIn("same data type", e.root_cause)

    with self.subTest("test multi-columns to numpy"):
        float_columns = [
            float_col(name="Float", data=[1.01, -1.01]),
            float_col(name="Float1", data=[11.011, -11.011]),
            float_col(name="Float2", data=[111.0111, -111.0111]),
            float_col(name="Float3", data=[1111.01111, -1111.01111]),
            float_col(name="Float4", data=[11111.011111, -11111.011111]),
        ]
        float_table = new_table(cols=float_columns)
        as_array = to_numpy(float_table, [column.name for column in float_table.columns])
        self.assertEqual((2, 5), as_array.shape)
def test_to_table(self):
    """ Checks that arrays produced by to_numpy convert back to tables with to_table.

    Each column of self.test_table is converted to an array and back, and the
    resulting table must have the same size as the fixture table. A table of
    five float columns must round-trip to an equal table. Passing an array
    with three columns together with five column names must raise DHError
    whose root cause contains "doesn't match".
    """
    for col in self.test_table.columns:
        with self.subTest(f"test single column to numpy- {col.name}"):
            np_array = to_numpy(self.test_table, [col.name])
            test_table = to_table(np_array, [col.name])
            self.assertEqual(test_table.size, self.test_table.size)

    with self.subTest("test multi-columns to numpy"):
        input_cols = [
            float_col(name="Float", data=[1.01, -1.01]),
            float_col(name="Float1", data=[11.011, -11.011]),
            float_col(name="Float2", data=[111.0111, -111.0111]),
            float_col(name="Float3", data=[1111.01111, -1111.01111]),
            float_col(name="Float4", data=[11111.011111, -11111.011111]),
        ]
        tmp_table = new_table(cols=input_cols)
        col_names = [col.name for col in tmp_table.columns]
        np_array = to_numpy(tmp_table, col_names)
        tmp_table2 = to_table(np_array, col_names)
        self.assert_table_equals(tmp_table2, tmp_table)

        with self.assertRaises(DHError) as cm:
            # Only the raised error matters here, so the result is not bound.
            to_table(np_array[:, [0, 1, 3]], [col.name for col in tmp_table.columns])
        self.assertIn("doesn't match", cm.exception.root_cause)
def setUp(self):
    """ Creates the fixture table and a dict of numpy arrays keyed by column name.

    self.test_table is a two-row table with one column per column-factory
    kind. self.np_array_dict holds one numpy array per column name.
    """
    j_array_list1 = j_array_list([1, -1])
    j_array_list2 = j_array_list([2, -2])

    input_cols = [
        bool_col(name="Boolean", data=[True, False]),
        byte_col(name="Byte", data=(1, -1)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, -1]),
        int_col(name="Int", data=[1, -1]),
        long_col(name="Long", data=[1, NULL_LONG]),
        long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
        float_col(name="Float", data=[1.01, -1.01]),
        double_col(name="Double", data=[1.01, -1.01]),
        string_col(name="String", data=["foo", "bar"]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), dtypes.DateTime(-1)]),
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj1", data=[[1, 2, 3], CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj2", data=[False, 'False']),
        jobj_col(name="JObj", data=[j_array_list1, j_array_list2]),
    ]
    self.test_table = new_table(cols=input_cols)

    self.np_array_dict = {
        'Boolean': np.array([True, False]),
        'Byte': np.array([1, -1], dtype=np.int8),
        # NOTE(review): np.array('-1', dtype=np.int16) is a 0-d array, not a
        # two-element one like the other entries - confirm this is intended.
        'Char': np.array('-1', dtype=np.int16),
        'Short': np.array([1, -1], dtype=np.int16),
        'Int': np.array([1, -1], dtype=np.int32),
        'Long': np.array([1, NULL_LONG], dtype=np.int64),
        "NPLong": np.array([1, -1], dtype=np.int8),
        "Float": np.array([1.01, -1.01], dtype=np.float32),
        "Double": np.array([1.01, -1.01]),
        # np.bytes_ is the same type as the np.string_ alias on NumPy 1.x;
        # the alias was removed in NumPy 2.0.
        "String": np.array(["foo", "bar"], dtype=np.bytes_),
        "Datetime": np.array([1, -1], dtype=np.dtype("datetime64[ns]")),
        "PyObj": np.array([CustomClass(1, "1"), CustomClass(-1, "-1")]),
        "PyObj1": np.array([[1, 2, 3], CustomClass(-1, "-1")], dtype=np.object_),
        "PyObj2": np.array([False, 'False'], dtype=np.object_),
        "JObj": np.array([j_array_list1, j_array_list2]),
    }
def to_table(np_array: np.ndarray, cols: List[str]) -> Table:
    """ Creates a new table from a numpy array.

    A 1-D array is treated as a single column. For arrays with more
    dimensions, the size of the second axis is the number of columns and
    column i is taken as np_array[:, [i]].

    Args:
        np_array (np.ndarray): the numpy array
        cols (List[str]): the table column names that will be assigned to each column in the numpy array

    Returns:
        a Deephaven table

    Raises:
        DHError: if the number of column names is zero or doesn't match the number of array columns, or if
            the conversion fails
    """
    try:
        _, *dims = np_array.shape
        # With no second axis there is exactly one column of data.
        array_col_count = dims[0] if dims else 1
        # cols may be None; len(None) would hide the mismatch behind a TypeError.
        name_count = len(cols) if cols else 0
        if not cols or name_count != array_col_count:
            raise DHError(
                message=f"the number of array columns {array_col_count} doesn't match "
                        f"the number of column names {name_count}")

        if name_count == 1:
            # A single column takes the whole array as-is (1-D or (n, 1)).
            input_cols = [_make_input_column(cols[0], np_array)]
        else:
            input_cols = [_make_input_column(col, np_array[:, [i]]) for i, col in enumerate(cols)]

        return new_table(cols=input_cols)
    except DHError:
        # Already in the package's error type; pass it through unwrapped.
        raise
    except Exception as e:
        raise DHError(e, "failed to create a Deephaven Table from a numpy array.") from e
def test_to_table_boolean_with_none(self):
    """ Round-trips a boolean column containing None through pandas.

    The Boolean column is first rewritten by an update() formula; the
    rewritten table is converted with to_pandas() and back with to_table(),
    and the result must equal the rewritten table.
    """
    nullable_bool_table = new_table(cols=[bool_col(name="Boolean", data=[True, None])])
    remap_formula = "Boolean = isNull(Boolean) ? NULL_BYTE : (Boolean == true ? 1: 0)"
    prepared = nullable_bool_table.update(formulas=[remap_formula])

    round_tripped = to_table(to_pandas(prepared))
    self.assert_table_equals(round_tripped, prepared)
def table_helper():
    """ Returns a new four-row table with Symbol, Side, Qty and Price columns. """
    return new_table(cols=[
        string_col('Symbol', ['MSFT', 'GOOG', 'AAPL', 'AAPL']),
        string_col('Side', ['B', 'B', 'S', 'B']),
        int_col('Qty', [200, 100, 300, 50]),
        double_col('Price', [210.0, 310.5, 411.0, 411.5]),
    ])
def test_array_column(self):
    """ Checks column component types after a group_by.

    After grouping by "StringColumn", the key column must have no component
    type and the grouped "Decimals" column must have component type
    dtypes.double.
    """
    grouped = new_table([
        string_col("StringColumn", ["Str1", "Str1", "Str2", "Str2"]),
        double_col("Decimals", [1.0, 2.0, 4.0, 8.0]),
    ]).group_by(["StringColumn"])

    self.assertIsNone(grouped.columns[0].component_type)
    self.assertEqual(grouped.columns[1].component_type, dtypes.double)
def test_simple_spec(self):
    """ Checks that pk.produce with a simple value spec returns a cleanup callable.

    A one-column Price table is passed to pk.produce for the 'orders' topic
    with the key ignored and pk.simple_spec('Price') as the value spec. The
    returned object must not be None and is then called.
    """
    price_table = new_table(cols=[double_col('Price', [10.0, 10.5, 11.0, 11.5])])
    kafka_config = {'bootstrap.servers': 'redpanda:29092'}

    stop_producing = pk.produce(
        price_table,
        kafka_config,
        'orders',
        key_spec=KeyValueSpec.IGNORE,
        value_spec=pk.simple_spec('Price'),
    )

    self.assertIsNotNone(stop_producing)
    stop_producing()
def test_vector_column(self):
    """ Checks how grouped columns appear in a pandas DataFrame.

    After grouping by "String", both DataFrame columns must have object
    dtype, and each element of the Doubles column must expose toArray()
    returning that group's values.
    """
    grouped = new_table([
        string_col("String", ["Str1", "Str1", "Str2", "Str2", "Str2"]),
        double_col("Doubles", [1.0, 2.0, 4.0, 8.0, 16.0]),
    ]).group_by(["String"])

    df = to_pandas(grouped, cols=["String", "Doubles"])
    for column_name in ('String', 'Doubles'):
        self.assertEqual(df[column_name].dtype, np.object_)

    grouped_doubles = df['Doubles']
    self.assertEqual([1.0, 2.0], list(grouped_doubles[0].toArray()))
    self.assertEqual([4.0, 8.0, 16.0], list(grouped_doubles[1].toArray()))
def test_to_table(self):
    """ Round-trips a table of boolean and numeric columns through pandas and expects an equal table. """
    columns = [
        bool_col(name="Boolean", data=[True, False]),
        byte_col(name="Byte", data=(1, -1)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, -1]),
        int_col(name="Int", data=[1, -1]),
        long_col(name="Long", data=[1, NULL_LONG]),
        long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
        float_col(name="Float", data=[1.01, -1.01]),
        double_col(name="Double", data=[1.01, -1.01]),
    ]
    source_table = new_table(cols=columns)

    round_tripped = to_table(to_pandas(source_table))
    self.assert_table_equals(round_tripped, source_table)
def test_to_table_datetime_with_none(self):
    """ Round-trips a datetime column containing None through pandas and expects an equal table. """
    ny_time = to_datetime("2021-12-10T23:59:59 NY")
    hi_time = to_datetime("2021-12-10T23:59:59 HI")

    source_table = new_table(cols=[
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), None, ny_time, hi_time]),
    ])

    round_tripped = to_table(to_pandas(source_table))
    self.assert_table_equals(round_tripped, source_table)
def test_round_trip_with_nulls(self):
    """ Round-trips a table whose columns contain null values through pandas.

    The DataFrame must have as many columns as the table and two values per
    column, and converting it back must give a table equal to the original.
    """
    # There is no two-way conversion for the following column types, so they
    # are left out of this test:
    #   bool_col(name="Boolean", data=[True, None])
    #   string_col(name="String", data=["foo", None])
    #   jobj_col(name="JObj", data=[j_array_list, None])
    #       with j_array_list = dtypes.ArrayList([1, -1])
    columns = [
        byte_col(name="Byte", data=(1, NULL_BYTE)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, NULL_SHORT]),
        int_col(name="Int_", data=[1, NULL_INT]),
        long_col(name="Long_", data=[1, NULL_LONG]),
        float_col(name="Float_", data=[1.01, np.nan]),
        double_col(name="Double_", data=[1.01, np.nan]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), None]),
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), None]),
    ]
    source_table = new_table(cols=columns)

    df = to_pandas(source_table)
    self.assertEqual(len(df.columns), len(source_table.columns))
    self.assertEqual(df.size, 2 * len(source_table.columns))

    round_tripped = to_table(df)
    self.assert_table_equals(round_tripped, source_table)
def test_big_decimal(self):
    """ Writes a BigDecimal column to parquet and reads it back.

    A table with one BigDecimal column of three values created with
    j_type.valueOf is written to 'table1.parquet' under the test's temporary
    directory. The table read back must have the same size and equal content,
    and the output path must exist. The output directory is removed at the
    end.
    """
    j_type = dtypes.BigDecimal.j_type
    big_decimal_list = [
        j_type.valueOf(301, 2),
        j_type.valueOf(201, 2),
        j_type.valueOf(101, 2),
    ]
    bd_col = InputColumn(name='decimal_value', data_type=dtypes.BigDecimal, input_data=big_decimal_list)
    table = new_table([bd_col])
    self.assertIsNotNone(table)

    base_dir = os.path.join(self.temp_dir.name, 'testCreation')
    file_location = os.path.join(base_dir, 'table1.parquet')
    # Clear any leftover output. shutil.rmtree() raises on a regular file,
    # so the removal call is chosen by what is actually at the path.
    if os.path.isdir(file_location):
        shutil.rmtree(file_location)
    elif os.path.exists(file_location):
        os.remove(file_location)

    write(table, file_location)
    table2 = read(file_location)
    self.assertEqual(table.size, table2.size)
    self.assert_table_equals(table, table2)

    self.assertTrue(os.path.exists(file_location))
    shutil.rmtree(base_dir)