Beispiel #1
0
    def test_historical_table_replayer(self):
        """Replay two identical historical tables and check the replayer lifecycle.

        Verifies that both replayed tables are equal before and after ticking,
        that a shut-down replayer raises DHError on ``add_table`` and ``start``,
        and that a table can be added to a new replayer that is already started.
        """
        first_ts = to_datetime("2000-01-01T00:00:01 NY")
        second_ts = to_datetime("2000-01-01T00:00:02 NY")
        third_ts = to_datetime("2000-01-01T00:00:04 NY")

        def build_history():
            # Each call produces a separate table with the same three rows.
            return new_table([
                datetime_col("DateTime", [first_ts, second_ts, third_ts]),
                int_col("Number", [1, 3, 6]),
            ])

        history_a = build_history()
        history_b = build_history()

        # The replay window starts before the first row and ends after the last.
        window_start = to_datetime("2000-01-01T00:00:00 NY")
        window_end = to_datetime("2000-01-01T00:00:05 NY")

        player = TableReplayer(window_start, window_end)
        replayed_a = player.add_table(history_a, "DateTime")
        replayed_b = player.add_table(history_b, "DateTime")
        self.assert_table_equals(replayed_a, replayed_b)

        player.start()
        for replayed in (replayed_a, replayed_b):
            self.assertTrue(replayed.is_refreshing)
        for replayed in (replayed_a, replayed_b):
            self.wait_ticking_table_update(replayed, row_count=3, timeout=60)
        self.assert_table_equals(replayed_a, replayed_b)
        player.shutdown()

        with self.subTest("replayer can't be reused after shutdown."):
            with self.assertRaises(DHError) as raised:
                player.add_table(history_a, "DateTime")
            self.assertIn("RuntimeError", raised.exception.root_cause)

        with self.subTest("replayer can't be restarted after shutdown."):
            with self.assertRaises(DHError):
                player.start()

        with self.subTest("Add table after replayer is restarted."):
            # A brand-new replayer is started first, then given its table.
            player = TableReplayer(window_start, window_end)
            player.start()
            replayed_a = player.add_table(history_a, "DateTime")
            self.assertTrue(replayed_a.is_refreshing)
            self.wait_ticking_table_update(replayed_a, row_count=3, timeout=60)
            player.shutdown()
Beispiel #2
0
def to_table(df: pandas.DataFrame, cols: List[str] = None) -> Table:
    """Build a Deephaven table out of a pandas.DataFrame.

    Args:
        df (DataFrame): the source pandas DataFrame
        cols (List[str]): names of the DataFrame columns to convert; None (the default) or an empty list
            converts every column of the DataFrame

    Returns:
        a Deephaven table

    Raises:
        DHError
    """
    try:
        if cols:
            # Reject any requested name that the DataFrame does not have.
            missing = set(cols) - set(df)
            if missing:
                raise DHError(message=f"columns - {list(missing)} not found")
        else:
            cols = list(df)

        return new_table(cols=[_make_input_column(name, df.get(name).values) for name in cols])
    except DHError:
        # Already a Deephaven error; let it through unwrapped.
        raise
    except Exception as e:
        raise DHError(e, "failed to create a Deephaven Table from a Pandas DataFrame.") from e
Beispiel #3
0
    def test_new_table(self):
        """Build a two-row table from a mix of column factories and check its size."""
        java_lists = []
        for values in ((1, -1), (2, -2)):
            java_list = JArrayList()
            for value in values:
                java_list.add(value)
            java_lists.append(java_list)

        columns = [
            bool_col(name="Boolean", data=[True, False]),
            byte_col(name="Byte", data=(1, -1)),
            char_col(name="Char", data='-1'),
            short_col(name="Short", data=[1, -1]),
            int_col(name="Int", data=[1, -1]),
            long_col(name="Long", data=[1, -1]),
            long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
            float_col(name="Float", data=[1.01, -1.01]),
            double_col(name="Double", data=[1.01, -1.01]),
            string_col(name="String", data=["foo", "bar"]),
            datetime_col(
                name="Datetime",
                data=[dtypes.DateTime(1), dtypes.DateTime(-1)],
            ),
            pyobj_col(
                name="PyObj",
                data=[CustomClass(1, "1"), CustomClass(-1, "-1")],
            ),
            pyobj_col(
                name="PyObj1",
                data=[[1, 2, 3], CustomClass(-1, "-1")],
            ),
            pyobj_col(name="PyObj2", data=[False, 'False']),
            jobj_col(name="JObj", data=java_lists),
        ]

        created = new_table(cols=columns)
        self.assertEqual(created.size, 2)
Beispiel #4
0
    def test_to_numpy_remap(self):
        """Check to_numpy on single columns, on mixed-type columns, and on remapped long columns.

        The multi-column case rewrites two long columns as doubles (null becomes
        NaN), converts them to a numpy array and round-trips the array back
        through to_table.
        """
        for col in self.test_table.columns:
            with self.subTest(f"test single column to numpy - {col.name}"):
                np_array = to_numpy(self.test_table, [col.name])
                self.assertEqual((2, 1), np_array.shape)

        # assertRaises makes the test fail if no DHError is raised at all; a
        # plain try/except would silently pass in that case.
        with self.assertRaises(DHError) as cm:
            to_numpy(self.test_table,
                     [col.name for col in self.test_table.columns])
        self.assertIn("same data type", cm.exception.root_cause)

        with self.subTest("test multi-columns to numpy"):
            input_cols = [
                long_col(name="Long", data=[101, -101]),
                long_col(name="Long1", data=[11011, -11011]),
                long_col(name="Long2", data=[NULL_LONG, -1110111]),
                long_col(name="Long3", data=[111101111, -111101111]),
                long_col(name="Long4", data=[11111011111, MAX_LONG])
            ]
            tmp_table = new_table(cols=input_cols)
            # Turn both selected columns into doubles so they share one type.
            tmp_table = tmp_table.update(formulas=[
                "Long2 = isNull(Long2) ? Double.NaN : Long2",
                "Long4 = (double)Long4"
            ])
            np_array = to_numpy(tmp_table, ['Long2', 'Long4'])
            self.assertEqual((2, 2), np_array.shape)
            self.assertEqual(np_array.dtype, float)
            tmp_table2 = to_table(np_array, ['Long2', 'Long4'])
            self.assert_table_equals(tmp_table2,
                                     tmp_table.select(['Long2', 'Long4']))
    def test_get_constituent(self):
        """Look up constituents by a multi-value key and by a single string key."""
        self.assertIsNotNone(self.partitioned_table.get_constituent([917, 167]))

        from deephaven.column import string_col, int_col, double_col

        home_types = [
            "Colonial", "Contemporary", "Contemporary", "Condo",
            "Colonial", "Apartment"
        ]
        street_names = [
            "Test Drive", "Test Drive", "Test Drive", "Deephaven Road",
            "Community Circle", "Community Circle"
        ]
        listings = new_table([
            string_col("HomeType", home_types),
            int_col("HouseNumber", [1, 3, 4, 15, 4, 9]),
            string_col("StreetName", street_names),
            int_col("SquareFeet", [2251, 1914, 4266, 1280, 3433, 981]),
            int_col("Price",
                    [450000, 400000, 1250000, 300000, 600000, 275000]),
            double_col("LotSizeAcres", [0.41, 0.26, 1.88, 0.11, 0.95, 0.10]),
        ])

        # Partition on a single key column, then fetch one partition by value.
        by_home_type = listings.partition_by("HomeType")
        self.assertIsNotNone(by_home_type.get_constituent("Colonial"))
Beispiel #6
0
 def setUp(self):
     """Create ``self.test_table``: a two-row table built from a mix of column factories."""
     java_lists = [j_array_list([1, -1]), j_array_list([2, -2])]

     columns = [
         bool_col(name="Boolean", data=[True, False]),
         byte_col(name="Byte", data=(1, -1)),
         char_col(name="Char", data='-1'),
         short_col(name="Short", data=[1, -1]),
         int_col(name="Int_", data=[1, -1]),
         long_col(name="Long_", data=[1, NULL_LONG]),
         long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
         float_col(name="Float_", data=[1.01, -1.01]),
         double_col(name="Double_", data=[1.01, -1.01]),
         string_col(name="String", data=["foo", "bar"]),
         datetime_col(
             name="Datetime",
             data=[dtypes.DateTime(1), dtypes.DateTime(-1)],
         ),
         pyobj_col(
             name="PyObj",
             data=[CustomClass(1, "1"), CustomClass(-1, "-1")],
         ),
         pyobj_col(
             name="PyObj1",
             data=[[1, 2, 3], CustomClass(-1, "-1")],
         ),
         pyobj_col(name="PyObj2", data=[False, 'False']),
         jobj_col(name="JObj", data=java_lists),
     ]
     self.test_table = new_table(cols=columns)
Beispiel #7
0
    def test_to_numpy(self):
        """Check to_numpy on single columns, on mixed-type columns, and on several float columns."""
        for col in self.test_table.columns:
            with self.subTest(f"test single column to numpy- {col.name}"):
                np_array = to_numpy(self.test_table, [col.name])
                self.assertEqual((2, 1), np_array.shape)
                # NOTE(review): the result of np.array_equal is discarded, so
                # this comparison is never asserted. Left as is because
                # asserting it may need the expected arrays adjusted first.
                np.array_equal(np_array, self.np_array_dict[col.name])

        # assertRaises makes the test fail if no DHError is raised at all; a
        # plain try/except would silently pass in that case.
        with self.assertRaises(DHError) as cm:
            to_numpy(self.test_table,
                     [col.name for col in self.test_table.columns])
        self.assertIn("same data type", cm.exception.root_cause)

        with self.subTest("test multi-columns to numpy"):
            input_cols = [
                float_col(name="Float", data=[1.01, -1.01]),
                float_col(name="Float1", data=[11.011, -11.011]),
                float_col(name="Float2", data=[111.0111, -111.0111]),
                float_col(name="Float3", data=[1111.01111, -1111.01111]),
                float_col(name="Float4", data=[11111.011111, -11111.011111])
            ]
            tmp_table = new_table(cols=input_cols)
            np_array = to_numpy(tmp_table,
                                [col.name for col in tmp_table.columns])
            # Two rows by five float columns.
            self.assertEqual((2, 5), np_array.shape)
Beispiel #8
0
    def test_to_table(self):
        """Round-trip table columns through numpy arrays and back via to_table."""
        for col in self.test_table.columns:
            with self.subTest(f"test single column to numpy- {col.name}"):
                single_col_array = to_numpy(self.test_table, [col.name])
                rebuilt = to_table(single_col_array, [col.name])
                self.assertEqual(rebuilt.size, self.test_table.size)

        with self.subTest("test multi-columns to numpy"):
            float_table = new_table(cols=[
                float_col(name="Float", data=[1.01, -1.01]),
                float_col(name="Float1", data=[11.011, -11.011]),
                float_col(name="Float2", data=[111.0111, -111.0111]),
                float_col(name="Float3", data=[1111.01111, -1111.01111]),
                float_col(name="Float4", data=[11111.011111, -11111.011111]),
            ])
            multi_col_array = to_numpy(
                float_table, [col.name for col in float_table.columns])
            round_tripped = to_table(
                multi_col_array, [col.name for col in float_table.columns])
            self.assert_table_equals(round_tripped, float_table)

            # Three array columns against five names must be rejected.
            with self.assertRaises(DHError) as raised:
                to_table(multi_col_array[:, [0, 1, 3]],
                         [col.name for col in float_table.columns])
            self.assertIn("doesn't match", raised.exception.root_cause)
Beispiel #9
0
    def setUp(self):
        """Create ``self.test_table`` and ``self.np_array_dict``.

        ``self.test_table`` is a two-row table built from a mix of column
        factories; ``self.np_array_dict`` maps each column name to a numpy
        array built from the same input data.
        """
        j_array_list1 = j_array_list([1, -1])
        j_array_list2 = j_array_list([2, -2])
        input_cols = [
            bool_col(name="Boolean", data=[True, False]),
            byte_col(name="Byte", data=(1, -1)),
            char_col(name="Char", data='-1'),
            short_col(name="Short", data=[1, -1]),
            int_col(name="Int", data=[1, -1]),
            long_col(name="Long", data=[1, NULL_LONG]),
            long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
            float_col(name="Float", data=[1.01, -1.01]),
            double_col(name="Double", data=[1.01, -1.01]),
            string_col(name="String", data=["foo", "bar"]),
            datetime_col(name="Datetime",
                         data=[dtypes.DateTime(1),
                               dtypes.DateTime(-1)]),
            pyobj_col(name="PyObj",
                      data=[CustomClass(1, "1"),
                            CustomClass(-1, "-1")]),
            pyobj_col(name="PyObj1", data=[[1, 2, 3],
                                           CustomClass(-1, "-1")]),
            pyobj_col(name="PyObj2", data=[False, 'False']),
            jobj_col(name="JObj", data=[j_array_list1, j_array_list2]),
        ]
        self.test_table = new_table(cols=input_cols)

        self.np_array_dict = {
            'Boolean':
            np.array([True, False]),
            'Byte':
            np.array([1, -1], dtype=np.int8),
            'Char':
            np.array('-1', dtype=np.int16),
            'Short':
            np.array([1, -1], dtype=np.int16),
            'Int':
            np.array([1, -1], dtype=np.int32),
            'Long':
            np.array([1, NULL_LONG], dtype=np.int64),
            "NPLong":
            np.array([1, -1], dtype=np.int8),
            "Float":
            np.array([1.01, -1.01], dtype=np.float32),
            "Double":
            np.array([1.01, -1.01]),
            # np.bytes_ is the same type the np.string_ alias referred to; the
            # alias itself was removed in NumPy 2.0.
            "String":
            np.array(["foo", "bar"], dtype=np.bytes_),
            "Datetime":
            np.array([1, -1], dtype=np.dtype("datetime64[ns]")),
            "PyObj":
            np.array([CustomClass(1, "1"),
                      CustomClass(-1, "-1")]),
            "PyObj1":
            np.array([[1, 2, 3], CustomClass(-1, "-1")], dtype=np.object_),
            "PyObj2":
            np.array([False, 'False'], dtype=np.object_),
            "JObj":
            np.array([j_array_list1, j_array_list2]),
        }
Beispiel #10
0
def to_table(np_array: np.ndarray, cols: List[str]) -> Table:
    """  Creates a new table from a numpy array.

    Args:
        np_array (np.ndarray): the numpy array; a 1-D array is treated as a single column, otherwise the size of
            its second dimension is taken as the number of columns
        cols (List[str]): the table column names that will be assigned to each column in the numpy array

    Returns:
        a Deephaven table

    Raises:
        DHError: if the number of column names doesn't match the number of array columns, or if the conversion
            fails for any other reason
    """

    try:
        _, *dims = np_array.shape
        # A 1-D array has no second dimension, so it counts as one column. Validating this case too keeps a
        # 1-D array with several names from failing later with an opaque IndexError.
        array_col_count = dims[0] if dims else 1
        # Treat None like an empty list so the mismatch is reported instead of a TypeError from len(None).
        name_count = len(cols) if cols else 0
        if name_count != array_col_count:
            raise DHError(
                message=f"the number of array columns {array_col_count} doesn't match "
                        f"the number of column names {name_count}")

        if name_count == 1:
            # A single column is passed through whole, whether the array is 1-D or has one column.
            input_cols = [_make_input_column(cols[0], np_array)]
        else:
            # Indexing with [i] keeps each slice as a column-shaped array.
            input_cols = [_make_input_column(col, np_array[:, [i]]) for i, col in enumerate(cols)]

        return new_table(cols=input_cols)
    except DHError:
        raise
    except Exception as e:
        raise DHError(e, "failed to create a Deephaven Table from a numpy array.") from e
Beispiel #11
0
 def test_to_table_boolean_with_none(self):
     """Round-trip a boolean column containing None through pandas after remapping it with a formula."""
     nullable_bools = new_table(cols=[bool_col(name="Boolean", data=[True, None])])

     # The formula replaces the boolean values with NULL_BYTE, 1 or 0 before conversion.
     remap_formula = "Boolean = isNull(Boolean) ? NULL_BYTE : (Boolean == true ? 1: 0)"
     remapped = nullable_bools.update(formulas=[remap_formula])

     round_tripped = to_table(to_pandas(remapped))
     self.assert_table_equals(round_tripped, remapped)
Beispiel #12
0
def table_helper():
    """Return a four-row sample table with Symbol, Side, Qty and Price columns."""
    return new_table(cols=[
        string_col('Symbol', ['MSFT', 'GOOG', 'AAPL', 'AAPL']),
        string_col('Side', ['B', 'B', 'S', 'B']),
        int_col('Qty', [200, 100, 300, 50]),
        double_col('Price', [210.0, 310.5, 411.0, 411.5]),
    ])
Beispiel #13
0
    def test_array_column(self):
        """After group_by, the key column has no component type and the grouped column's is double."""
        grouped = new_table([
            string_col("StringColumn", ["Str1", "Str1", "Str2", "Str2"]),
            double_col("Decimals", [1.0, 2.0, 4.0, 8.0]),
        ]).group_by(["StringColumn"])

        # Column 0 is the grouping key; column 1 holds the grouped doubles.
        self.assertIsNone(grouped.columns[0].component_type)
        self.assertEqual(grouped.columns[1].component_type, dtypes.double)
Beispiel #14
0
    def test_simple_spec(self):
        """
        Produce a single-column table to Kafka with a simple value spec and
        check that a cleanup callable comes back.
        """
        price_table = new_table(
            cols=[double_col('Price', [10.0, 10.5, 11.0, 11.5])])
        kafka_config = {'bootstrap.servers': 'redpanda:29092'}

        stop_producing = pk.produce(
            price_table,
            kafka_config,
            'orders',
            key_spec=KeyValueSpec.IGNORE,
            value_spec=pk.simple_spec('Price'),
        )

        self.assertIsNotNone(stop_producing)
        stop_producing()
Beispiel #15
0
    def test_vector_column(self):
        """Grouped columns convert to pandas as object columns whose cells expose toArray()."""
        keys = ["Str1", "Str1", "Str2", "Str2", "Str2"]
        values = [1.0, 2.0, 4.0, 8.0, 16.0]
        grouped = new_table([
            string_col("String", keys),
            double_col("Doubles", values),
        ]).group_by(["String"])

        frame = to_pandas(grouped, cols=["String", "Doubles"])
        for column_name in ('String', 'Doubles'):
            self.assertEqual(frame[column_name].dtype, np.object_)

        # One row per key: the first holds the "Str1" values, the second the "Str2" values.
        grouped_doubles = frame['Doubles']
        self.assertEqual([1.0, 2.0], list(grouped_doubles[0].toArray()))
        self.assertEqual([4.0, 8.0, 16.0], list(grouped_doubles[1].toArray()))
Beispiel #16
0
 def test_to_table(self):
     """Round-trip a table of boolean and numeric columns through pandas."""
     source_table = new_table(cols=[
         bool_col(name="Boolean", data=[True, False]),
         byte_col(name="Byte", data=(1, -1)),
         char_col(name="Char", data='-1'),
         short_col(name="Short", data=[1, -1]),
         int_col(name="Int", data=[1, -1]),
         long_col(name="Long", data=[1, NULL_LONG]),
         long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
         float_col(name="Float", data=[1.01, -1.01]),
         double_col(name="Double", data=[1.01, -1.01]),
     ])

     frame = to_pandas(source_table)
     round_tripped = to_table(frame)
     self.assert_table_equals(round_tripped, source_table)
Beispiel #17
0
    def test_to_table_datetime_with_none(self):
        """Round-trip a datetime column containing a None entry through pandas."""
        ny_time = to_datetime("2021-12-10T23:59:59 NY")
        hi_time = to_datetime("2021-12-10T23:59:59 HI")

        source_table = new_table(cols=[
            datetime_col(name="Datetime",
                         data=[dtypes.DateTime(1), None, ny_time, hi_time]),
        ])

        frame = to_pandas(source_table)
        round_tripped = to_table(frame)
        self.assert_table_equals(round_tripped, source_table)
Beispiel #18
0
 def test_round_trip_with_nulls(self):
     """Round-trip columns holding null values through pandas and compare the tables."""
     # The following column kinds have no two-way conversion and are left out:
     #   j_array_list = dtypes.ArrayList([1, -1])
     #   bool_col(name="Boolean", data=[True, None])
     #   string_col(name="String", data=["foo", None])
     #   jobj_col(name="JObj", data=[j_array_list, None])
     source_table = new_table(cols=[
         byte_col(name="Byte", data=(1, NULL_BYTE)),
         char_col(name="Char", data='-1'),
         short_col(name="Short", data=[1, NULL_SHORT]),
         int_col(name="Int_", data=[1, NULL_INT]),
         long_col(name="Long_", data=[1, NULL_LONG]),
         float_col(name="Float_", data=[1.01, np.nan]),
         double_col(name="Double_", data=[1.01, np.nan]),
         datetime_col(name="Datetime", data=[dtypes.DateTime(1), None]),
         pyobj_col(name="PyObj", data=[CustomClass(1, "1"), None]),
     ])

     frame = to_pandas(source_table)
     column_count = len(source_table.columns)
     self.assertEqual(len(frame.columns), column_count)
     # Two rows per column.
     self.assertEqual(frame.size, 2 * column_count)

     round_tripped = to_table(frame)
     self.assert_table_equals(round_tripped, source_table)
Beispiel #19
0
    def test_big_decimal(self):
        """Write a BigDecimal column to a parquet file, read it back and compare the tables."""
        j_type = dtypes.BigDecimal.j_type
        big_decimal_list = [
            j_type.valueOf(301, 2),
            j_type.valueOf(201, 2),
            j_type.valueOf(101, 2)
        ]
        bd_col = InputColumn(name='decimal_value',
                             data_type=dtypes.BigDecimal,
                             input_data=big_decimal_list)
        table = new_table([bd_col])
        self.assertIsNotNone(table)
        base_dir = os.path.join(self.temp_dir.name, 'testCreation')
        file_location = os.path.join(base_dir, 'table1.parquet')
        # Remove any leftover output. shutil.rmtree only works on directories,
        # so a plain file has to go through os.remove instead.
        if os.path.isdir(file_location):
            shutil.rmtree(file_location)
        elif os.path.exists(file_location):
            os.remove(file_location)

        try:
            write(table, file_location)
            table2 = read(file_location)
            self.assertEqual(table.size, table2.size)
            self.assert_table_equals(table, table2)

            self.assertTrue(os.path.exists(file_location))
        finally:
            # Clean up even when an assertion above fails; ignore_errors keeps
            # a missing directory from hiding the original failure.
            shutil.rmtree(base_dir, ignore_errors=True)