def test_to_numpy(self):
    """Exercise ``to_numpy`` on single columns, mixed columns and float columns.

    Three scenarios are covered:

    * every column of ``self.test_table`` converted on its own must yield
      an array of shape ``(2, 1)``;
    * converting all columns of ``self.test_table`` at once must raise a
      ``DHError`` whose root cause mentions "same data type";
    * a table of five float columns must convert to an array of shape
      ``(2, 5)``.
    """
    for col in self.test_table.columns:
        with self.subTest(f"test single column to numpy- {col.name}"):
            np_array = to_numpy(self.test_table, [col.name])
            self.assertEqual((2, 1), np_array.shape)
            # NOTE(review): the result of this comparison is discarded, so it
            # verifies nothing. It cannot simply be wrapped in assertTrue:
            # np_array is (2, 1) while the expected arrays are built as flat
            # literals, so array_equal would be False on shape alone. Decide
            # on the intended comparison before turning this into an assert.
            np.array_equal(np_array, self.np_array_dict[col.name])

    # The original try/except passed silently when no exception was raised;
    # assertRaises makes the expected failure mandatory.
    with self.assertRaises(DHError) as cm:
        to_numpy(self.test_table, [col.name for col in self.test_table.columns])
    self.assertIn("same data type", cm.exception.root_cause)

    with self.subTest("test multi-columns to numpy"):
        input_cols = [
            float_col(name="Float", data=[1.01, -1.01]),
            float_col(name="Float1", data=[11.011, -11.011]),
            float_col(name="Float2", data=[111.0111, -111.0111]),
            float_col(name="Float3", data=[1111.01111, -1111.01111]),
            float_col(name="Float4", data=[11111.011111, -11111.011111]),
        ]
        tmp_table = new_table(cols=input_cols)
        np_array = to_numpy(tmp_table, [col.name for col in tmp_table.columns])
        self.assertEqual((2, 5), np_array.shape)
def test_to_table(self):
    """Round-trip tables through ``to_numpy`` and back through ``to_table``.

    Each column of ``self.test_table`` is converted on its own and the
    rebuilt table must have the same size as the source table. A table of
    five float columns must survive the round trip unchanged, and passing
    an array with fewer columns than names must raise a ``DHError`` whose
    root cause contains "doesn't match".
    """

    def names_of(table):
        # Rebuilt on every call, mirroring the original's repeated list builds.
        return [column.name for column in table.columns]

    for source_col in self.test_table.columns:
        with self.subTest(f"test single column to numpy- {source_col.name}"):
            single = to_numpy(self.test_table, [source_col.name])
            rebuilt = to_table(single, [source_col.name])
            self.assertEqual(rebuilt.size, self.test_table.size)

    with self.subTest("test multi-columns to numpy"):
        float_specs = (
            ("Float", [1.01, -1.01]),
            ("Float1", [11.011, -11.011]),
            ("Float2", [111.0111, -111.0111]),
            ("Float3", [1111.01111, -1111.01111]),
            ("Float4", [11111.011111, -11111.011111]),
        )
        float_table = new_table(
            cols=[float_col(name=label, data=values) for label, values in float_specs]
        )

        as_array = to_numpy(float_table, names_of(float_table))
        round_tripped = to_table(as_array, names_of(float_table))
        self.assert_table_equals(round_tripped, float_table)

        # Three array columns against five names: the shapes disagree.
        with self.assertRaises(DHError) as raised:
            _ = to_table(as_array[:, [0, 1, 3]], names_of(float_table))
        self.assertIn("doesn't match", raised.exception.root_cause)
def setUp(self):
    """Create ``self.test_table``: a two-row table with one column per column factory used below."""
    first_jlist = j_array_list([1, -1])
    second_jlist = j_array_list([2, -2])

    # Primitive, string and datetime columns.
    value_cols = [
        bool_col(name="Boolean", data=[True, False]),
        byte_col(name="Byte", data=(1, -1)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, -1]),
        int_col(name="Int_", data=[1, -1]),
        long_col(name="Long_", data=[1, NULL_LONG]),
        long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
        float_col(name="Float_", data=[1.01, -1.01]),
        double_col(name="Double_", data=[1.01, -1.01]),
        string_col(name="String", data=["foo", "bar"]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), dtypes.DateTime(-1)]),
    ]

    # Columns holding arbitrary Python objects and Java objects.
    object_cols = [
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj1", data=[[1, 2, 3], CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj2", data=[False, 'False']),
        jobj_col(name="JObj", data=[first_jlist, second_jlist]),
    ]

    self.test_table = new_table(cols=value_cols + object_cols)
def setUp(self):
    """Create the shared fixtures used by the numpy conversion tests.

    Sets two attributes:

    * ``self.test_table`` - a two-row table with one column per column
      factory used below;
    * ``self.np_array_dict`` - a mapping from column name to the numpy
      array associated with that column.
    """
    j_array_list1 = j_array_list([1, -1])
    j_array_list2 = j_array_list([2, -2])
    input_cols = [
        bool_col(name="Boolean", data=[True, False]),
        byte_col(name="Byte", data=(1, -1)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, -1]),
        int_col(name="Int", data=[1, -1]),
        long_col(name="Long", data=[1, NULL_LONG]),
        long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
        float_col(name="Float", data=[1.01, -1.01]),
        double_col(name="Double", data=[1.01, -1.01]),
        string_col(name="String", data=["foo", "bar"]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), dtypes.DateTime(-1)]),
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj1", data=[[1, 2, 3], CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj2", data=[False, 'False']),
        jobj_col(name="JObj", data=[j_array_list1, j_array_list2]),
    ]
    self.test_table = new_table(cols=input_cols)

    self.np_array_dict = {
        'Boolean': np.array([True, False]),
        'Byte': np.array([1, -1], dtype=np.int8),
        # NOTE(review): this parses the string '-1' as the integer -1 and
        # yields a 0-d array, not the two characters of the "Char" column.
        # Confirm the intended expectation.
        'Char': np.array('-1', dtype=np.int16),
        'Short': np.array([1, -1], dtype=np.int16),
        'Int': np.array([1, -1], dtype=np.int32),
        'Long': np.array([1, NULL_LONG], dtype=np.int64),
        "NPLong": np.array([1, -1], dtype=np.int8),
        "Float": np.array([1.01, -1.01], dtype=np.float32),
        "Double": np.array([1.01, -1.01]),
        # np.string_ was removed in NumPy 2.0; np.bytes_ is the same type
        # and is available in NumPy 1.x as well.
        "String": np.array(["foo", "bar"], dtype=np.bytes_),
        "Datetime": np.array([1, -1], dtype=np.dtype("datetime64[ns]")),
        "PyObj": np.array([CustomClass(1, "1"), CustomClass(-1, "-1")]),
        "PyObj1": np.array([[1, 2, 3], CustomClass(-1, "-1")], dtype=np.object_),
        "PyObj2": np.array([False, 'False'], dtype=np.object_),
        "JObj": np.array([j_array_list1, j_array_list2]),
    }
def test_new_table(self):
    """Build a table from one column of each supported kind and check it has two rows."""

    def make_jlist(*items):
        # Fill a fresh JArrayList in argument order.
        jlist = JArrayList()
        for item in items:
            jlist.add(item)
        return jlist

    first_jobj = make_jlist(1, -1)
    second_jobj = make_jlist(2, -2)

    column_defs = [
        bool_col(name="Boolean", data=[True, False]),
        byte_col(name="Byte", data=(1, -1)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, -1]),
        int_col(name="Int", data=[1, -1]),
        long_col(name="Long", data=[1, -1]),
        long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
        float_col(name="Float", data=[1.01, -1.01]),
        double_col(name="Double", data=[1.01, -1.01]),
        string_col(name="String", data=["foo", "bar"]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), dtypes.DateTime(-1)]),
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj1", data=[[1, 2, 3], CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj2", data=[False, 'False']),
        jobj_col(name="JObj", data=[first_jobj, second_jobj]),
    ]

    built = new_table(cols=column_defs)
    self.assertEqual(built.size, 2)
def test_to_table(self):
    """Convert a table of primitive columns to pandas and back; the result must equal the source."""
    source_table = new_table(
        cols=[
            bool_col(name="Boolean", data=[True, False]),
            byte_col(name="Byte", data=(1, -1)),
            char_col(name="Char", data='-1'),
            short_col(name="Short", data=[1, -1]),
            int_col(name="Int", data=[1, -1]),
            long_col(name="Long", data=[1, NULL_LONG]),
            long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
            float_col(name="Float", data=[1.01, -1.01]),
            double_col(name="Double", data=[1.01, -1.01]),
        ]
    )

    # Table -> DataFrame -> Table.
    round_tripped = to_table(to_pandas(source_table))

    self.assert_table_equals(round_tripped, source_table)
def test_round_trip_with_nulls(self):
    """Round-trip a table whose columns contain null values through pandas.

    The DataFrame must have one column per table column and two rows, and
    converting it back must reproduce the source table.
    """
    # Per the original author's note, there is no two-way conversion for
    # the following kinds of column, so they are left out:
    #   bool_col(name="Boolean", data=[True, None])
    #   string_col(name="String", data=["foo", None])
    #   jobj_col(name="JObj", data=[dtypes.ArrayList([1, -1]), None])
    nullable_cols = [
        byte_col(name="Byte", data=(1, NULL_BYTE)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, NULL_SHORT]),
        int_col(name="Int_", data=[1, NULL_INT]),
        long_col(name="Long_", data=[1, NULL_LONG]),
        float_col(name="Float_", data=[1.01, np.nan]),
        double_col(name="Double_", data=[1.01, np.nan]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), None]),
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), None]),
    ]
    source_table = new_table(cols=nullable_cols)

    frame = to_pandas(source_table)
    column_count = len(source_table.columns)
    self.assertEqual(len(frame.columns), column_count)
    # DataFrame.size is rows * columns; every column holds two rows.
    self.assertEqual(frame.size, 2 * column_count)

    restored = to_table(frame)
    self.assert_table_equals(restored, source_table)
def test_column_error(self):
    """Every column factory must raise ``DHError`` when given data of an incompatible type."""
    jobj = j_array_list([1, -1])

    # Kept as its own case: it also checks that the failed assignment left
    # no local name behind.
    with self.assertRaises(DHError):
        bool_input_col = bool_col(name="Boolean", data=[True, 'abc'])
    self.assertNotIn("bool_input_col", dir())

    # Each thunk defers building the column (and its data) until it runs
    # inside the assertRaises block.
    failing_builders = (
        lambda: byte_col(name="Byte", data=[1, 'abc']),
        lambda: char_col(name="Char", data=[jobj]),
        lambda: short_col(name="Short", data=[1, 'abc']),
        lambda: int_col(name="Int", data=[1, [1, 2]]),
        lambda: long_col(name="Long", data=[1, float('inf')]),
        lambda: float_col(name="Float", data=[1.01, 'NaN']),
        lambda: double_col(name="Double", data=[1.01, jobj]),
        lambda: string_col(name="String", data=[1, -1.01]),
        lambda: datetime_col(name="Datetime", data=[dtypes.DateTime(round(time.time())), False]),
        lambda: jobj_col(name="JObj", data=[jobj, CustomClass(-1, "-1")]),
    )
    for build in failing_builders:
        with self.assertRaises(DHError):
            build()