def test_getitem_slice(self):
    """Check that slicing an Index with ``[:2]`` keeps its first two values."""
    int64 = np.dtype(np.int64)
    sliced = pdw.Index(np.array([1, 2, 3]), int64)[:2]
    expected = pdw.Index(np.array([1, 2]), int64)

    # Compare the evaluated raw data of both indexes.
    expected_values = evaluate_if_necessary(expected).data
    actual_values = evaluate_if_necessary(sliced).data
    np.testing.assert_array_equal(expected_values, actual_values)
def test_getitem_filter(self):
    """Check that indexing an Index with a boolean LazyResult filters it.

    The mask ``[True, False, True]`` applied to the values ``[1, 2, 3]`` is
    expected to produce an Index holding ``[1, 3]``.
    """
    # np.bool was only an alias of the builtin bool and was removed in
    # NumPy 1.24; np.bool_ names the same dtype and exists in every release.
    bool_dtype = np.dtype(np.bool_)
    # NOTE(review): the trailing 1 is passed through unchanged; presumably it
    # is the dimensionality of the lazy value -- confirm against LazyResult.
    to_filter = LazyResult(
        np.array([True, False, True], dtype=bool_dtype),
        numpy_to_weld_type(bool_dtype),
        1)

    result = pdw.Index(np.array([1, 2, 3]), np.dtype(np.int64))[to_filter]
    expected_result = pdw.Index(np.array([1, 3]), np.dtype(np.int64))

    np.testing.assert_array_equal(
        evaluate_if_necessary(expected_result).data,
        evaluate_if_necessary(result).data)
def test_drop_str(self):
    """Check that dropping 'col1' by name leaves 'col2' and the index intact.

    The column names, the remaining 'col2' values and the MultiIndex of the
    result are each compared against a DataFrame built without 'col1'.
    """
    data = {'col2': np.array([5., 6., 7., 8.])}
    index = pdw.MultiIndex.from_product(
        [np.array([1, 2]), np.array([3, 4])], ['a', 'b'])
    expected_result = pdw.DataFrame(data, index)

    result = self.df.drop('col1')

    # assertListEqual rejects arguments that are not lists, and on Python 3
    # dict.keys() returns a view object, so materialise both sides first.
    self.assertListEqual(list(expected_result.data.keys()),
                         list(result.data.keys()))
    np.testing.assert_array_equal(
        evaluate_if_necessary(expected_result['col2']),
        evaluate_if_necessary(result['col2']))
    test_equal_multiindex(expected_result.index, result.index)
def test_setitem_new(self):
    """Check that assigning a NumPy array to a new column name stores it."""
    values = np.array([11, 12, 13, 14])
    self.df['col3'] = values

    # Reading the column back must give the values that were assigned.
    stored = evaluate_if_necessary(self.df['col3'])
    np.testing.assert_array_equal(values, stored)
def test_agg(self):
    """Check agg(['min', 'max']) against hand-written per-column results.

    The result is expected to be indexed by the aggregation names and to
    hold float64 values for both 'col1' and 'col2'.
    """
    expected_columns = {
        'col1': np.array([1, 4], dtype=np.float64),
        'col2': np.array([5, 8], dtype=np.float64)
    }
    expected_index = pdw.Index(
        np.array(['min', 'max'], dtype=np.dtype('str')), np.dtype('str'))
    expected = pdw.DataFrame(expected_columns, expected_index)

    actual = self.df.agg(['min', 'max'])

    np.testing.assert_array_equal(evaluate_if_necessary(expected.index),
                                  evaluate_if_necessary(actual.index))
    for column in ('col1', 'col2'):
        test_equal_series(expected[column], actual[column])
def test_setitem_series(self):
    """Check that assigning a Series to a new column name stores its values."""
    values = np.array([11, 12, 13, 14])
    # Wrap the raw values in a Series that shares the frame's own index.
    series = pdw.Series(values, values.dtype, self.df.index)
    self.df['col3'] = series

    stored = evaluate_if_necessary(self.df['col3'])
    np.testing.assert_array_equal(values, stored)
def test_getitem_series(self):
    """Check that indexing with the comparison ``df['col1'] < 3`` filters rows.

    Both columns and the MultiIndex of the filtered frame are compared
    against a hand-built two-row DataFrame.
    """
    expected_columns = {
        'col1': np.array([1, 2]),
        'col2': np.array([5., 6.])
    }
    expected_index = pdw.MultiIndex(
        [np.array([1, 2]), np.array([3, 4])],
        [np.array([0, 0]), np.array([0, 1])],
        ['a', 'b'])
    expected = pdw.DataFrame(expected_columns, expected_index)

    actual = self.df[self.df['col1'] < 3]

    for column in ('col1', 'col2'):
        np.testing.assert_array_equal(
            evaluate_if_necessary(expected[column]),
            evaluate_if_necessary(actual[column]))
    test_equal_multiindex(expected.index, actual.index)
def test_reset_index(self):
    """Check that reset_index() turns index levels 'a' and 'b' into columns.

    The result is expected to carry a RangeIndex(0, 4, 1) and four columns:
    the original 'col1' and 'col2' plus the former index levels.
    """
    actual = self.df.reset_index()

    expected_columns = {
        'col1': np.array([1, 2, 3, 4]),
        'col2': np.array([5., 6., 7., 8.]),
        'a': np.array([1, 1, 2, 2]),
        'b': np.array([3, 4, 3, 4])
    }
    expected = pdw.DataFrame(expected_columns, pdw.RangeIndex(0, 4, 1))

    np.testing.assert_array_equal(evaluate_if_necessary(expected.index),
                                  evaluate_if_necessary(actual.index))
    for column in ('col1', 'col2', 'a', 'b'):
        test_equal_series(expected[column], actual[column])
def test_getitem_list(self):
    """Check that indexing with a list of column names selects those columns.

    Selecting ['col1', 'col2'] is compared, column by column and on the
    MultiIndex, against a DataFrame built from the same data.
    """
    expected_columns = {
        'col1': np.array([1, 2, 3, 4]),
        'col2': np.array([5., 6., 7., 8.])
    }
    expected_index = pdw.MultiIndex.from_product(
        [np.array([1, 2]), np.array([3, 4])], ['a', 'b'])
    expected = pdw.DataFrame(expected_columns, expected_index)

    actual = self.df[['col1', 'col2']]

    for column in ('col1', 'col2'):
        np.testing.assert_array_equal(
            evaluate_if_necessary(expected[column]),
            evaluate_if_necessary(actual[column]))
    test_equal_multiindex(expected.index, actual.index)
def test_describe(self):
    """Check describe() for the 'min', 'max', 'mean' and 'std' aggregations.

    The evaluated result is compared against a hand-written DataFrame on
    its index and on both the 'col1' and 'col2' series.
    """
    # np.str was only an alias of the builtin str and was removed in
    # NumPy 1.24; np.str_ names the same dtype and exists in every release.
    labels = np.array(['min', 'max', 'mean', 'std'], dtype=np.str_)

    # reversed because of dict and not OrderedDict
    expected_result = pdw.DataFrame(
        {
            # NOTE(review): 1.29089 differs from the 1.29099 used for 'col2'.
            # If self.df['col1'] holds [1, 2, 3, 4], its sample standard
            # deviation is ~1.29099, so this looks like a typo -- confirm
            # against the tolerance applied by test_equal_series.
            'col1': np.array([1, 4, 2.5, 1.29089], np.float64),
            'col2': np.array([5, 8, 6.5, 1.29099], np.float64)
        },
        pdw.Index(labels, np.dtype(np.str_), "Index"))

    result = self.df.describe(['min', 'max', 'mean', 'std']).evaluate()

    np.testing.assert_array_equal(
        evaluate_if_necessary(expected_result.index),
        evaluate_if_necessary(result.index))
    test_equal_series(expected_result['col1'].evaluate(),
                      result['col1'].evaluate())
    test_equal_series(expected_result['col2'].evaluate(),
                      result['col2'].evaluate())
def test_join_1d_index(self):
    """Check merge() of two frames that each have a one-dimensional Index.

    The left index is [1, 3, 4, 5, 6] and the right index is [2, 3, 5]; the
    result is expected to keep only the shared labels 3 and 5 together with
    the matching rows of 'col1' and 'col2'.
    """
    int64 = np.dtype(np.int64)
    left = pdw.DataFrame(
        {'col1': np.array([1, 2, 3, 4, 5])},
        pdw.Index(np.array([1, 3, 4, 5, 6]), int64))
    right = pdw.DataFrame(
        {'col2': np.array([1, 2, 3])},
        pdw.Index(np.array([2, 3, 5]), int64))

    actual = left.merge(right)

    expected_columns = {
        'col1': np.array([2, 4]),
        'col2': np.array([2, 3])
    }
    expected = pdw.DataFrame(expected_columns,
                             pdw.Index(np.array([3, 5]), int64))

    np.testing.assert_array_equal(evaluate_if_necessary(expected.index),
                                  evaluate_if_necessary(actual.index))
    for column in ('col1', 'col2'):
        test_equal_series(expected[column], actual[column])
def test_element_wise_operation(self):
    """Check that multiplying a DataFrame by 2 doubles every column value.

    The MultiIndex is expected to be unchanged by the operation.
    """
    doubled_columns = {
        'col1': np.array([2, 4, 6, 8]),
        'col2': np.array([10, 12, 14, 16])
    }
    doubled_index = pdw.MultiIndex.from_product(
        [np.array([1, 2]), np.array([3, 4])], ['a', 'b'])
    expected = pdw.DataFrame(doubled_columns, doubled_index)

    source_columns = {
        'col1': np.array([1, 2, 3, 4]),
        'col2': np.array([5, 6, 7, 8])
    }
    source_index = pdw.MultiIndex.from_product(
        [np.array([1, 2]), np.array([3, 4])], ['a', 'b'])
    actual = pdw.DataFrame(source_columns, source_index) * 2

    for column in ('col1', 'col2'):
        np.testing.assert_array_equal(
            evaluate_if_necessary(expected[column]),
            evaluate_if_necessary(actual[column]))
    test_equal_multiindex(expected.index, actual.index)
def test_groupby_single_column_sum(self):
    """Check groupby('col1').sum() on a frame with mixed-width dtypes.

    The result is expected to be indexed by the distinct 'col1' values and
    to hold the per-group sums of 'col2' and 'col3'. Index and column values
    are sorted before comparison, so row order is not checked.
    """
    columns = {
        'col1': np.array([1, 1, 2, 3, 3], dtype=np.int32),
        'col2': np.array([3, 4, 5, 5, 6], dtype=np.int64),
        'col3': np.array([5., 6., 7., 7., 7.], dtype=np.float32)
    }
    levels = [
        np.array([1, 2, 3], dtype=np.int32),
        np.array([5., 6., 7.], dtype=np.float32)
    ]
    labels = [
        np.array([0, 0, 1, 2, 2], dtype=np.int64),
        np.array([0, 1, 2, 2, 2], dtype=np.int64)
    ]
    df = pdw.DataFrame(columns, pdw.MultiIndex(levels, labels, ['i32', 'f32']))

    actual = df.groupby('col1').sum()

    expected_columns = {
        'col2': np.array([7, 5, 11], dtype=np.int64),
        'col3': np.array([11., 7., 14.], dtype=np.float32)
    }
    expected_index = pdw.Index(
        np.array([1, 2, 3], dtype=np.int32), np.dtype('int32'), 'col1')
    expected = pdw.DataFrame(expected_columns, expected_index)

    # TODO: test equal 1d index method (both rangeindex and index should work)
    np.testing.assert_array_equal(
        np.sort(evaluate_if_necessary(expected.index)),
        np.sort(evaluate_if_necessary(actual.index)))

    # assume correct values but in different order; just check the values
    for column in ('col2', 'col3'):
        np.testing.assert_array_equal(
            np.sort(expected[column].evaluate().data),
            np.sort(actual[column].evaluate().data))
def test_getitem_column(self):
    """Check that indexing the frame with 'col1' returns that column's data."""
    expected_values = np.array([1, 2, 3, 4])
    actual_values = evaluate_if_necessary(self.df['col1'])
    np.testing.assert_array_equal(expected_values, actual_values)