def test_greater_than_equal(self):
    """Exercise ``>=`` between a DataFrame and a scalar.

    The first three checks expect ``TypeError`` from comparisons that
    involve ``self.df`` (a fixture defined outside this view).  The last
    check compares an all-numeric frame with ``3`` element-wise.
    """
    with pytest.raises(TypeError):
        self.df >= 5

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') >= 10

    with pytest.raises(TypeError):
        self.df >= 'asdf'

    column_names = 'abfgh'
    source = {
        'a': [6, 7, 10],
        'b': [0, 2, nan],
        'f': [0, 10, 3],
        'g': np.zeros(3, dtype='int'),
        'h': [nan, nan, nan],
    }
    actual = dx.DataFrame(source, columns=list(column_names)) >= 3

    # NaN entries are expected to compare as False.
    expected_values = {
        'a': [True, True, True],
        'b': [False, False, False],
        'f': [False, True, True],
        'g': np.zeros(3, dtype='bool'),
        'h': [False] * 3,
    }
    expected = dx.DataFrame(expected_values, columns=list(column_names))

    assert_frame_equal(actual, expected)
def test_boolean_column_selection(self):
    """Select columns first with a boolean frame, then with a boolean array."""
    frame = dx.DataFrame({
        'a': [0, 0, 5, 9, 3, 4, 5, 1],
        'b': [0, 1.512344353, 8, 9, np.nan, 3, 2, 8],
        'c': [''] + list('bgggzgh'),
        'd': [False, False, True, False] * 2,
        'e': [0, 20, 30, 4, 5, 6, 7, 8],
        'f': [0., 3, 3, 3, 11, 4, 5, 1],
        'g': ['', None, 'ad', 'effd', 'ef', None, 'ett', 'zzzz'],
        'h': [0, 4, 5, 6, 7, 8, 9, 0],
        'i': np.array([0, 7, 6, 5, 4, 3, 2, 11]),
        'j': np.zeros(8, dtype='int'),
        'k': np.ones(8) - 1,
        'l': [np.nan] * 8,
    })

    # Boolean frame as column selector: among the columns returned by
    # select_dtypes('int'), keep those whose value in row 1 equals 0.
    int_frame = frame.select_dtypes('int')
    row_one_is_zero = int_frame[1, :] == 0
    actual = int_frame[:, row_one_is_zero]
    expected = dx.DataFrame({
        'a': [0, 0, 5, 9, 3, 4, 5, 1],
        'j': np.zeros(8, dtype='int'),
    })
    assert_frame_equal(actual, expected)

    # Boolean numpy array as column selector (one flag per column),
    # combined with a slice taking the last three rows.
    column_mask = np.array([False, False, False, True, True, False,
                            False, False, False, False, False, False])
    actual = frame[-3:, column_mask]
    expected = dx.DataFrame({'d': [False, True, False], 'e': [6, 7, 8]})
    assert_frame_equal(actual, expected)
def test_mod_division_frame(self):
    """Exercise ``%`` between a DataFrame and an integer scalar.

    ``self.df`` is a fixture defined outside this view; the first three
    checks expect ``TypeError`` from it.  The final check takes an
    all-numeric frame modulo ``3``.
    """
    with pytest.raises(TypeError):
        self.df % 5

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') % 10

    with pytest.raises(TypeError):
        self.df % 'asdf'

    column_names = 'abfgh'
    source = {
        'a': [6, 7, 10],
        'b': [0, 2, nan],
        'f': [0, 10, 3],
        'g': np.zeros(3, dtype='int'),
        'h': [np.nan] * 3,
    }
    actual = dx.DataFrame(source, columns=list(column_names)) % 3

    # NaN entries are expected to stay NaN.
    remainders = {
        'a': [0, 1, 1],
        'b': [0, 2, nan],
        'f': [0, 1, 0],
        'g': np.zeros(3, dtype='int'),
        'h': [np.nan] * 3,
    }
    expected = dx.DataFrame(remainders, columns=list(column_names))

    assert_frame_equal(actual, expected)
def test_add_df(self):
    """Add DataFrames to each other and compare with hand-written sums.

    ``self.df``, ``self.df_number`` and ``self.df_number2`` are fixtures
    defined outside this view.
    """
    # Frame added to itself.
    doubled = dx.DataFrame({
        'a': np.array([2, 4]),
        'b': np.array([-20, 20]),
        'c': np.array([3., 16.]),
        'd': np.array([4.6, nan]),
        'e': np.array(['aa', 'bb'], dtype=object),
        'f': np.array([True, False]),
        'g': np.array([0, 172800000000000], dtype='timedelta64[ns]'),
    })
    assert_frame_equal(self.df + self.df, doubled)

    # Sum of the two numeric fixtures.
    expected_sum = dx.DataFrame({
        'a': array([5, 7]),
        'b': array([-10, 10]),
        'c': array([3.5, 10.]),
        'd': array([0.3, nan]),
    })
    actual_sum = self.df_number + self.df_number2
    assert_frame_equal(actual_sum, expected_sum)
def test_floordiv_frame(self):
    """Exercise ``//`` between a DataFrame and an integer scalar.

    ``self.df`` is a fixture defined outside this view; the first three
    checks expect ``TypeError`` from it.  The final check floor-divides
    an all-numeric frame by ``3``.
    """
    with pytest.raises(TypeError):
        self.df // 5

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') // 10

    with pytest.raises(TypeError):
        self.df // 'asdf'

    column_names = 'abfgh'
    source = {
        'a': [0, 0, 10],
        'b': [0, 20, nan],
        'f': [0, 100, 10],
        'g': np.zeros(3, dtype='int'),
        'h': [np.nan] * 3,
    }
    actual = dx.DataFrame(source, columns=list(column_names)) // 3

    # NaN entries are expected to stay NaN.
    quotients = {
        'a': [0, 0, 3],
        'b': [0, 6, nan],
        'f': [0, 33, 3],
        'g': np.zeros(3, dtype='int'),
        'h': [np.nan] * 3,
    }
    expected = dx.DataFrame(quotients, columns=list(column_names))

    assert_frame_equal(actual, expected)
def test_less_than_equal(self):
    """Exercise ``<=`` between a DataFrame and a scalar.

    ``self.df`` is a fixture defined outside this view; the first three
    checks expect ``TypeError`` from it.  The last check compares an
    all-numeric frame with ``3`` element-wise.
    """
    with pytest.raises(TypeError):
        self.df <= 5

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') <= 10

    with pytest.raises(TypeError):
        self.df <= 'asdf'

    source = {
        'a': [6, 7, 10],
        'b': [0, 2, nan],
        'f': [0, 10, 3],
        'g': np.zeros(3, dtype='int'),
        'h': [nan, nan, nan],
    }
    actual = dx.DataFrame(source) <= 3

    # NaN entries are expected to compare as False.
    expected = dx.DataFrame({
        'a': [False, False, False],
        'b': [True, True, False],
        'f': [True, False, True],
        'g': np.ones(3, dtype='bool'),
        'h': [False] * 3,
    })

    assert_frame_equal(actual, expected)
def test_fillna(self):
    """Fill missing values with a number, a per-column dict, and a string."""
    frame = de.DataFrame({
        'a': [4, nan, nan, nan, 3, 2],
        'b': [None, 'a', 'd', None, None, 'er'],
        'c': [nan, nan, 5, nan, 7, nan],
    })

    # Numeric fill value: the string column is expected to be untouched.
    actual = frame.fillna(5)
    expected = de.DataFrame({
        'a': [4.0, 5.0, 5.0, 5.0, 3.0, 2.0],
        'b': [None, 'a', 'd', None, None, 'er'],
        'c': [5.0, 5.0, 5.0, 5.0, 7.0, 5.0],
    })
    assert_frame_equal(actual, expected)

    # Dict fill value: only the columns named in the dict are filled.
    actual = frame.fillna({'a': 10, 'b': 'poop'})
    expected = de.DataFrame({
        'a': [4.0, 10.0, 10.0, 10.0, 3.0, 2.0],
        'b': ['poop', 'a', 'd', 'poop', 'poop', 'er'],
        'c': [nan, nan, 5.0, nan, 7.0, nan],
    })
    assert_frame_equal(actual, expected)

    # String fill value: the numeric columns are expected to be untouched.
    actual = frame.fillna('dupe')
    expected = de.DataFrame({
        'a': [4.0, nan, nan, nan, 3.0, 2.0],
        'b': ['dupe', 'a', 'd', 'dupe', 'dupe', 'er'],
        'c': [nan, nan, 5.0, nan, 7.0, nan],
    })
    assert_frame_equal(actual, expected)
def test_mult_frame(self):
    """Multiply ``self.df`` by 2 from the left and from the right.

    ``self.df`` is a fixture defined outside this view.  Both operand
    orders are compared against the same expected content.
    """
    def build_expected():
        # A fresh frame is built for each comparison.
        return dx.DataFrame({
            'a': [0, 0, 10],
            'b': [0, 3, nan],
            'c': ['', 'bb', 'gg'],
            'd': [0, 0, 2],
            'e': ['', None, 'adad'],
            'f': [0, 8, 10],
            'g': np.zeros(3, dtype='int'),
            'h': [np.nan] * 3,
        })

    frame_times_scalar = self.df * 2
    assert_frame_equal(frame_times_scalar, build_expected())

    scalar_times_frame = 2 * self.df
    assert_frame_equal(scalar_times_frame, build_expected())
def test_where_string_cols(self):
    """Call ``where`` on string columns with various x / y replacements."""
    frame = de.DataFrame({
        'a': [9, 10, 9, 9, 10],
        'b': [0, nan, nan, 0, 1],
        'c': [''] + list('eeaz'),
        'd': [False, False, True, False, True],
        'e': [0, 20, 30, 4, 4],
        'f': ['a', nan, 'ad', None, 'ad'],
        'g': [np.nan] * 5,
    })
    mask = frame[:, 'e'] > 9

    def string_columns():
        # Re-select for every call so each ``where`` gets a fresh frame.
        return frame[:, ['c', 'f']]

    # No replacements: rows failing the condition are expected to be None.
    actual = string_columns().where(mask)
    expected = de.DataFrame({'c': [None, 'e', 'e', None, None],
                             'f': [None, None, 'ad', None, None]})
    assert_frame_equal(actual, expected)

    # Numeric x and y.
    actual = string_columns().where(mask, 22, 99)
    expected = de.DataFrame({'c': [99, 22, 22, 99, 99],
                             'f': [99, 22, 22, 99, 99]})
    assert_frame_equal(actual, expected)

    # String x only.
    actual = string_columns().where(mask, 't')
    expected = de.DataFrame({'c': [None, 't', 't', None, None],
                             'f': [None, 't', 't', None, None]})
    assert_frame_equal(actual, expected)

    # String x and y.
    actual = string_columns().where(mask, 't', 'y')
    expected = de.DataFrame({'c': ['y', 't', 't', 'y', 'y'],
                             'f': ['y', 't', 't', 'y', 'y']})
    assert_frame_equal(actual, expected)
def test_mod_bool(self):
    """Exercise ``%`` between a DataFrame and a boolean scalar.

    ``self.df`` is a fixture defined outside this view.  Taking it, or
    its string columns, modulo a bool is expected to raise
    ``TypeError``.  An all-numeric frame modulo ``True`` is expected to
    yield zeros, with NaN entries staying NaN.

    Each distinct raising expression is checked exactly once.
    """
    some_bool = True

    with pytest.raises(TypeError):
        self.df % some_bool

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') % some_bool

    column_names = 'abfgh'
    source = {
        'a': [6, 7, 10],
        'b': [0, 2, nan],
        'f': [0, 10, 3],
        'g': np.zeros(3, dtype='int64'),
        'h': [nan, nan, nan],
    }
    actual = dx.DataFrame(source, columns=list(column_names)) % some_bool

    remainders = {
        'a': [0, 0, 0],
        'b': [0, 0, nan],
        'f': [0, 0, 0],
        'g': np.zeros(3, dtype='int64'),
        'h': [nan, nan, nan],
    }
    expected = dx.DataFrame(remainders, columns=list(column_names))

    assert_frame_equal(actual, expected)
def test_bfillna(self):
    """Backward-fill missing values, with and without a limit.

    Also checks that three argument combinations raise ``ValueError``.
    """
    frame = de.DataFrame({
        'a': [4, nan, nan, nan, 3, 2],
        'b': [None, 'a', 'd', None, None, 'er'],
        'c': [nan, nan, 5, nan, 7, nan],
    })

    # Unlimited backward fill; a trailing NaN has nothing after it to copy.
    actual = frame.fillna(method='bfill')
    expected = de.DataFrame({
        'a': [4.0, 3.0, 3.0, 3.0, 3.0, 2.0],
        'b': ['a', 'a', 'd', 'er', 'er', 'er'],
        'c': [5.0, 5.0, 5.0, 7.0, 7.0, nan],
    })
    assert_frame_equal(actual, expected)

    # limit=1: only the missing value directly before a valid one is filled.
    actual = frame.fillna(method='bfill', limit=1)
    expected = de.DataFrame({
        'a': [4.0, nan, nan, 3.0, 3.0, 2.0],
        'b': ['a', 'a', 'd', None, 'er', 'er'],
        'c': [nan, 5.0, 5.0, 7.0, 7.0, nan],
    })
    assert_frame_equal(actual, expected)

    # Argument combinations expected to be rejected.
    with pytest.raises(ValueError):
        frame.fillna(method='bfill', limit=0)

    with pytest.raises(ValueError):
        frame.fillna(method='bfill', limit=1, fill_function='mean')

    with pytest.raises(ValueError):
        frame.fillna(values=10, method='bfill')
def test_drop_rows_and_cols(self):
    """Drop rows and columns in a single ``drop`` call."""
    frame = de.DataFrame({
        'a': [0, 0, 5, 9],
        'b': [0, 1.5, 8, 9],
        'c': [''] + list('efs'),
        'd': [False, False, True, False],
        'e': [0, 20, 30, 4],
        'f': ['a', nan, 'ad', 'effd'],
        'g': [np.nan] * 4,
    })

    # Integer row and integer column.
    actual = frame.drop(1, 1)
    expected = de.DataFrame({
        'a': [0, 5, 9],
        'c': ['', 'f', 's'],
        'd': [False, True, False],
        'e': [0, 30, 4],
        'f': ['a', 'ad', 'effd'],
        'g': [nan, nan, nan],
    })
    assert_frame_equal(actual, expected)

    # Negative row position and a list of column labels.
    actual = frame.drop(-2, list('abc'))
    expected = de.DataFrame({
        'd': [False, False, False],
        'e': [0, 20, 4],
        'f': ['a', None, 'effd'],
        'g': [nan, nan, nan],
    })
    assert_frame_equal(actual, expected)

    # List of rows; columns given as a mix of positions and a label.
    actual = frame.drop([0, 3], [3, 'a', -2])
    expected = de.DataFrame({
        'b': [1.5, 8.0],
        'c': ['e', 'f'],
        'e': [20, 30],
        'g': [nan, nan],
    })
    assert_frame_equal(actual, expected)
def test_mod_float(self):
    """Exercise ``%`` between a DataFrame and a float scalar.

    ``self.df`` is a fixture defined outside this view; the first three
    checks expect ``TypeError`` from it.  The final check takes an
    all-float frame modulo ``3.``.
    """
    with pytest.raises(TypeError):
        self.df % 5.

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') % 10.

    with pytest.raises(TypeError):
        self.df % 'asdf'

    column_names = 'abfgh'
    source = {
        'a': [6., 7, 10],
        'b': [0., 2, nan],
        'f': [0., 10, 3],
        'g': np.zeros(3, dtype='float64'),
        'h': [nan, nan, nan],
    }
    actual = dx.DataFrame(source, columns=list(column_names)) % 3.

    # NaN entries are expected to stay NaN.
    remainders = {
        'a': [0., 1, 1],
        'b': [0., 2, nan],
        'f': [0., 1, 0],
        'g': np.zeros(3, dtype='float64'),
        'h': [nan, nan, nan],
    }
    expected = dx.DataFrame(remainders, columns=list(column_names))

    assert_frame_equal(actual, expected)
def test_truediv_bool(self):
    """Exercise ``/`` between a DataFrame and a boolean scalar.

    ``self.df`` is a fixture defined outside this view.  Dividing it, or
    its string columns, by a bool is expected to raise ``TypeError``.
    Its numeric columns are then divided by ``True`` and ``True`` is
    divided by them; both results are compared with hand-written frames.

    Each distinct raising expression is checked exactly once.
    """
    some_bool = True

    with pytest.raises(TypeError):
        self.df / some_bool

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') / some_bool

    column_names = 'abfgh'

    # frame / bool
    actual = self.df.select_dtypes('number') / some_bool
    expected = dx.DataFrame(
        {
            'a': [0., nan, 5],
            'b': [0, 1.5, nan],
            'f': [0., 4, 5],
            'g': np.zeros(3, dtype='float64'),
            'h': [nan, nan, nan],
        },
        columns=list(column_names))
    assert_frame_equal(actual, expected)

    # bool / frame: division by zero is expected to give inf.
    actual = some_bool / self.df.select_dtypes('number')
    expected = dx.DataFrame(
        {
            'a': [np.inf, nan, .2],
            'b': [np.inf, 1 / 1.5, nan],
            'f': [np.inf, .25, .2],
            'g': [np.inf] * 3,
            'h': [nan, nan, nan],
        },
        columns=list(column_names))
    assert_frame_equal(actual, expected)
def test_truediv_float(self):
    """Exercise ``/`` between a DataFrame and a float scalar.

    ``self.df`` is a fixture defined outside this view; the first three
    checks expect ``TypeError`` from it.  Its numeric columns are then
    divided by ``2.`` and ``10.`` is divided by them.
    """
    with pytest.raises(TypeError):
        self.df / 5.

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') / 10.

    with pytest.raises(TypeError):
        self.df / 'asdf'

    column_names = 'abfgh'

    # frame / float
    halved = self.df.select_dtypes('number') / 2.
    expected_halved = dx.DataFrame(
        {
            'a': [0, nan, 2.5],
            'b': [0, .75, nan],
            'f': [0, 2, 2.5],
            'g': np.zeros(3),
            'h': [nan, nan, nan],
        },
        columns=list(column_names))
    assert_frame_equal(halved, expected_halved)

    # float / frame: division by zero is expected to give inf.
    reciprocal = 10. / self.df.select_dtypes('number')
    expected_reciprocal = dx.DataFrame(
        {
            'a': [np.inf, nan, 2],
            'b': [np.inf, 10 / 1.5, nan],
            'f': [np.inf, 2.5, 2],
            'g': [np.inf] * 3,
            'h': [nan, nan, nan],
        },
        columns=list(column_names))
    assert_frame_equal(reciprocal, expected_reciprocal)
def test_scalar_column_with_list_slice_row_selection(self):
    """Select one column together with a list or slice of rows.

    ``df`` is not assigned in this function; it is resolved from a scope
    outside this view.
    """
    # List of rows, column by integer position.
    actual = df[[4, 6], 2]
    expected = dx.DataFrame({'c': ['e', 'g']})
    assert_frame_equal(actual, expected)

    # Single-element row list.
    actual = df[[4], 2]
    expected = dx.DataFrame({'c': ['e']})
    assert_frame_equal(actual, expected)

    # List of rows in non-sorted order, column by label.
    actual = df[[5, 2], 'f']
    expected = dx.DataFrame({'f': [4.0, 3.0]})
    assert_frame_equal(actual, expected)

    # Open-ended row slice.
    actual = df[3:, 'f']
    expected = dx.DataFrame({'f': [3.0, 11.0, 4.0, 5.0, 1.0]})
    assert_frame_equal(actual, expected)

    # Row slice with a negative step.
    actual = df[5::-2, 'b']
    expected = dx.DataFrame({'b': [3.0, 1.0, 8.0]})
    assert_frame_equal(actual, expected)
def test_drop_rows(self):
    """Drop rows by a single position and by a list of positions."""
    frame = de.DataFrame({
        'a': [0, 0, 5, 9],
        'b': [0, 1.5, 8, 9],
        'c': [''] + list('efs'),
        'd': [False, False, True, False],
        'e': [0, 20, 30, 4],
        'f': ['a', nan, 'ad', 'effd'],
        'g': [np.nan] * 4,
    })

    # Single integer row.
    actual = frame.drop(rows=3)
    expected = de.DataFrame({
        'a': [0, 0, 5],
        'b': [0.0, 1.5, 8.0],
        'c': ['', 'e', 'f'],
        'd': [False, False, True],
        'e': [0, 20, 30],
        'f': ['a', None, 'ad'],
        'g': [nan, nan, nan],
    })
    assert_frame_equal(actual, expected)

    # Row position beyond the four rows of the frame.
    with pytest.raises(IndexError):
        frame.drop(rows=5)

    # Mixed positive and negative positions; only one row is expected to remain.
    actual = frame.drop(rows=[-1, 0, -3])
    expected = de.DataFrame({
        'a': [5],
        'b': [8.0],
        'c': ['f'],
        'd': [True],
        'e': [30],
        'f': ['ad'],
        'g': [nan],
    })
    assert_frame_equal(actual, expected)
def test_where_array_xy(self):
    """Call ``where`` with numpy arrays (and scalars) as x / y replacements."""
    frame = de.DataFrame({
        'a': [9, 10, 9, 9, 10],
        'b': [0, nan, nan, 0, 1],
        'c': [''] + list('eeaz'),
        'd': [False, False, True, False, True],
        'e': [0, 20, 30, 4, 4],
        'f': ['a', nan, 'ad', None, 'ad'],
        'g': [np.nan] * 5,
    })
    mask = frame[:, 'e'] > 9

    def string_columns():
        # Re-select for every call so each ``where`` gets a fresh frame.
        return frame[:, ['c', 'f']]

    # Arrays for both x and y.
    actual = string_columns().where(mask, np.arange(5), np.arange(10, 15))
    expected = de.DataFrame({'c': [10, 1, 2, 13, 14],
                             'f': [10, 1, 2, 13, 14]})
    assert_frame_equal(actual, expected)

    # Array x with a scalar y.
    actual = string_columns().where(mask, np.arange(5), 99)
    expected = de.DataFrame({'c': [99, 1, 2, 99, 99],
                             'f': [99, 1, 2, 99, 99]})
    assert_frame_equal(actual, expected)

    # Numeric array x combined with a string y is expected to be rejected.
    with pytest.raises(TypeError):
        string_columns().where(mask, np.arange(5), 'er')

    # Only y given: rows passing the condition keep their original value.
    actual = string_columns().where(mask, y='er')
    expected = de.DataFrame({'c': ['er', 'e', 'e', 'er', 'er'],
                             'f': ['er', None, 'ad', 'er', 'er']})
    assert_frame_equal(actual, expected)
class TestValues:
    """Tests for ``values``, ``shape``, ``size``, ``to_dict`` and ``copy``."""

    # Two numeric columns; 'b' starts with a NaN.
    df1 = dx.DataFrame({'a': [1, 5, 7, 11], 'b': [nan, 5.4, -1.1, .045]})

    # The same two columns plus a string column.
    df2 = dx.DataFrame({
        'a': [1, 5, 7, 11],
        'b': [nan, 5.4, -1.1, .045],
        'c': ['ted', 'fred', 'ted', 'fred'],
    })

    def test_get_values(self):
        """``values`` should match a 2-D array with one column per frame column."""
        # Numeric-only frame.
        actual = self.df1.values
        expected = np.array([[1, 5, 7, 11], [nan, 5.4, -1.1, .045]]).T
        assert_array_equal(actual, expected)

        # Frame built from a random 2-D array gives that array back.
        random_block = np.random.rand(100, 5)
        frame = dx.DataFrame(random_block)
        assert_array_equal(frame.values, random_block)

        # Mixed numeric / string frame, compared as an object array.
        actual = self.df2.values
        expected = np.array([[1, 5, 7, 11],
                             [nan, 5.4, -1.1, .045],
                             ['ted', 'fred', 'ted', 'fred']], dtype='O').T
        assert_array_equal(actual, expected)

    def test_shape(self):
        """``shape`` should be a (rows, columns) pair."""
        dims = self.df1.shape
        assert dims == (4, 2)

        random_block = np.random.rand(100, 5)
        frame = dx.DataFrame(random_block)
        assert frame.shape == (100, 5)

    def test_size(self):
        """``size`` should equal rows times columns."""
        assert self.df1.size == 8

        random_block = np.random.rand(100, 5)
        frame = dx.DataFrame(random_block)
        assert frame.size == 500

    def test_to_dict(self):
        """``to_dict`` should support the 'array' and 'list' orientations."""
        as_arrays = self.df1.to_dict('array')
        expected_arrays = {
            'a': np.array([1, 5, 7, 11]),
            'b': np.array([nan, 5.4, -1.1, .045]),
        }
        for column, values in as_arrays.items():
            assert_array_equal(values, expected_arrays[column])

        as_lists = self.df1.to_dict('list')
        expected_lists = {'a': [1, 5, 7, 11], 'b': [nan, 5.4, -1.1, .045]}
        assert_dict_list(as_lists, expected_lists)

    def test_copy(self):
        """A copy should compare equal to the frame it was made from."""
        duplicate = self.df1.copy()
        assert_frame_equal(self.df1, duplicate)
def test_list_of_string_col_selection(self):
    """Select columns with a list of labels.

    ``df`` is not assigned in this function; it is resolved from a scope
    outside this view.
    """
    # Several labels: the result is expected to follow the requested order.
    actual = df[:, ['b', 'd', 'a']]
    expected = dx.DataFrame(
        {
            'a': [1, 2, 5, 9, 3, 4, 5, 1],
            'b': [1.5, 8.0, 9.0, 1.0, 2.0, 3.0, 2.0, 8.0],
            'd': [True, False, True, False, True, False, True, False],
        },
        columns=['b', 'd', 'a'])
    assert_frame_equal(actual, expected)

    # Single-label list.
    actual = df[:, ['a']]
    expected = dx.DataFrame({'a': [1, 2, 5, 9, 3, 4, 5, 1]})
    assert_frame_equal(actual, expected)
def test_sub_float(self):
    """Exercise ``-`` between a DataFrame and a float scalar, both ways.

    ``self.df`` is a fixture defined outside this view.  Subtracting a
    float from it, or from its string columns, is expected to raise
    ``TypeError``.
    """
    some_float = 5.0

    with pytest.raises(TypeError):
        self.df - some_float

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') - some_float

    int_cols = ['a', 'f', 'g']
    num_bool_cols = 'abdfgh'

    # Columns selected as 'int': frame - float.
    actual = self.df.select_dtypes('int') - some_float
    expected = dx.DataFrame(
        {
            'a': [-5., nan, 0],
            'f': [-5., -1, 0],
            'g': [-5., -5, -5],
        },
        columns=list(int_cols))
    assert_frame_equal(actual, expected)

    # Columns selected as 'int': float - frame.
    actual = some_float - self.df.select_dtypes('int')
    expected = dx.DataFrame(
        {
            'a': [5., nan, 0],
            'f': [5., 1, 0],
            'g': 5. - np.zeros(3, dtype='int'),
        },
        columns=list(int_cols))
    assert_frame_equal(actual, expected)

    # Number and bool columns: frame - float.
    actual = self.df.select_dtypes(['number', 'bool']) - some_float
    expected = dx.DataFrame(
        {
            'a': [-5., nan, 0],
            'b': [-5., -3.5, nan],
            'd': [nan, -5., -4],
            'f': [-5., -1, 0],
            'g': np.zeros(3, dtype='int') - some_float,
            'h': [nan, nan, nan],
        },
        columns=list(num_bool_cols))
    assert_frame_equal(actual, expected)

    # Number and bool columns: float - frame.
    actual = some_float - self.df.select_dtypes(['number', 'bool'])
    expected = dx.DataFrame(
        {
            'a': [5., nan, 0],
            'b': [5., 3.5, nan],
            'd': [nan, 5., 4],
            'f': [5., 1, 0],
            'g': 5. - np.zeros(3, dtype='int'),
            'h': [nan, nan, nan],
        },
        columns=list(num_bool_cols))
    assert_frame_equal(actual, expected)
def test_list_of_integer_col_selection(self):
    """Select columns with a list of integer positions.

    ``df`` is not assigned in this function; it is resolved from a scope
    outside this view.
    """
    # Several positions: the result is expected to follow the requested order.
    actual = df[:, [4, 6, 1]]
    expected = dx.DataFrame(
        {
            'b': [1.5, 8.0, 9.0, 1.0, 2.0, 3.0, 2.0, 8.0],
            'e': [10, 20, 30, 4, 5, 6, 7, 8],
            'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y'],
        },
        columns=['e', 'g', 'b'])
    assert_frame_equal(actual, expected)

    # Single-position list.
    actual = df[:, [3]]
    expected = dx.DataFrame(
        {'d': [True, False, True, False, True, False, True, False]})
    assert_frame_equal(actual, expected)
def test_sub_int(self):
    """Exercise ``-`` between a DataFrame and an integer scalar, both ways.

    ``self.df`` is a fixture defined outside this view.  Subtracting an
    int from it, or from its string columns, is expected to raise
    ``TypeError``.
    """
    with pytest.raises(TypeError):
        self.df - 5

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') - 10

    int_cols = ['a', 'f', 'g']
    num_bool_cols = 'abdfgh'

    # Columns selected as 'int': frame - int.
    actual = self.df.select_dtypes('int') - 5
    expected = dx.DataFrame(
        {
            'a': [-5, nan, 0],
            'f': [-5, -1, 0],
            'g': [-5, -5, -5],
        },
        columns=list(int_cols))
    assert_frame_equal(actual, expected)

    # Columns selected as 'int': int - frame.
    actual = 5 - self.df.select_dtypes('int')
    expected = dx.DataFrame(
        {
            'a': [5, nan, 0],
            'f': [5, 1, 0],
            'g': 5 - np.zeros(3, dtype='int'),
        },
        columns=list(int_cols))
    assert_frame_equal(actual, expected)

    # Number and bool columns: frame - int.
    actual = self.df.select_dtypes(['number', 'bool']) - 5
    expected = dx.DataFrame(
        {
            'a': [-5, nan, 0],
            'b': [-5, -3.5, nan],
            'd': [nan, -5, -4],
            'f': [-5, -1, 0],
            'g': np.zeros(3, dtype='int') - 5,
            'h': [nan, nan, nan],
        },
        columns=list(num_bool_cols))
    assert_frame_equal(actual, expected)

    # Number and bool columns: int - frame.
    actual = 5 - self.df.select_dtypes(['number', 'bool'])
    expected = dx.DataFrame(
        {
            'a': [5, nan, 0],
            'b': [5, 3.5, nan],
            'd': [nan, 5, 4],
            'f': [5, 1, 0],
            'g': 5 - np.zeros(3, dtype='int'),
            'h': [nan, nan, nan],
        },
        columns=list(num_bool_cols))
    assert_frame_equal(actual, expected)
def test_slice_with_integers_col_selection(self):
    """Select columns with integer slices.

    ``df`` is not assigned in this function; it is resolved from a scope
    outside this view.
    """
    # Plain start:stop slice.
    actual = df[:, 3:6]
    expected = dx.DataFrame(
        {
            'd': [True, False, True, False, True, False, True, False],
            'e': [10, 20, 30, 4, 5, 6, 7, 8],
            'f': [1.0, 3.0, 3.0, 3.0, 11.0, 4.0, 5.0, 1.0],
        },
        columns=['d', 'e', 'f'])
    assert_frame_equal(actual, expected)

    # Negative start with a step of 2.
    actual = df[:, -4::2]
    expected = dx.DataFrame(
        {
            'e': [10, 20, 30, 4, 5, 6, 7, 8],
            'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y'],
        },
        columns=['e', 'g'])
    assert_frame_equal(actual, expected)
def test_add_string(self):
    """Concatenate a string to string columns on the right and on the left.

    ``self.df`` is a fixture defined outside this view.  ``None``
    entries are expected to stay ``None``.
    """
    # frame + str appends the suffix.
    suffixed = self.df.select_dtypes('str') + 'aaa'
    expected_suffixed = dx.DataFrame({
        'c': ['aaa', 'baaa', 'gaaa'],
        'e': ['aaa', None, 'adaaa'],
    })
    assert_frame_equal(suffixed, expected_suffixed)

    # str + frame prepends the prefix.
    prefixed = 'aaa' + self.df.select_dtypes('str')
    expected_prefixed = dx.DataFrame({
        'c': ['aaa', 'aaab', 'aaag'],
        'e': ['aaa', None, 'aaaad'],
    })
    assert_frame_equal(prefixed, expected_prefixed)
def test_nlargest_float(self):
    """Call ``nlargest`` on float columns with each ``keep`` option."""
    frame = de.DataFrame({
        'a': [9, 10, 9, 9, 10],
        'b': [0, nan, nan, 0, 1],
        'c': [''] + list('eeaz'),
        'd': [False, False, True, False, True],
        'e': [0, 20, 30, 4, 4],
        'f': ['a', nan, 'ad', None, 'ad'],
        'g': [np.nan] * 5,
    })

    # No ``keep`` argument: both rows tied at 0.0 are expected in the result.
    actual = frame.nlargest(2, 'b')
    expected = de.DataFrame({
        'a': [10, 9, 9],
        'b': [1.0, 0.0, 0.0],
        'c': ['z', '', 'a'],
        'd': [True, False, False],
        'e': [4, 0, 4],
        'f': ['ad', 'a', None],
        'g': [nan, nan, nan],
    })
    assert_frame_equal(actual, expected)

    # keep='first': the earlier of the tied rows is expected.
    actual = frame.nlargest(2, 'b', keep='first')
    expected = de.DataFrame({
        'a': [10, 9],
        'b': [1.0, 0.0],
        'c': ['z', ''],
        'd': [True, False],
        'e': [4, 0],
        'f': ['ad', 'a'],
        'g': [nan, nan],
    })
    assert_frame_equal(actual, expected)

    # keep='last': the later of the tied rows is expected.
    actual = frame.nlargest(2, 'b', keep='last')
    expected = de.DataFrame({
        'a': [10, 9],
        'b': [1.0, 0.0],
        'c': ['z', 'a'],
        'd': [True, False],
        'e': [4, 4],
        'f': ['ad', None],
        'g': [nan, nan],
    })
    assert_frame_equal(actual, expected)

    # All-NaN column with keep='all': a single row is expected.
    actual = frame.nlargest(1, 'g', keep='all')
    expected = de.DataFrame({
        'a': [9],
        'b': [0.0],
        'c': [''],
        'd': [False],
        'e': [0],
        'f': ['a'],
        'g': [nan],
    })
    assert_frame_equal(actual, expected)
def test_slice_labels_and_integer_col_selection(self):
    """Select columns with slices that mix labels and integer positions.

    ``df`` is not assigned in this function; it is resolved from a scope
    outside this view.
    """
    # Label start, integer stop.
    actual = df[:, 'c':5]
    expected = dx.DataFrame(
        {
            'c': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
            'd': [True, False, True, False, True, False, True, False],
            'e': [10, 20, 30, 4, 5, 6, 7, 8],
        },
        columns=['c', 'd', 'e'])
    assert_frame_equal(actual, expected)

    # Integer start, label stop, negative step: columns come back reversed.
    actual = df[:, 6:'d':-1]
    expected = dx.DataFrame(
        {
            'd': [True, False, True, False, True, False, True, False],
            'e': [10, 20, 30, 4, 5, 6, 7, 8],
            'f': [1.0, 3.0, 3.0, 3.0, 11.0, 4.0, 5.0, 1.0],
            'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y'],
        },
        columns=['g', 'f', 'e', 'd'])
    assert_frame_equal(actual, expected)
def test_add_float(self):
    """Exercise ``+`` between a DataFrame and a float scalar, both ways.

    ``self.df`` is a fixture defined outside this view.  Adding a float
    to the whole fixture is expected to raise ``TypeError``.  For each
    column selection, the left-hand and right-hand additions are
    compared against the same expected frame.
    """
    some_float = 5.0

    with pytest.raises(TypeError):
        self.df + some_float

    # Columns selected as 'int'.
    expected = dx.DataFrame(
        {
            'a': [5., nan, 10],
            'f': [5., 9, 10],
            'g': [5., 5, 5],
        },
        columns=['a', 'f', 'g'])
    actual = self.df.select_dtypes('int') + some_float
    assert_frame_equal(actual, expected)
    actual = some_float + self.df.select_dtypes('int')
    assert_frame_equal(actual, expected)

    # Columns selected as 'number'.
    actual = self.df.select_dtypes('number') + some_float
    expected = dx.DataFrame(
        {
            'a': [5., nan, 10],
            'b': [5., 6.5, nan],
            'f': [5., 9, 10],
            'g': [5., 5, 5],
            'h': [nan, nan, nan],
        },
        columns=list('abfgh'))
    assert_frame_equal(actual, expected)
    actual = some_float + self.df.select_dtypes('number')
    assert_frame_equal(actual, expected)

    # Number and bool columns.
    actual = self.df.select_dtypes(['number', 'bool']) + some_float
    expected = dx.DataFrame(
        {
            'a': [5., nan, 10],
            'b': [5., 6.5, nan],
            'd': [nan, 5., 6],
            'f': [5., 9, 10],
            'g': [5., 5, 5],
            'h': [nan, nan, nan],
        },
        columns=list('abdfgh'))
    assert_frame_equal(actual, expected)
    actual = some_float + self.df.select_dtypes(['number', 'bool'])
    assert_frame_equal(actual, expected)
def test_integer_condition(self):
    """Filter rows with boolean conditions built from integer columns.

    ``self.df`` is a fixture defined outside this view.  Each boolean
    selection is compared with selecting the expected rows by position.
    """
    # Simple "greater than".
    mask = self.df[:, 'a'] > 4
    by_mask = self.df[mask, :]
    by_position = self.df[[2, 3, 6], :]
    assert_frame_equal(by_mask, by_position)

    # Equality.
    mask = self.df[:, 'a'] == 0
    by_mask = self.df[mask, :]
    by_position = self.df[[0, 1], :]
    assert_frame_equal(by_mask, by_position)

    # Two conditions combined with "and".
    mask = (self.df[:, 'a'] > 2) & (self.df[:, 'i'] < 6)
    by_mask = self.df[mask, :]
    by_position = self.df[[3, 4, 5, 6], :]
    assert_frame_equal(by_mask, by_position)

    # Two conditions combined with "or".
    mask = (self.df[:, 'a'] > 2) | (self.df[:, 'i'] < 6)
    by_mask = self.df[mask, :]
    by_position = self.df[[0, 2, 3, 4, 5, 6], :]
    assert_frame_equal(by_mask, by_position)

    # Negation of the "or" condition.
    mask = ~((self.df[:, 'a'] > 2) | (self.df[:, 'i'] < 6))
    by_mask = self.df[mask, :]
    by_position = self.df[[1, 7], :]
    assert_frame_equal(by_mask, by_position)

    # Same negated condition, combined with a list of column labels.
    mask = ~((self.df[:, 'a'] > 2) | (self.df[:, 'i'] < 6))
    by_mask = self.df[mask, ['d', 'b']]
    expected = dx.DataFrame(
        {'b': [1.512344353, 8], 'd': [False, False]},
        columns=['d', 'b'])
    assert_frame_equal(by_mask, expected)
def test_factorize(self):
    """Factorize int, float, string and bool columns.

    ``factorize`` is unpacked into two arrays.  Judging by the expected
    values, the first holds one integer code per row and the second the
    distinct values in order of first appearance.
    """
    frame = de.DataFrame({
        'a': [9, 10, 9, 9, 10],
        'b': [0, nan, nan, 0, 1],
        'c': [''] + list('eeaz'),
        'd': [False, False, True, False, True],
        'e': [0, 20, 30, 4, 4],
        'f': ['a', nan, 'ad', None, 'ad'],
        'g': [np.nan] * 5,
    })

    # Integer column.
    codes, uniques = frame.factorize('a')
    assert_array_equal(codes, array([0, 1, 0, 0, 1]))
    assert_array_equal(uniques, array([9, 10]))

    # Float column: NaN is expected to get its own code.
    codes, uniques = frame.factorize('b')
    assert_array_equal(codes, array([0, 1, 1, 0, 2]))
    assert_array_equal(uniques, array([0., nan, 1.]))

    # String column.
    codes, uniques = frame.factorize('c')
    assert_array_equal(codes, array([0, 1, 1, 2, 3]))
    assert_array_equal(uniques, array(['', 'e', 'a', 'z'], dtype=object))

    # Boolean column.
    codes, uniques = frame.factorize('d')
    assert_array_equal(codes, array([0, 0, 1, 0, 1]))
    assert_array_equal(uniques, array([False, True]))