def test_truediv_bool(self):
    """Check ``/`` between the fixture frame and a bool scalar, in both operand orders."""
    divisor = True
    column_names = 'abfgh'

    # The unfiltered frame and its string-only selection are expected to raise.
    with pytest.raises(TypeError):
        self.df / divisor

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') / divisor

    with pytest.raises(TypeError):
        self.df / divisor

    # frame / bool
    quotient = self.df.select_dtypes('number') / divisor
    expected = dx.DataFrame(
        {
            'a': [0., nan, 5],
            'b': [0, 1.5, nan],
            'f': [0., 4, 5],
            'g': np.zeros(3, dtype='float64'),
            'h': [nan, nan, nan],
        },
        columns=list(column_names),
    )
    assert_frame_equal(quotient, expected)

    # bool / frame
    quotient = divisor / self.df.select_dtypes('number')
    expected = dx.DataFrame(
        {
            'a': [np.inf, nan, .2],
            'b': [np.inf, 1 / 1.5, nan],
            'f': [np.inf, .25, .2],
            'g': [np.inf] * 3,
            'h': [nan, nan, nan],
        },
        columns=list(column_names),
    )
    assert_frame_equal(quotient, expected)
def test_value_counts_sort_na(self):
    """Check ``value_counts`` for combinations of the ``sort`` and ``dropna`` options."""
    flights = dx.DataFrame(
        {'AIRLINE': ['EV', 'VX', 'AA', 'UA', 'DL', 'B6', 'WN', 'AA', 'DL',
                     'AS', None, None],
         'DAY_OF_WEEK': [2, 1, 6, 4, 5, 5, 7, 5, 1, 4, 3, 3],
         'DEPARTURE_DELAY': [nan, -4.0, -1.0, -4.0, -1.0, 22.0, -3.0, 3.0,
                             21.0, -2.0, nan, 22]})

    # Each case: (column to count, keyword options, expected frame contents).
    cases = (
        ('DAY_OF_WEEK', {'sort': False},
         {'DAY_OF_WEEK': [1, 2, 3, 4, 5, 6, 7],
          'count': [2, 1, 2, 2, 3, 1, 1]}),
        ('DEPARTURE_DELAY', {'sort': False},
         {'DEPARTURE_DELAY': [-4.0, -1.0, 22.0, -3.0, 3.0, 21.0, -2.0],
          'count': [2, 2, 2, 1, 1, 1, 1]}),
        ('DEPARTURE_DELAY', {'dropna': False},
         {'DEPARTURE_DELAY': [22.0, -1.0, -4.0, nan, -2.0, 21.0, 3.0, -3.0],
          'count': [2, 2, 2, 2, 1, 1, 1, 1]}),
        ('AIRLINE', {'dropna': False},
         {'AIRLINE': [None, 'DL', 'AA', 'AS', 'WN', 'B6', 'UA', 'VX', 'EV'],
          'count': [2, 2, 2, 1, 1, 1, 1, 1, 1]}),
        ('DEPARTURE_DELAY', {'dropna': False, 'sort': False},
         {'DEPARTURE_DELAY': [nan, -4.0, -1.0, 22.0, -3.0, 3.0, 21.0, -2.0],
          'count': [2, 2, 2, 2, 1, 1, 1, 1]}),
    )

    for column, options, expected_data in cases:
        counted = flights.value_counts(column, **options)
        expected = dx.DataFrame(expected_data)
        assert_frame_equal(counted, expected)
def test_mod_float(self):
    """Check ``%`` with a float scalar: rejected operands, then element-wise remainders."""
    with pytest.raises(TypeError):
        self.df % 5.

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') % 10.

    with pytest.raises(TypeError):
        self.df % 'asdf'

    column_names = 'abfgh'
    dividend = dx.DataFrame(
        {
            'a': [6., 7, 10],
            'b': [0., 2, nan],
            'f': [0., 10, 3],
            'g': np.zeros(3, dtype='float64'),
            'h': [nan, nan, nan],
        },
        columns=list(column_names),
    )

    remainder = dividend % 3.
    expected = dx.DataFrame(
        {
            'a': [0., 1, 1],
            'b': [0., 2, nan],
            'f': [0., 1, 0],
            'g': np.zeros(3, dtype='float64'),
            'h': [nan, nan, nan],
        },
        columns=list(column_names),
    )
    assert_frame_equal(remainder, expected)
def test_bfillna(self):
    """Check ``fillna(method='bfill')`` with and without ``limit``, plus rejected arguments."""
    frame = de.DataFrame({
        'a': [4, nan, nan, nan, 3, 2],
        'b': [None, 'a', 'd', None, None, 'er'],
        'c': [nan, nan, 5, nan, 7, nan],
    })

    # Unlimited backward fill.
    filled = frame.fillna(method='bfill')
    expected = de.DataFrame({
        'a': [4.0, 3.0, 3.0, 3.0, 3.0, 2.0],
        'b': ['a', 'a', 'd', 'er', 'er', 'er'],
        'c': [5.0, 5.0, 5.0, 7.0, 7.0, nan],
    })
    assert_frame_equal(filled, expected)

    # Backward fill with limit=1.
    filled = frame.fillna(method='bfill', limit=1)
    expected = de.DataFrame({
        'a': [4.0, nan, nan, 3.0, 3.0, 2.0],
        'b': ['a', 'a', 'd', None, 'er', 'er'],
        'c': [nan, 5.0, 5.0, 7.0, 7.0, nan],
    })
    assert_frame_equal(filled, expected)

    # Argument combinations that are expected to raise ValueError.
    rejected_arguments = (
        {'method': 'bfill', 'limit': 0},
        {'method': 'bfill', 'limit': 1, 'fill_function': 'mean'},
        {'values': 10, 'method': 'bfill'},
    )
    for bad_kwargs in rejected_arguments:
        with pytest.raises(ValueError):
            frame.fillna(**bad_kwargs)
def test_less_than_equal(self):
    """Check ``<=`` against a scalar: rejected operands, then an element-wise boolean frame."""
    with pytest.raises(TypeError):
        self.df <= 5

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') <= 10

    with pytest.raises(TypeError):
        self.df <= 'asdf'

    numbers = dx.DataFrame({
        'a': [6, 7, 10],
        'b': [0, 2, nan],
        'f': [0, 10, 3],
        'g': np.zeros(3, dtype='int'),
        'h': [nan, nan, nan],
    })

    mask = numbers <= 3
    expected = dx.DataFrame({
        'a': [False, False, False],
        'b': [True, True, False],
        'f': [True, False, True],
        'g': np.ones(3, dtype='bool'),
        'h': [False] * 3,
    })
    assert_frame_equal(mask, expected)
def test_drop_rows(self):
    """Check ``drop(rows=...)`` with one index, an out-of-range index, and a list of indexes."""
    frame = de.DataFrame({
        'a': [0, 0, 5, 9],
        'b': [0, 1.5, 8, 9],
        'c': [''] + list('efs'),
        'd': [False, False, True, False],
        'e': [0, 20, 30, 4],
        'f': ['a', nan, 'ad', 'effd'],
        'g': [np.nan] * 4,
    })

    # Single integer row.
    remaining = frame.drop(rows=3)
    expected = de.DataFrame({
        'a': [0, 0, 5],
        'b': [0.0, 1.5, 8.0],
        'c': ['', 'e', 'f'],
        'd': [False, False, True],
        'e': [0, 20, 30],
        'f': ['a', None, 'ad'],
        'g': [nan, nan, nan],
    })
    assert_frame_equal(remaining, expected)

    # Row 5 does not exist in a four-row frame.
    with pytest.raises(IndexError):
        frame.drop(rows=5)

    # Mixed negative and non-negative indexes.
    remaining = frame.drop(rows=[-1, 0, -3])
    expected = de.DataFrame({
        'a': [5],
        'b': [8.0],
        'c': ['f'],
        'd': [True],
        'e': [30],
        'f': ['ad'],
        'g': [nan],
    })
    assert_frame_equal(remaining, expected)
def test_sort_values_multiple(self):
    """Check ``sort_values`` on two columns with scalar and per-column ``ascending``."""
    frame = de.DataFrame({'a': [2, 3, nan, 6, 3, 2],
                          'b': [None, 'f', 'd', 'f', 'd', 'er'],
                          'c': [12, 444, -5.6, 5, 1, 7]})

    # Each case: (sort columns, ascending argument, expected frame contents).
    cases = (
        (['a', 'b'], False,
         {'a': [6.0, 3.0, 3.0, 2.0, 2.0, nan],
          'b': ['f', 'f', 'd', 'er', None, 'd'],
          'c': [5.0, 444.0, 1.0, 7.0, 12.0, -5.6]}),
        (['a', 'b'], True,
         {'a': [2.0, 2.0, 3.0, 3.0, 6.0, nan],
          'b': ['er', None, 'd', 'f', 'f', 'd'],
          'c': [7.0, 12.0, 1.0, 444.0, 5.0, -5.6]}),
        (['a', 'b'], [True, False],
         {'a': [2.0, 2.0, 3.0, 3.0, 6.0, nan],
          'b': ['er', None, 'f', 'd', 'f', 'd'],
          'c': [7.0, 12.0, 444.0, 1.0, 5.0, -5.6]}),
        (['a', 'b'], [False, True],
         {'a': [6.0, 3.0, 3.0, 2.0, 2.0, nan],
          'b': ['f', 'd', 'f', 'er', None, 'd'],
          'c': [5.0, 1.0, 444.0, 7.0, 12.0, -5.6]}),
        (['b', 'a'], [False, True],
         {'a': [3.0, 6.0, 2.0, 3.0, nan, 2.0],
          'b': ['f', 'f', 'er', 'd', 'd', None],
          'c': [444.0, 5.0, 7.0, 1.0, -5.6, 12.0]}),
    )

    for sort_columns, direction, expected_data in cases:
        ordered = frame.sort_values(sort_columns, ascending=direction)
        expected = de.DataFrame(expected_data)
        assert_frame_equal(ordered, expected)
def test_truediv_float(self):
    """Check ``/`` between the fixture frame and a float scalar, in both operand orders."""
    with pytest.raises(TypeError):
        self.df / 5.

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') / 10.

    with pytest.raises(TypeError):
        self.df / 'asdf'

    column_names = 'abfgh'

    # frame / float
    quotient = self.df.select_dtypes('number') / 2.
    expected = dx.DataFrame(
        {
            'a': [0, nan, 2.5],
            'b': [0, .75, nan],
            'f': [0, 2, 2.5],
            'g': np.zeros(3),
            'h': [nan, nan, nan],
        },
        columns=list(column_names),
    )
    assert_frame_equal(quotient, expected)

    # float / frame
    quotient = 10. / self.df.select_dtypes('number')
    expected = dx.DataFrame(
        {
            'a': [np.inf, nan, 2],
            'b': [np.inf, 10 / 1.5, nan],
            'f': [np.inf, 2.5, 2],
            'g': [np.inf] * 3,
            'h': [nan, nan, nan],
        },
        columns=list(column_names),
    )
    assert_frame_equal(quotient, expected)
def test_scalar_column_with_list_slice_row_selection(self):
    """Select rows by list or slice together with a single column.

    NOTE(review): ``df`` is not defined inside this method; presumably it
    is a module-level fixture frame -- confirm against the rest of the file.
    """
    # List of rows, integer column.
    selected = df[[4, 6], 2]
    expected = dx.DataFrame({'c': ['e', 'g']})
    assert_frame_equal(selected, expected)

    # One-element row list, integer column.
    selected = df[[4], 2]
    expected = dx.DataFrame({'c': ['e']})
    assert_frame_equal(selected, expected)

    # List of rows, column given by name.
    selected = df[[5, 2], 'f']
    expected = dx.DataFrame({'f': [4.0, 3.0]})
    assert_frame_equal(selected, expected)

    # Open-ended row slice.
    selected = df[3:, 'f']
    expected = dx.DataFrame({'f': [3.0, 11.0, 4.0, 5.0, 1.0]})
    assert_frame_equal(selected, expected)

    # Row slice with a negative step.
    selected = df[5::-2, 'b']
    expected = dx.DataFrame({'b': [3.0, 1.0, 8.0]})
    assert_frame_equal(selected, expected)
def test_add_df(self):
    """Check frame + frame: the fixture added to itself, then two numeric fixtures."""
    # self.df + self.df
    doubled_expected = dx.DataFrame({
        'a': np.array([2, 4]),
        'b': np.array([-20, 20]),
        'c': np.array([3., 16.]),
        'd': np.array([4.6, nan]),
        'e': np.array(['aa', 'bb'], dtype=object),
        'f': np.array([True, False]),
        'g': np.array([0, 172800000000000], dtype='timedelta64[ns]'),
    })
    assert_frame_equal(self.df + self.df, doubled_expected)

    # self.df_number + self.df_number2
    summed_expected = dx.DataFrame({
        'a': array([5, 7]),
        'b': array([-10, 10]),
        'c': array([3.5, 10.]),
        'd': array([0.3, nan]),
    })
    summed = self.df_number + self.df_number2
    assert_frame_equal(summed, summed_expected)
def test_boolean_column_selection(self):
    """Select columns with a boolean one-row frame and with a boolean NumPy array."""
    frame = dx.DataFrame({
        'a': [0, 0, 5, 9, 3, 4, 5, 1],
        'b': [0, 1.512344353, 8, 9, np.nan, 3, 2, 8],
        'c': [''] + list('bgggzgh'),
        'd': [False, False, True, False] * 2,
        'e': [0, 20, 30, 4, 5, 6, 7, 8],
        'f': [0., 3, 3, 3, 11, 4, 5, 1],
        'g': ['', None, 'ad', 'effd', 'ef', None, 'ett', 'zzzz'],
        'h': [0, 4, 5, 6, 7, 8, 9, 0],
        'i': np.array([0, 7, 6, 5, 4, 3, 2, 11]),
        'j': np.zeros(8, dtype='int'),
        'k': np.ones(8) - 1,
        'l': [np.nan] * 8,
    })

    # Boolean frame as the column selector: keep int columns whose row 1 is 0.
    int_columns = frame.select_dtypes('int')
    row_one_is_zero = int_columns[1, :] == 0
    selected = int_columns[:, row_one_is_zero]
    expected = dx.DataFrame({'a': [0, 0, 5, 9, 3, 4, 5, 1],
                             'j': np.zeros(8, dtype='int')})
    assert_frame_equal(selected, expected)

    # Boolean array as the column selector, combined with a row slice.
    column_mask = np.array([False, False, False, True, True, False,
                            False, False, False, False, False, False])
    selected = frame[-3:, column_mask]
    expected = dx.DataFrame({'d': [False, True, False], 'e': [6, 7, 8]})
    assert_frame_equal(selected, expected)
def test_mod_division_frame(self):
    """Check ``%`` with an int scalar: rejected operands, then element-wise remainders."""
    with pytest.raises(TypeError):
        self.df % 5

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') % 10

    with pytest.raises(TypeError):
        self.df % 'asdf'

    column_names = 'abfgh'
    dividend = dx.DataFrame(
        {
            'a': [6, 7, 10],
            'b': [0, 2, nan],
            'f': [0, 10, 3],
            'g': np.zeros(3, dtype='int'),
            'h': [np.nan] * 3,
        },
        columns=list(column_names),
    )

    remainder = dividend % 3
    expected = dx.DataFrame(
        {
            'a': [0, 1, 1],
            'b': [0, 2, nan],
            'f': [0, 1, 0],
            'g': np.zeros(3, dtype='int'),
            'h': [np.nan] * 3,
        },
        columns=list(column_names),
    )
    assert_frame_equal(remainder, expected)
def test_floordiv_frame(self):
    """Check ``//`` with an int scalar: rejected operands, then element-wise quotients."""
    with pytest.raises(TypeError):
        self.df // 5

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') // 10

    with pytest.raises(TypeError):
        self.df // 'asdf'

    column_names = 'abfgh'
    dividend = dx.DataFrame(
        {
            'a': [0, 0, 10],
            'b': [0, 20, nan],
            'f': [0, 100, 10],
            'g': np.zeros(3, dtype='int'),
            'h': [np.nan] * 3,
        },
        columns=list(column_names),
    )

    floored = dividend // 3
    expected = dx.DataFrame(
        {
            'a': [0, 0, 3],
            'b': [0, 6, nan],
            'f': [0, 33, 3],
            'g': np.zeros(3, dtype='int'),
            'h': [np.nan] * 3,
        },
        columns=list(column_names),
    )
    assert_frame_equal(floored, expected)
def test_mult_frame(self):
    """Check ``*`` between the fixture frame and the int 2, in both operand orders."""

    def build_expected():
        # A fresh frame per assertion; both operand orders share one expectation.
        return dx.DataFrame({
            'a': [0, 0, 10],
            'b': [0, 3, nan],
            'c': ['', 'bb', 'gg'],
            'd': [0, 0, 2],
            'e': ['', None, 'adad'],
            'f': [0, 8, 10],
            'g': np.zeros(3, dtype='int'),
            'h': [np.nan] * 3,
        })

    # frame * scalar
    product = self.df * 2
    expected = build_expected()
    assert_frame_equal(product, expected)

    # scalar * frame
    product = 2 * self.df
    expected = build_expected()
    assert_frame_equal(product, expected)
def test_greater_than_equal(self):
    """Check ``>=`` against a scalar: rejected operands, then an element-wise boolean frame."""
    with pytest.raises(TypeError):
        self.df >= 5

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') >= 10

    with pytest.raises(TypeError):
        self.df >= 'asdf'

    column_names = 'abfgh'
    numbers = dx.DataFrame(
        {
            'a': [6, 7, 10],
            'b': [0, 2, nan],
            'f': [0, 10, 3],
            'g': np.zeros(3, dtype='int'),
            'h': [nan, nan, nan],
        },
        columns=list(column_names),
    )

    mask = numbers >= 3
    expected = dx.DataFrame(
        {
            'a': [True, True, True],
            'b': [False, False, False],
            'f': [False, True, True],
            'g': np.zeros(3, dtype='bool'),
            'h': [False] * 3,
        },
        columns=list(column_names),
    )
    assert_frame_equal(mask, expected)
def test_mod_bool(self):
    """Check ``%`` with a bool scalar: rejected operands, then element-wise remainders."""
    modulus = True

    # The unfiltered frame and its string-only selection are expected to raise.
    with pytest.raises(TypeError):
        self.df % modulus

    with pytest.raises(TypeError):
        self.df.select_dtypes('str') % modulus

    with pytest.raises(TypeError):
        self.df % modulus

    column_names = 'abfgh'
    dividend = dx.DataFrame(
        {
            'a': [6, 7, 10],
            'b': [0, 2, nan],
            'f': [0, 10, 3],
            'g': np.zeros(3, dtype='int64'),
            'h': [nan, nan, nan],
        },
        columns=list(column_names),
    )

    remainder = dividend % modulus
    expected = dx.DataFrame(
        {
            'a': [0, 0, 0],
            'b': [0, 0, nan],
            'f': [0, 0, 0],
            'g': np.zeros(3, dtype='int64'),
            'h': [nan, nan, nan],
        },
        columns=list(column_names),
    )
    assert_frame_equal(remainder, expected)
def test_selectdtypes_str(self):
    """``select_dtypes('str')`` on the fixture frame should yield columns ``c`` and ``g``."""
    expected_columns = {
        'c': [''] + list('bgggzgh'),
        'g': ['', None, 'ad', 'effd', 'ef', None, 'ett', 'zzzz'],
    }

    selected = self.df.select_dtypes('str')
    expected = dx.DataFrame(expected_columns, columns=['c', 'g'])
    assert_frame_equal(selected, expected)
def test_add_string(self):
    """Adding the string fixture frame to itself should concatenate each value with itself."""
    expected = dx.DataFrame({
        'a': array(['oneone', 'twotwo'], dtype=object),
        'b': array(['threethree', 'fourfour'], dtype=object),
    })

    concatenated = self.df_strings + self.df_strings
    assert_frame_equal(expected, concatenated)
def test_selectdtypes_float(self):
    """``select_dtypes('float')`` on the fixture frame should yield columns ``b``, ``f``, ``k``, ``l``."""
    selected = self.df.select_dtypes('float')

    expected_columns = {
        'b': [0, 1.512344353, 8, 9, np.nan, 3, 2, 8],
        'f': [0., 3, 3, 3, 11, 4, 5, 1],
        'k': np.ones(8) - 1,
        'l': [np.nan] * 8,
    }
    expected = dx.DataFrame(expected_columns, columns=list('bfkl'))
    assert_frame_equal(selected, expected)
def test_add_one_col(self):
    """Adding the one-column fixture to the numeric fixture, in both operand orders."""
    expected = dx.DataFrame({
        'a': np.array([6., 4.1]),
        'b': np.array([-5., 12.1]),
        'c': np.array([6.5, 10.1]),
        'd': np.array([7.3, nan]),
    })

    # Numeric frame on the left.
    total = self.df_number + self.df_one_col
    assert_frame_equal(total, expected)

    # One-column frame on the left; same expected result.
    total = self.df_one_col + self.df_number
    assert_frame_equal(total, expected)
def test_list_of_integer_col_selection(self):
    """Select columns with a list of integer positions.

    NOTE(review): ``df`` is not defined inside this method; presumably it
    is a module-level fixture frame -- confirm against the rest of the file.
    """
    # Several positions, in non-sorted order.
    selected = df[:, [4, 6, 1]]
    expected_columns = {
        'b': [1.5, 8.0, 9.0, 1.0, 2.0, 3.0, 2.0, 8.0],
        'e': [10, 20, 30, 4, 5, 6, 7, 8],
        'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y'],
    }
    expected = dx.DataFrame(expected_columns, columns=['e', 'g', 'b'])
    assert_frame_equal(selected, expected)

    # A single position wrapped in a list.
    selected = df[:, [3]]
    expected = dx.DataFrame(
        {'d': [True, False, True, False, True, False, True, False]})
    assert_frame_equal(selected, expected)
def test_add_one_row(self):
    """Adding the one-row numeric fixture to the numeric fixture, in both operand orders.

    The same expected frame is used for ``df_number + df_one_row_number``
    and for ``df_one_row_number + df_number``.
    """
    df_answer = dx.DataFrame({
        'a': array([6, 7]),
        'b': array([89, 109]),
        'c': array([3.6, 10.1]),
        'd': array([nan, nan]),
    })

    # Multi-row frame on the left.
    df_result = self.df_number + self.df_one_row_number
    assert_frame_equal(df_result, df_answer)

    # One-row frame on the left, so the reversed operand order is covered
    # as well rather than the same expression being evaluated twice.
    df_result = self.df_one_row_number + self.df_number
    assert_frame_equal(df_answer, df_result)
def test_list_of_string_col_selection(self):
    """Select columns with a list of column names.

    NOTE(review): ``df`` is not defined inside this method; presumably it
    is a module-level fixture frame -- confirm against the rest of the file.
    """
    # Several names, in non-sorted order.
    selected = df[:, ['b', 'd', 'a']]
    expected_columns = {
        'a': [1, 2, 5, 9, 3, 4, 5, 1],
        'b': [1.5, 8.0, 9.0, 1.0, 2.0, 3.0, 2.0, 8.0],
        'd': [True, False, True, False, True, False, True, False],
    }
    expected = dx.DataFrame(expected_columns, columns=['b', 'd', 'a'])
    assert_frame_equal(selected, expected)

    # A single name wrapped in a list.
    selected = df[:, ['a']]
    expected = dx.DataFrame({'a': [1, 2, 5, 9, 3, 4, 5, 1]})
    assert_frame_equal(selected, expected)
def test_float_to_float(self):
    """``astype({'e': 'float'})`` should produce the expected frame below.

    NOTE(review): ``df``, ``NaTdt`` and ``NaTtd`` are not defined inside this
    method; presumably they are module-level names -- confirm against the
    rest of the file.
    """
    converted = df.astype({'e': 'float'})

    expected_columns = {
        'a': [1, nan, 10, 0],
        'b': ['a', 'a', 'c', 'c'],
        'c': [5, 1, nan, 3],
        'd': [True, False, True, nan],
        'e': [3.2, nan, 1, 0],
        'f': np.array([5, 10, NaTdt, 4], 'datetime64[Y]'),
        'g': np.array([22, 10, NaTtd, 8], 'timedelta64[m]'),
    }
    expected = dx.DataFrame(expected_columns)
    assert_frame_equal(converted, expected)
def test_get_dtypes(self):
    """``dtypes`` should return a two-column frame of column names and data-type labels."""
    reported = self.df.dtypes

    expected_columns = {
        'Column Name': list('abcdefghijkl'),
        'Data Type': [
            'int', 'float', 'str', 'bool', 'int', 'float',
            'str', 'int', 'int', 'int', 'float', 'float',
        ],
    }
    expected = dx.DataFrame(expected_columns,
                            columns=['Column Name', 'Data Type'])
    assert_frame_equal(reported, expected)
def test_slice_with_integers_col_selection(self):
    """Select columns with integer slices.

    NOTE(review): ``df`` is not defined inside this method; presumably it
    is a module-level fixture frame -- confirm against the rest of the file.
    """
    # Plain start:stop slice.
    selected = df[:, 3:6]
    expected_columns = {
        'd': [True, False, True, False, True, False, True, False],
        'e': [10, 20, 30, 4, 5, 6, 7, 8],
        'f': [1.0, 3.0, 3.0, 3.0, 11.0, 4.0, 5.0, 1.0],
    }
    expected = dx.DataFrame(expected_columns, columns=['d', 'e', 'f'])
    assert_frame_equal(selected, expected)

    # Negative start with a step of two.
    selected = df[:, -4::2]
    expected_columns = {
        'e': [10, 20, 30, 4, 5, 6, 7, 8],
        'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y'],
    }
    expected = dx.DataFrame(expected_columns, columns=['e', 'g'])
    assert_frame_equal(selected, expected)
def test_selectdtypes_ints(self):
    """``select_dtypes('int')`` on the fixture frame should yield columns ``a``, ``e``, ``h``, ``i``, ``j``."""
    selected = self.df.select_dtypes('int')

    expected_columns = {
        'a': [0, 0, 5, 9, 3, 4, 5, 1],
        'e': [0, 20, 30, 4, 5, 6, 7, 8],
        'h': [0, 4, 5, 6, 7, 8, 9, 0],
        'i': np.array([0, 7, 6, 5, 4, 3, 2, 11]),
        'j': np.zeros(8, dtype='int'),
    }
    expected = dx.DataFrame(expected_columns, columns=list('aehij'))
    assert_frame_equal(selected, expected)
def test_sort_values_one(self):
    """Check ``sort_values`` on a single column, ascending and descending."""
    frame = de.DataFrame({
        'a': [4, 3, nan, 6, 3, 2],
        'b': [None, 'f', 'd', 'f', 'd', 'er'],
        'c': [12, 444, -5.6, 5, 1, 7],
    })

    # Ascending on the numeric column.
    ordered = frame.sort_values('a')
    expected = de.DataFrame(data={
        'a': [2.0, 3.0, 3.0, 4.0, 6.0, nan],
        'b': ['er', 'f', 'd', None, 'f', 'd'],
        'c': [7.0, 444.0, 1.0, 12.0, 5.0, -5.6],
    })
    assert_frame_equal(ordered, expected)

    # Ascending on the string column.
    ordered = frame.sort_values('b')
    expected = de.DataFrame({
        'a': [nan, 3.0, 2.0, 3.0, 6.0, 4.0],
        'b': ['d', 'd', 'er', 'f', 'f', None],
        'c': [-5.6, 1.0, 7.0, 444.0, 5.0, 12.0],
    })
    assert_frame_equal(ordered, expected)

    # The descending cases use a second frame whose first 'a' value is 2.
    frame = de.DataFrame({
        'a': [2, 3, nan, 6, 3, 2],
        'b': [None, 'f', 'd', 'f', 'd', 'er'],
        'c': [12, 444, -5.6, 5, 1, 7],
    })

    # Descending on the string column.
    ordered = frame.sort_values('b', ascending=False)
    expected = de.DataFrame({
        'a': [3.0, 6.0, 2.0, nan, 3.0, 2.0],
        'b': ['f', 'f', 'er', 'd', 'd', None],
        'c': [444.0, 5.0, 7.0, -5.6, 1.0, 12.0],
    })
    assert_frame_equal(ordered, expected)

    # Descending on the numeric column.
    ordered = frame.sort_values('a', ascending=False)
    expected = de.DataFrame({
        'a': [6.0, 3.0, 3.0, 2.0, 2.0, nan],
        'b': ['f', 'f', 'd', None, 'er', 'd'],
        'c': [5.0, 444.0, 1.0, 12.0, 7.0, -5.6],
    })
    assert_frame_equal(ordered, expected)
def test_add_string(self):
    """Check ``+`` between the string columns and the literal ``'aaa'``, in both operand orders."""
    # frame + str: the literal is appended.
    appended = self.df.select_dtypes('str') + 'aaa'
    expected = dx.DataFrame({
        'c': ['aaa', 'baaa', 'gaaa'],
        'e': ['aaa', None, 'adaaa'],
    })
    assert_frame_equal(appended, expected)

    # str + frame: the literal is prepended.
    prepended = 'aaa' + self.df.select_dtypes('str')
    expected = dx.DataFrame({
        'c': ['aaa', 'aaab', 'aaag'],
        'e': ['aaa', None, 'aaaad'],
    })
    assert_frame_equal(prepended, expected)
def test_where_string_cols(self):
    """Check ``where`` on two string columns with zero, one, and two replacement values."""
    frame = de.DataFrame({
        'a': [9, 10, 9, 9, 10],
        'b': [0, nan, nan, 0, 1],
        'c': [''] + list('eeaz'),
        'd': [False, False, True, False, True],
        'e': [0, 20, 30, 4, 4],
        'f': ['a', nan, 'ad', None, 'ad'],
        'g': [np.nan] * 5,
    })
    condition = frame[:, 'e'] > 9

    # Each case: (extra positional arguments after the condition, expected contents).
    cases = (
        ((),
         {'c': [None, 'e', 'e', None, None],
          'f': [None, None, 'ad', None, None]}),
        ((22, 99),
         {'c': [99, 22, 22, 99, 99],
          'f': [99, 22, 22, 99, 99]}),
        (('t',),
         {'c': [None, 't', 't', None, None],
          'f': [None, 't', 't', None, None]}),
        (('t', 'y'),
         {'c': ['y', 't', 't', 'y', 'y'],
          'f': ['y', 't', 't', 'y', 'y']}),
    )

    for replacements, expected_data in cases:
        replaced = frame[:, ['c', 'f']].where(condition, *replacements)
        expected = de.DataFrame(expected_data)
        assert_frame_equal(replaced, expected)