Ejemplo n.º 1
0
    def test_truediv_bool(self):
        """Check true division between the fixture frame and a bool.

        Dividing the whole frame, or only its ``'str'`` columns, by a bool
        must raise ``TypeError``.  The ``'number'`` columns are divided in
        both operand orders and compared against hand-written results.
        """
        some_bool = True

        with pytest.raises(TypeError):
            self.df / some_bool

        with pytest.raises(TypeError):
            self.df.select_dtypes('str') / some_bool

        # frame / bool
        result = self.df.select_dtypes('number') / some_bool
        expected = dx.DataFrame(
            {
                'a': [0., nan, 5],
                'b': [0, 1.5, nan],
                'f': [0., 4, 5],
                'g': np.zeros(3, dtype='float64'),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))
        assert_frame_equal(result, expected)

        # bool / frame (reflected operand order)
        result = some_bool / self.df.select_dtypes('number')
        expected = dx.DataFrame(
            {
                'a': [np.inf, nan, .2],
                'b': [np.inf, 1 / 1.5, nan],
                'f': [np.inf, .25, .2],
                'g': [np.inf] * 3,
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))
        assert_frame_equal(result, expected)
Ejemplo n.º 2
0
    def test_value_counts_sort_na(self):
        """Check ``value_counts`` under combinations of ``sort`` and ``dropna``."""
        flights = dx.DataFrame({
            'AIRLINE': ['EV', 'VX', 'AA', 'UA', 'DL', 'B6', 'WN', 'AA', 'DL', 'AS', None, None],
            'DAY_OF_WEEK': [2, 1, 6, 4, 5, 5, 7, 5, 1, 4, 3, 3],
            'DEPARTURE_DELAY': [nan, -4.0, -1.0, -4.0, -1.0, 22.0, -3.0, 3.0, 21.0,
                                -2.0, nan, 22],
        })

        # Each entry: (column, keyword options, expected frame contents).
        # The scenarios are checked in the order listed.
        scenarios = (
            ('DAY_OF_WEEK', {'sort': False},
             {'DAY_OF_WEEK': [1, 2, 3, 4, 5, 6, 7],
              'count': [2, 1, 2, 2, 3, 1, 1]}),
            ('DEPARTURE_DELAY', {'sort': False},
             {'DEPARTURE_DELAY': [-4.0, -1.0, 22.0, -3.0, 3.0, 21.0, -2.0],
              'count': [2, 2, 2, 1, 1, 1, 1]}),
            ('DEPARTURE_DELAY', {'dropna': False},
             {'DEPARTURE_DELAY': [22.0, -1.0, -4.0, nan, -2.0, 21.0, 3.0, -3.0],
              'count': [2, 2, 2, 2, 1, 1, 1, 1]}),
            ('AIRLINE', {'dropna': False},
             {'AIRLINE': [None, 'DL', 'AA', 'AS', 'WN', 'B6', 'UA', 'VX', 'EV'],
              'count': [2, 2, 2, 1, 1, 1, 1, 1, 1]}),
            ('DEPARTURE_DELAY', {'dropna': False, 'sort': False},
             {'DEPARTURE_DELAY': [nan, -4.0, -1.0, 22.0, -3.0, 3.0, 21.0, -2.0],
              'count': [2, 2, 2, 2, 1, 1, 1, 1]}),
        )
        for column, options, expected_data in scenarios:
            result = flights.value_counts(column, **options)
            expected = dx.DataFrame(expected_data)
            assert_frame_equal(result, expected)
Ejemplo n.º 3
0
    def test_mod_float(self):
        """Check the ``%`` operator with a float right-hand operand."""
        # Every operation below is expected to be rejected with TypeError.
        rejected = (
            lambda: self.df % 5.,
            lambda: self.df.select_dtypes('str') % 10.,
            lambda: self.df % 'asdf',
        )
        for operation in rejected:
            with pytest.raises(TypeError):
                operation()

        numeric = dx.DataFrame(
            {
                'a': [6., 7, 10],
                'b': [0., 2, nan],
                'f': [0., 10, 3],
                'g': np.zeros(3, dtype='float64'),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))

        result = numeric % 3.
        expected = dx.DataFrame(
            {
                'a': [0., 1, 1],
                'b': [0., 2, nan],
                'f': [0., 1, 0],
                'g': np.zeros(3, dtype='float64'),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))
        assert_frame_equal(result, expected)
Ejemplo n.º 4
0
    def test_bfillna(self):
        """Check ``fillna(method='bfill')`` with and without ``limit``."""
        source = de.DataFrame({
            'a': [4, nan, nan, nan, 3, 2],
            'b': [None, 'a', 'd', None, None, 'er'],
            'c': [nan, nan, 5, nan, 7, nan]
        })

        # No limit on the number of values filled.
        filled = source.fillna(method='bfill')
        expected = de.DataFrame({
            'a': [4.0, 3.0, 3.0, 3.0, 3.0, 2.0],
            'b': ['a', 'a', 'd', 'er', 'er', 'er'],
            'c': [5.0, 5.0, 5.0, 7.0, 7.0, nan]
        })
        assert_frame_equal(filled, expected)

        # limit=1
        filled = source.fillna(method='bfill', limit=1)
        expected = de.DataFrame({
            'a': [4.0, nan, nan, 3.0, 3.0, 2.0],
            'b': ['a', 'a', 'd', None, 'er', 'er'],
            'c': [nan, 5.0, 5.0, 7.0, 7.0, nan]
        })
        assert_frame_equal(filled, expected)

        # Argument combinations that are expected to raise ValueError.
        invalid_options = (
            {'method': 'bfill', 'limit': 0},
            {'method': 'bfill', 'limit': 1, 'fill_function': 'mean'},
            {'values': 10, 'method': 'bfill'},
        )
        for options in invalid_options:
            with pytest.raises(ValueError):
                source.fillna(**options)
Ejemplo n.º 5
0
    def test_less_than_equal(self):
        """Check the ``<=`` operator against scalar right-hand operands."""
        # Every comparison below is expected to be rejected with TypeError.
        rejected = (
            lambda: self.df <= 5,
            lambda: self.df.select_dtypes('str') <= 10,
            lambda: self.df <= 'asdf',
        )
        for comparison in rejected:
            with pytest.raises(TypeError):
                comparison()

        numeric = dx.DataFrame({
            'a': [6, 7, 10],
            'b': [0, 2, nan],
            'f': [0, 10, 3],
            'g': np.zeros(3, dtype='int'),
            'h': [nan, nan, nan]
        })
        result = numeric <= 3

        expected = dx.DataFrame({
            'a': [False, False, False],
            'b': [True, True, False],
            'f': [True, False, True],
            'g': np.ones(3, dtype='bool'),
            'h': [False] * 3
        })
        assert_frame_equal(result, expected)
Ejemplo n.º 6
0
    def test_drop_rows(self):
        """Check ``drop(rows=...)`` with one index, a bad index and a list."""
        frame = de.DataFrame({
            'a': [0, 0, 5, 9],
            'b': [0, 1.5, 8, 9],
            'c': [''] + list('efs'),
            'd': [False, False, True, False],
            'e': [0, 20, 30, 4],
            'f': ['a', nan, 'ad', 'effd'],
            'g': [np.nan] * 4,
        })

        # A single integer row.
        remaining = frame.drop(rows=3)
        expected = de.DataFrame({
            'a': [0, 0, 5],
            'b': [0.0, 1.5, 8.0],
            'c': ['', 'e', 'f'],
            'd': [False, False, True],
            'e': [0, 20, 30],
            'f': ['a', None, 'ad'],
            'g': [nan, nan, nan],
        })
        assert_frame_equal(remaining, expected)

        # Row 5 of this four-row frame is expected to raise IndexError.
        with pytest.raises(IndexError):
            frame.drop(rows=5)

        # A list mixing negative and non-negative row indexes.
        remaining = frame.drop(rows=[-1, 0, -3])
        expected = de.DataFrame({
            'a': [5],
            'b': [8.0],
            'c': ['f'],
            'd': [True],
            'e': [30],
            'f': ['ad'],
            'g': [nan],
        })
        assert_frame_equal(remaining, expected)
Ejemplo n.º 7
0
    def test_sort_values_multiple(self):
        """Check ``sort_values`` on two columns with several ``ascending`` forms."""
        unsorted = de.DataFrame({'a': [2, 3, nan, 6, 3, 2],
                                 'b': [None, 'f', 'd', 'f', 'd', 'er'],
                                 'c': [12, 444, -5.6, 5, 1, 7]})

        # Each entry: (sort columns, ``ascending`` argument, expected contents).
        # The scenarios are checked in the order listed.
        scenarios = (
            (['a', 'b'], False,
             {'a': [6.0, 3.0, 3.0, 2.0, 2.0, nan],
              'b': ['f', 'f', 'd', 'er', None, 'd'],
              'c': [5.0, 444.0, 1.0, 7.0, 12.0, -5.6]}),
            (['a', 'b'], True,
             {'a': [2.0, 2.0, 3.0, 3.0, 6.0, nan],
              'b': ['er', None, 'd', 'f', 'f', 'd'],
              'c': [7.0, 12.0, 1.0, 444.0, 5.0, -5.6]}),
            (['a', 'b'], [True, False],
             {'a': [2.0, 2.0, 3.0, 3.0, 6.0, nan],
              'b': ['er', None, 'f', 'd', 'f', 'd'],
              'c': [7.0, 12.0, 444.0, 1.0, 5.0, -5.6]}),
            (['a', 'b'], [False, True],
             {'a': [6.0, 3.0, 3.0, 2.0, 2.0, nan],
              'b': ['f', 'd', 'f', 'er', None, 'd'],
              'c': [5.0, 1.0, 444.0, 7.0, 12.0, -5.6]}),
            (['b', 'a'], [False, True],
             {'a': [3.0, 6.0, 2.0, 3.0, nan, 2.0],
              'b': ['f', 'f', 'er', 'd', 'd', None],
              'c': [444.0, 5.0, 7.0, 1.0, -5.6, 12.0]}),
        )
        for by, ascending, expected_data in scenarios:
            result = unsorted.sort_values(by, ascending=ascending)
            expected = de.DataFrame(expected_data)
            assert_frame_equal(result, expected)
Ejemplo n.º 8
0
    def test_truediv_float(self):
        """Check the ``/`` operator between the fixture frame and a float."""
        # Every division below is expected to be rejected with TypeError.
        rejected = (
            lambda: self.df / 5.,
            lambda: self.df.select_dtypes('str') / 10.,
            lambda: self.df / 'asdf',
        )
        for division in rejected:
            with pytest.raises(TypeError):
                division()

        # frame / float
        quotient = self.df.select_dtypes('number') / 2.
        expected = dx.DataFrame(
            {
                'a': [0, nan, 2.5],
                'b': [0, .75, nan],
                'f': [0, 2, 2.5],
                'g': np.zeros(3),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))
        assert_frame_equal(quotient, expected)

        # float / frame (reflected operand order)
        quotient = 10. / self.df.select_dtypes('number')
        expected = dx.DataFrame(
            {
                'a': [np.inf, nan, 2],
                'b': [np.inf, 10 / 1.5, nan],
                'f': [np.inf, 2.5, 2],
                'g': [np.inf] * 3,
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))
        assert_frame_equal(quotient, expected)
Ejemplo n.º 9
0
    def test_scalar_column_with_list_slice_row_selection(self):
        """Check selecting one column together with a list or slice of rows.

        ``df`` is not defined in this method; it is read from an enclosing
        scope.
        """
        # Integer column, list of rows.
        selected = df[[4, 6], 2]
        expected = dx.DataFrame({'c': ['e', 'g']})
        assert_frame_equal(selected, expected)

        # Integer column, single-element list of rows.
        selected = df[[4], 2]
        expected = dx.DataFrame({'c': ['e']})
        assert_frame_equal(selected, expected)

        # Column name, list of rows.
        selected = df[[5, 2], 'f']
        expected = dx.DataFrame({'f': [4.0, 3.0]})
        assert_frame_equal(selected, expected)

        # Column name, open-ended row slice.
        selected = df[3:, 'f']
        expected = dx.DataFrame({'f': [3.0, 11.0, 4.0, 5.0, 1.0]})
        assert_frame_equal(selected, expected)

        # Column name, row slice with a negative step.
        selected = df[5::-2, 'b']
        expected = dx.DataFrame({'b': [3.0, 1.0, 8.0]})
        assert_frame_equal(selected, expected)
Ejemplo n.º 10
0
    def test_add_df(self):
        """Check frame + frame addition on the class fixtures."""
        # The fixture frame added to itself.
        expected_doubled = dx.DataFrame({
            'a': np.array([2, 4]),
            'b': np.array([-20, 20]),
            'c': np.array([3., 16.]),
            'd': np.array([4.6, nan]),
            'e': np.array(['aa', 'bb'], dtype=object),
            'f': np.array([True, False]),
            'g': np.array([0, 172800000000000], dtype='timedelta64[ns]')
        })
        doubled = self.df + self.df
        assert_frame_equal(doubled, expected_doubled)

        # Two different numeric fixture frames.
        expected_sum = dx.DataFrame({
            'a': array([5, 7]),
            'b': array([-10, 10]),
            'c': array([3.5, 10.]),
            'd': array([0.3, nan])
        })
        number_sum = self.df_number + self.df_number2
        assert_frame_equal(number_sum, expected_sum)
Ejemplo n.º 11
0
    def test_boolean_column_selection(self):
        """Check column selection driven by boolean selectors."""
        frame = dx.DataFrame({
            'a': [0, 0, 5, 9, 3, 4, 5, 1],
            'b': [0, 1.512344353, 8, 9, np.nan, 3, 2, 8],
            'c': [''] + list('bgggzgh'),
            'd': [False, False, True, False] * 2,
            'e': [0, 20, 30, 4, 5, 6, 7, 8],
            'f': [0., 3, 3, 3, 11, 4, 5, 1],
            'g': ['', None, 'ad', 'effd', 'ef', None, 'ett', 'zzzz'],
            'h': [0, 4, 5, 6, 7, 8, 9, 0],
            'i': np.array([0, 7, 6, 5, 4, 3, 2, 11]),
            'j': np.zeros(8, dtype='int'),
            'k': np.ones(8) - 1,
            'l': [np.nan] * 8,
        })

        # Selector built by comparing row 1 of the 'int' columns against 0.
        int_columns = frame.select_dtypes('int')
        row_is_zero = int_columns[1, :] == 0
        selected = int_columns[:, row_is_zero]
        expected = dx.DataFrame({'a': [0, 0, 5, 9, 3, 4, 5, 1],
                                 'j': np.zeros(8, dtype='int')})
        assert_frame_equal(selected, expected)

        # Selector given as a boolean NumPy array, combined with a row slice.
        column_mask = np.array([False, False, False, True, True, False,
                                False, False, False, False, False, False])
        selected = frame[-3:, column_mask]
        expected = dx.DataFrame({'d': [False, True, False],
                                 'e': [6, 7, 8]})
        assert_frame_equal(selected, expected)
Ejemplo n.º 12
0
    def test_mod_division_frame(self):
        """Check the ``%`` operator with an integer right-hand operand."""
        # Every operation below is expected to be rejected with TypeError.
        rejected = (
            lambda: self.df % 5,
            lambda: self.df.select_dtypes('str') % 10,
            lambda: self.df % 'asdf',
        )
        for operation in rejected:
            with pytest.raises(TypeError):
                operation()

        numeric = dx.DataFrame(
            {
                'a': [6, 7, 10],
                'b': [0, 2, nan],
                'f': [0, 10, 3],
                'g': np.zeros(3, dtype='int'),
                'h': [np.nan] * 3
            },
            columns=list('abfgh'))

        remainder = numeric % 3
        expected = dx.DataFrame(
            {
                'a': [0, 1, 1],
                'b': [0, 2, nan],
                'f': [0, 1, 0],
                'g': np.zeros(3, dtype='int'),
                'h': [np.nan] * 3
            },
            columns=list('abfgh'))
        assert_frame_equal(remainder, expected)
Ejemplo n.º 13
0
    def test_floordiv_frame(self):
        """Check the ``//`` operator with an integer right-hand operand."""
        # Every operation below is expected to be rejected with TypeError.
        rejected = (
            lambda: self.df // 5,
            lambda: self.df.select_dtypes('str') // 10,
            lambda: self.df // 'asdf',
        )
        for operation in rejected:
            with pytest.raises(TypeError):
                operation()

        numeric = dx.DataFrame(
            {
                'a': [0, 0, 10],
                'b': [0, 20, nan],
                'f': [0, 100, 10],
                'g': np.zeros(3, dtype='int'),
                'h': [np.nan] * 3
            },
            columns=list('abfgh'))

        quotient = numeric // 3
        expected = dx.DataFrame(
            {
                'a': [0, 0, 3],
                'b': [0, 6, nan],
                'f': [0, 33, 3],
                'g': np.zeros(3, dtype='int'),
                'h': [np.nan] * 3
            },
            columns=list('abfgh'))
        assert_frame_equal(quotient, expected)
Ejemplo n.º 14
0
    def test_mult_frame(self):
        """Check multiplying the fixture frame by 2 in both operand orders."""

        def build_expected():
            # Both operand orders are compared against the same contents;
            # a fresh frame is constructed for each comparison.
            return dx.DataFrame({
                'a': [0, 0, 10],
                'b': [0, 3, nan],
                'c': ['', 'bb', 'gg'],
                'd': [0, 0, 2],
                'e': ['', None, 'adad'],
                'f': [0, 8, 10],
                'g': np.zeros(3, dtype='int'),
                'h': [np.nan] * 3
            })

        # frame * int
        product = self.df * 2
        expected = build_expected()
        assert_frame_equal(product, expected)

        # int * frame (reflected operand order)
        product = 2 * self.df
        expected = build_expected()
        assert_frame_equal(product, expected)
Ejemplo n.º 15
0
    def test_greater_than_equal(self):
        """Check the ``>=`` operator against scalar right-hand operands."""
        # Every comparison below is expected to be rejected with TypeError.
        rejected = (
            lambda: self.df >= 5,
            lambda: self.df.select_dtypes('str') >= 10,
            lambda: self.df >= 'asdf',
        )
        for comparison in rejected:
            with pytest.raises(TypeError):
                comparison()

        numeric = dx.DataFrame(
            {
                'a': [6, 7, 10],
                'b': [0, 2, nan],
                'f': [0, 10, 3],
                'g': np.zeros(3, dtype='int'),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))

        result = numeric >= 3
        expected = dx.DataFrame(
            {
                'a': [True, True, True],
                'b': [False, False, False],
                'f': [False, True, True],
                'g': np.zeros(3, dtype='bool'),
                'h': [False] * 3
            },
            columns=list('abfgh'))
        assert_frame_equal(result, expected)
Ejemplo n.º 16
0
    def test_mod_bool(self):
        """Check the ``%`` operator with a bool right-hand operand.

        Taking the whole fixture frame, or only its ``'str'`` columns,
        modulo a bool must raise ``TypeError``.  A locally built numeric
        frame is then taken modulo ``True`` and compared against a
        hand-written result.
        """
        some_bool = True

        with pytest.raises(TypeError):
            self.df % some_bool

        with pytest.raises(TypeError):
            self.df.select_dtypes('str') % some_bool

        numeric = dx.DataFrame(
            {
                'a': [6, 7, 10],
                'b': [0, 2, nan],
                'f': [0, 10, 3],
                'g': np.zeros(3, dtype='int64'),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))

        remainder = numeric % some_bool
        expected = dx.DataFrame(
            {
                'a': [0, 0, 0],
                'b': [0, 0, nan],
                'f': [0, 0, 0],
                'g': np.zeros(3, dtype='int64'),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))
        assert_frame_equal(remainder, expected)
Ejemplo n.º 17
0
 def test_selectdtypes_str(self):
     """Check that ``select_dtypes('str')`` returns columns ``c`` and ``g``."""
     selected = self.df.select_dtypes('str')
     expected_data = {
         'c': [''] + list('bgggzgh'),
         'g': ['', None, 'ad', 'effd', 'ef', None, 'ett', 'zzzz'],
     }
     expected = dx.DataFrame(expected_data, columns=['c', 'g'])
     assert_frame_equal(selected, expected)
Ejemplo n.º 18
0
 def test_add_string(self):
     """Check adding the string fixture frame to itself."""
     expected = dx.DataFrame({
         'a': array(['oneone', 'twotwo'], dtype=object),
         'b': array(['threethree', 'fourfour'], dtype=object),
     })
     doubled = self.df_strings + self.df_strings
     assert_frame_equal(expected, doubled)
Ejemplo n.º 19
0
 def test_selectdtypes_float(self):
     """Check that ``select_dtypes('float')`` returns columns ``b, f, k, l``."""
     selected = self.df.select_dtypes('float')
     expected_data = {
         'b': [0, 1.512344353, 8, 9, np.nan, 3, 2, 8],
         'f': [0., 3, 3, 3, 11, 4, 5, 1],
         'k': np.ones(8) - 1,
         'l': [np.nan] * 8,
     }
     expected = dx.DataFrame(expected_data, columns=list('bfkl'))
     assert_frame_equal(selected, expected)
Ejemplo n.º 20
0
    def test_add_one_col(self):
        """Check adding ``df_one_col`` and ``df_number`` in both operand orders."""
        df_answer = dx.DataFrame({
            'a': np.array([6., 4.1]),
            'b': np.array([-5., 12.1]),
            'c': np.array([6.5, 10.1]),
            'd': np.array([7.3, nan])
        })

        # Both operand orders are compared against the same expected frame.
        operand_orders = (
            (self.df_number, self.df_one_col),
            (self.df_one_col, self.df_number),
        )
        for left, right in operand_orders:
            df_result = left + right
            assert_frame_equal(df_result, df_answer)
Ejemplo n.º 21
0
    def test_list_of_integer_col_selection(self):
        """Check selecting columns with a list of integer positions.

        ``df`` is not defined in this method; it is read from an enclosing
        scope.
        """
        # Several positions, given out of order.
        selected = df[:, [4, 6, 1]]
        expected = dx.DataFrame(
            {
                'b': [1.5, 8.0, 9.0, 1.0, 2.0, 3.0, 2.0, 8.0],
                'e': [10, 20, 30, 4, 5, 6, 7, 8],
                'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y'],
            },
            columns=['e', 'g', 'b'])
        assert_frame_equal(selected, expected)

        # A single position wrapped in a list.
        selected = df[:, [3]]
        expected = dx.DataFrame(
            {'d': [True, False, True, False, True, False, True, False]})
        assert_frame_equal(selected, expected)
Ejemplo n.º 22
0
    def test_add_one_row(self):
        """Check adding the one-row numeric fixture to ``df_number``."""
        expected = dx.DataFrame({
            'a': array([6, 7]),
            'b': array([89, 109]),
            'c': array([3.6, 10.1]),
            'd': array([nan, nan])
        })

        total = self.df_number + self.df_one_row_number
        assert_frame_equal(total, expected)

        # NOTE(review): this repeats the operand order used above; only the
        # argument order passed to assert_frame_equal differs. Possibly
        # ``self.df_one_row_number + self.df_number`` was intended -- confirm
        # before changing.
        total = self.df_number + self.df_one_row_number
        assert_frame_equal(expected, total)
Ejemplo n.º 23
0
    def test_list_of_string_col_selection(self):
        """Check selecting columns with a list of column names.

        ``df`` is not defined in this method; it is read from an enclosing
        scope.
        """
        # Several names, given out of order.
        selected = df[:, ['b', 'd', 'a']]
        expected = dx.DataFrame(
            {
                'a': [1, 2, 5, 9, 3, 4, 5, 1],
                'b': [1.5, 8.0, 9.0, 1.0, 2.0, 3.0, 2.0, 8.0],
                'd': [True, False, True, False, True, False, True, False],
            },
            columns=['b', 'd', 'a'])
        assert_frame_equal(selected, expected)

        # A single name wrapped in a list.
        selected = df[:, ['a']]
        expected = dx.DataFrame({'a': [1, 2, 5, 9, 3, 4, 5, 1]})
        assert_frame_equal(selected, expected)
Ejemplo n.º 24
0
 def test_float_to_float(self):
     """Check ``astype`` when column ``e`` is requested as ``'float'``.

     ``df`` is not defined in this method; it is read from an enclosing
     scope.
     """
     converted = df.astype({'e': 'float'})
     expected_data = {
         'a': [1, nan, 10, 0],
         'b': ['a', 'a', 'c', 'c'],
         'c': [5, 1, nan, 3],
         'd': [True, False, True, nan],
         'e': [3.2, nan, 1, 0],
         'f': np.array([5, 10, NaTdt, 4], 'datetime64[Y]'),
         'g': np.array([22, 10, NaTtd, 8], 'timedelta64[m]'),
     }
     expected = dx.DataFrame(expected_data)
     assert_frame_equal(converted, expected)
Ejemplo n.º 25
0
 def test_get_dtypes(self):
     """Check the frame returned by the ``dtypes`` attribute."""
     reported = self.df.dtypes
     column_names = list('abcdefghijkl')
     data_types = [
         'int', 'float', 'str', 'bool', 'int', 'float', 'str',
         'int', 'int', 'int', 'float', 'float'
     ]
     expected = dx.DataFrame(
         {'Column Name': column_names, 'Data Type': data_types},
         columns=['Column Name', 'Data Type'])
     assert_frame_equal(reported, expected)
Ejemplo n.º 26
0
    def test_slice_with_integers_col_selection(self):
        """Check selecting columns with integer slices.

        ``df`` is not defined in this method; it is read from an enclosing
        scope.
        """
        # Plain start:stop slice.
        selected = df[:, 3:6]
        expected = dx.DataFrame(
            {
                'd': [True, False, True, False, True, False, True, False],
                'e': [10, 20, 30, 4, 5, 6, 7, 8],
                'f': [1.0, 3.0, 3.0, 3.0, 11.0, 4.0, 5.0, 1.0],
            },
            columns=['d', 'e', 'f'])
        assert_frame_equal(selected, expected)

        # Negative start combined with a step.
        selected = df[:, -4::2]
        expected = dx.DataFrame(
            {
                'e': [10, 20, 30, 4, 5, 6, 7, 8],
                'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y'],
            },
            columns=['e', 'g'])
        assert_frame_equal(selected, expected)
Ejemplo n.º 27
0
    def test_selectdtypes_ints(self):
        """Check that ``select_dtypes('int')`` returns columns ``a, e, h, i, j``."""
        selected = self.df.select_dtypes('int')
        expected_data = {
            'a': [0, 0, 5, 9, 3, 4, 5, 1],
            'e': [0, 20, 30, 4, 5, 6, 7, 8],
            'h': [0, 4, 5, 6, 7, 8, 9, 0],
            'i': np.array([0, 7, 6, 5, 4, 3, 2, 11]),
            'j': np.zeros(8, dtype='int'),
        }
        expected = dx.DataFrame(expected_data, columns=list('aehij'))

        assert_frame_equal(selected, expected)
Ejemplo n.º 28
0
    def test_sort_values_one(self):
        """Check ``sort_values`` on a single column, ascending and descending."""
        unsorted = de.DataFrame({
            'a': [4, 3, nan, 6, 3, 2],
            'b': [None, 'f', 'd', 'f', 'd', 'er'],
            'c': [12, 444, -5.6, 5, 1, 7],
        })

        # Column 'a', default ordering.
        ordered = unsorted.sort_values('a')
        expected = de.DataFrame(data={
            'a': [2.0, 3.0, 3.0, 4.0, 6.0, nan],
            'b': ['er', 'f', 'd', None, 'f', 'd'],
            'c': [7.0, 444.0, 1.0, 12.0, 5.0, -5.6],
        })
        assert_frame_equal(ordered, expected)

        # Column 'b', default ordering.
        ordered = unsorted.sort_values('b')
        expected = de.DataFrame({
            'a': [nan, 3.0, 2.0, 3.0, 6.0, 4.0],
            'b': ['d', 'd', 'er', 'f', 'f', None],
            'c': [-5.6, 1.0, 7.0, 444.0, 5.0, 12.0],
        })
        assert_frame_equal(ordered, expected)

        # The descending checks use a frame whose column 'a' starts with 2
        # instead of 4, so 'a' contains the value 2 twice.
        unsorted = de.DataFrame({
            'a': [2, 3, nan, 6, 3, 2],
            'b': [None, 'f', 'd', 'f', 'd', 'er'],
            'c': [12, 444, -5.6, 5, 1, 7],
        })

        # Column 'b', descending.
        ordered = unsorted.sort_values('b', ascending=False)
        expected = de.DataFrame({
            'a': [3.0, 6.0, 2.0, nan, 3.0, 2.0],
            'b': ['f', 'f', 'er', 'd', 'd', None],
            'c': [444.0, 5.0, 7.0, -5.6, 1.0, 12.0],
        })
        assert_frame_equal(ordered, expected)

        # Column 'a', descending.
        ordered = unsorted.sort_values('a', ascending=False)
        expected = de.DataFrame({
            'a': [6.0, 3.0, 3.0, 2.0, 2.0, nan],
            'b': ['f', 'f', 'd', None, 'er', 'd'],
            'c': [5.0, 444.0, 1.0, 12.0, 7.0, -5.6],
        })
        assert_frame_equal(ordered, expected)
Ejemplo n.º 29
0
    def test_add_string(self):
        """Check adding a string to the ``'str'`` columns in both operand orders."""
        # frame + str
        joined = self.df.select_dtypes('str') + 'aaa'
        expected = dx.DataFrame({
            'c': ['aaa', 'baaa', 'gaaa'],
            'e': ['aaa', None, 'adaaa'],
        })
        assert_frame_equal(joined, expected)

        # str + frame (reflected operand order)
        joined = 'aaa' + self.df.select_dtypes('str')
        expected = dx.DataFrame({
            'c': ['aaa', 'aaab', 'aaag'],
            'e': ['aaa', None, 'aaaad'],
        })
        assert_frame_equal(joined, expected)
Ejemplo n.º 30
0
    def test_where_string_cols(self):
        """Check ``where`` on columns ``c`` and ``f`` with 0, 1 and 2 extra values."""
        frame = de.DataFrame({
            'a': [9, 10, 9, 9, 10],
            'b': [0, nan, nan, 0, 1],
            'c': [''] + list('eeaz'),
            'd': [False, False, True, False, True],
            'e': [0, 20, 30, 4, 4],
            'f': ['a', nan, 'ad', None, 'ad'],
            'g': [np.nan] * 5,
        })
        cond = frame[:, 'e'] > 9

        # Each entry: (extra positional arguments after ``cond``, expected
        # frame contents).  The scenarios are checked in the order listed.
        scenarios = (
            ((),
             {'c': [None, 'e', 'e', None, None],
              'f': [None, None, 'ad', None, None]}),
            ((22, 99),
             {'c': [99, 22, 22, 99, 99], 'f': [99, 22, 22, 99, 99]}),
            (('t',),
             {'c': [None, 't', 't', None, None], 'f': [None, 't', 't', None, None]}),
            (('t', 'y'),
             {'c': ['y', 't', 't', 'y', 'y'], 'f': ['y', 't', 't', 'y', 'y']}),
        )
        for extra_args, expected_data in scenarios:
            result = frame[:, ['c', 'f']].where(cond, *extra_args)
            expected = de.DataFrame(expected_data)
            assert_frame_equal(result, expected)