Beispiel #1
0
    def test_greater_than_equal(self):
        """Exercise ``>=`` against a scalar.

        The ``self.df`` fixture (defined outside this snippet), its string
        columns, and a string operand are each expected to raise
        ``TypeError``.  For a frame built from numeric data the expected
        answer has ``False`` wherever the input holds NaN.
        """
        with pytest.raises(TypeError):
            self.df >= 5
        with pytest.raises(TypeError):
            self.df.select_dtypes('str') >= 10
        with pytest.raises(TypeError):
            self.df >= 'asdf'

        numeric = dx.DataFrame(
            {'a': [6, 7, 10],
             'b': [0, 2, nan],
             'f': [0, 10, 3],
             'g': np.zeros(3, dtype='int'),
             'h': [nan, nan, nan]},
            columns=list('abfgh'))
        result = numeric >= 3

        expected = dx.DataFrame(
            {'a': [True, True, True],
             'b': [False, False, False],
             'f': [False, True, True],
             'g': np.zeros(3, dtype='bool'),
             'h': [False] * 3},
            columns=list('abfgh'))
        assert_frame_equal(result, expected)
Beispiel #2
0
    def test_boolean_column_selection(self):
        """Pick columns with a boolean frame and with a boolean numpy array."""
        raw = {
            'a': [0, 0, 5, 9, 3, 4, 5, 1],
            'b': [0, 1.512344353, 8, 9, np.nan, 3, 2, 8],
            'c': [''] + list('bgggzgh'),
            'd': [False, False, True, False] * 2,
            'e': [0, 20, 30, 4, 5, 6, 7, 8],
            'f': [0., 3, 3, 3, 11, 4, 5, 1],
            'g': ['', None, 'ad', 'effd', 'ef', None, 'ett', 'zzzz'],
            'h': [0, 4, 5, 6, 7, 8, 9, 0],
            'i': np.array([0, 7, 6, 5, 4, 3, 2, 11]),
            'j': np.zeros(8, dtype='int'),
            'k': np.ones(8) - 1,
            'l': [np.nan] * 8,
        }
        frame = dx.DataFrame(raw)

        # Among the 'int' columns, keep those whose value in row 1 equals 0.
        int_part = frame.select_dtypes('int')
        row_one_is_zero = int_part[1, :] == 0
        selected = int_part[:, row_one_is_zero]
        expected = dx.DataFrame({'a': [0, 0, 5, 9, 3, 4, 5, 1],
                                 'j': np.zeros(8, dtype='int')})
        assert_frame_equal(selected, expected)

        # One flag per column of ``frame``; only positions 3 and 4 are set.
        column_mask = np.array([False] * 3 + [True] * 2 + [False] * 7)
        selected = frame[-3:, column_mask]
        expected = dx.DataFrame({'d': [False, True, False],
                                 'e': [6, 7, 8]})
        assert_frame_equal(selected, expected)
Beispiel #3
0
    def test_mod_division_frame(self):
        """Exercise ``%`` with an integer divisor.

        The ``self.df`` fixture (defined outside this snippet), its string
        columns, and a string operand must raise ``TypeError``; a frame of
        numeric data is reduced element by element, with NaN staying NaN in
        the expected answer.
        """
        with pytest.raises(TypeError):
            self.df % 5
        with pytest.raises(TypeError):
            self.df.select_dtypes('str') % 10
        with pytest.raises(TypeError):
            self.df % 'asdf'

        dividend = dx.DataFrame(
            {'a': [6, 7, 10],
             'b': [0, 2, nan],
             'f': [0, 10, 3],
             'g': np.zeros(3, dtype='int'),
             'h': [np.nan] * 3},
            columns=list('abfgh'))
        remainder = dividend % 3

        expected = dx.DataFrame(
            {'a': [0, 1, 1],
             'b': [0, 2, nan],
             'f': [0, 1, 0],
             'g': np.zeros(3, dtype='int'),
             'h': [np.nan] * 3},
            columns=list('abfgh'))
        assert_frame_equal(remainder, expected)
Beispiel #4
0
    def test_add_df(self):
        """Add frames to frames.

        First ``self.df`` is added to itself, then ``self.df_number`` to
        ``self.df_number2``; all three fixtures are defined outside this
        snippet.
        """
        expected_self_sum = dx.DataFrame({
            'a': np.array([2, 4]),
            'b': np.array([-20, 20]),
            'c': np.array([3., 16.]),
            'd': np.array([4.6, nan]),
            'e': np.array(['aa', 'bb'], dtype=object),
            'f': np.array([True, False]),
            'g': np.array([0, 172800000000000], dtype='timedelta64[ns]'),
        })
        assert_frame_equal(self.df + self.df, expected_self_sum)

        expected_number_sum = dx.DataFrame({
            'a': array([5, 7]),
            'b': array([-10, 10]),
            'c': array([3.5, 10.]),
            'd': array([0.3, nan]),
        })
        number_sum = self.df_number + self.df_number2
        assert_frame_equal(number_sum, expected_number_sum)
Beispiel #5
0
    def test_floordiv_frame(self):
        """Exercise ``//`` with an integer divisor.

        The ``self.df`` fixture (defined outside this snippet), its string
        columns, and a string operand must raise ``TypeError``; a frame of
        numeric data is floor-divided element by element.
        """
        with pytest.raises(TypeError):
            self.df // 5
        with pytest.raises(TypeError):
            self.df.select_dtypes('str') // 10
        with pytest.raises(TypeError):
            self.df // 'asdf'

        dividend = dx.DataFrame(
            {'a': [0, 0, 10],
             'b': [0, 20, nan],
             'f': [0, 100, 10],
             'g': np.zeros(3, dtype='int'),
             'h': [np.nan] * 3},
            columns=list('abfgh'))
        quotient = dividend // 3

        expected = dx.DataFrame(
            {'a': [0, 0, 3],
             'b': [0, 6, nan],
             'f': [0, 33, 3],
             'g': np.zeros(3, dtype='int'),
             'h': [np.nan] * 3},
            columns=list('abfgh'))
        assert_frame_equal(quotient, expected)
Beispiel #6
0
    def test_less_than_equal(self):
        """Exercise ``<=`` against a scalar.

        The ``self.df`` fixture (defined outside this snippet), its string
        columns, and a string operand must raise ``TypeError``.  For numeric
        data the expected answer has ``False`` wherever the input holds NaN.
        """
        with pytest.raises(TypeError):
            self.df <= 5
        with pytest.raises(TypeError):
            self.df.select_dtypes('str') <= 10
        with pytest.raises(TypeError):
            self.df <= 'asdf'

        numeric = dx.DataFrame({'a': [6, 7, 10],
                                'b': [0, 2, nan],
                                'f': [0, 10, 3],
                                'g': np.zeros(3, dtype='int'),
                                'h': [nan, nan, nan]})
        result = numeric <= 3

        expected = dx.DataFrame({'a': [False, False, False],
                                 'b': [True, True, False],
                                 'f': [True, False, True],
                                 'g': np.ones(3, dtype='bool'),
                                 'h': [False] * 3})
        assert_frame_equal(result, expected)
Beispiel #7
0
    def test_fillna(self):
        """``fillna`` with a number, a per-column dict, and a string.

        Per the expected frames: a numeric fill leaves the string column
        untouched, a string fill leaves the numeric columns untouched, and a
        dict fills only the columns it names.
        """
        source = de.DataFrame({
            'a': [4, nan, nan, nan, 3, 2],
            'b': [None, 'a', 'd', None, None, 'er'],
            'c': [nan, nan, 5, nan, 7, nan],
        })

        # Scalar number.
        filled = source.fillna(5)
        expected = de.DataFrame({
            'a': [4.0, 5.0, 5.0, 5.0, 3.0, 2.0],
            'b': [None, 'a', 'd', None, None, 'er'],
            'c': [5.0, 5.0, 5.0, 5.0, 7.0, 5.0],
        })
        assert_frame_equal(filled, expected)

        # Column-specific fill values.
        filled = source.fillna({'a': 10, 'b': 'poop'})
        expected = de.DataFrame({
            'a': [4.0, 10.0, 10.0, 10.0, 3.0, 2.0],
            'b': ['poop', 'a', 'd', 'poop', 'poop', 'er'],
            'c': [nan, nan, 5.0, nan, 7.0, nan],
        })
        assert_frame_equal(filled, expected)

        # Scalar string.
        filled = source.fillna('dupe')
        expected = de.DataFrame({
            'a': [4.0, nan, nan, nan, 3.0, 2.0],
            'b': ['dupe', 'a', 'd', 'dupe', 'dupe', 'er'],
            'c': [nan, nan, 5.0, nan, 7.0, nan],
        })
        assert_frame_equal(filled, expected)
Beispiel #8
0
    def test_mult_frame(self):
        """``self.df * 2`` and ``2 * self.df`` must both match one answer.

        ``self.df`` is a fixture defined outside this snippet.
        """

        def build_expected():
            # Built afresh for each comparison so the two checks share nothing.
            return dx.DataFrame({
                'a': [0, 0, 10],
                'b': [0, 3, nan],
                'c': ['', 'bb', 'gg'],
                'd': [0, 0, 2],
                'e': ['', None, 'adad'],
                'f': [0, 8, 10],
                'g': np.zeros(3, dtype='int'),
                'h': [np.nan] * 3,
            })

        assert_frame_equal(self.df * 2, build_expected())
        assert_frame_equal(2 * self.df, build_expected())
Beispiel #9
0
    def test_where_string_cols(self):
        """``where`` on columns 'c' and 'f' with zero, one and two fill values.

        The condition is ``e > 9``.  The expected frames show the behaviour
        each call must produce for rows where it holds and where it does not.
        """
        frame = de.DataFrame({
            'a': [9, 10, 9, 9, 10],
            'b': [0, nan, nan, 0, 1],
            'c': [''] + list('eeaz'),
            'd': [False, False, True, False, True],
            'e': [0, 20, 30, 4, 4],
            'f': ['a', nan, 'ad', None, 'ad'],
            'g': [np.nan] * 5,
        })
        e_above_nine = frame[:, 'e'] > 9

        # No replacement values.
        result = frame[:, ['c', 'f']].where(e_above_nine)
        expected = de.DataFrame({'c': [None, 'e', 'e', None, None],
                                 'f': [None, None, 'ad', None, None]})
        assert_frame_equal(result, expected)

        # Numeric replacements for both outcomes.
        result = frame[:, ['c', 'f']].where(e_above_nine, 22, 99)
        expected = de.DataFrame({'c': [99, 22, 22, 99, 99],
                                 'f': [99, 22, 22, 99, 99]})
        assert_frame_equal(result, expected)

        # A single string replacement.
        result = frame[:, ['c', 'f']].where(e_above_nine, 't')
        expected = de.DataFrame({'c': [None, 't', 't', None, None],
                                 'f': [None, 't', 't', None, None]})
        assert_frame_equal(result, expected)

        # String replacements for both outcomes.
        result = frame[:, ['c', 'f']].where(e_above_nine, 't', 'y')
        expected = de.DataFrame({'c': ['y', 't', 't', 'y', 'y'],
                                 'f': ['y', 't', 't', 'y', 'y']})
        assert_frame_equal(result, expected)
Beispiel #10
0
    def test_mod_bool(self):
        """Exercise ``%`` with a boolean divisor.

        The ``self.df`` fixture (defined outside this snippet) and its string
        columns must raise ``TypeError``.  For a frame of numeric data,
        ``% True`` is expected to give 0 everywhere except where the input
        holds NaN.
        """
        some_bool = True
        with pytest.raises(TypeError):
            self.df % some_bool

        with pytest.raises(TypeError):
            self.df.select_dtypes('str') % some_bool

        # The original repeated the ``self.df % some_bool`` check a second
        # time verbatim; the duplicate added no coverage and was removed.

        df = dx.DataFrame(
            {
                'a': [6, 7, 10],
                'b': [0, 2, nan],
                'f': [0, 10, 3],
                'g': np.zeros(3, dtype='int64'),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))

        df1 = df % some_bool
        df2 = dx.DataFrame(
            {
                'a': [0, 0, 0],
                'b': [0, 0, nan],
                'f': [0, 0, 0],
                'g': np.zeros(3, dtype='int64'),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))
        assert_frame_equal(df1, df2)
Beispiel #11
0
    def test_bfillna(self):
        """``fillna(method='bfill')`` with and without ``limit``.

        Also checks the argument combinations that must raise ``ValueError``:
        ``limit=0``, ``fill_function`` together with ``method``, and
        ``values`` together with ``method``.
        """
        source = de.DataFrame({
            'a': [4, nan, nan, nan, 3, 2],
            'b': [None, 'a', 'd', None, None, 'er'],
            'c': [nan, nan, 5, nan, 7, nan],
        })

        # Unlimited fill.
        filled = source.fillna(method='bfill')
        expected = de.DataFrame({
            'a': [4.0, 3.0, 3.0, 3.0, 3.0, 2.0],
            'b': ['a', 'a', 'd', 'er', 'er', 'er'],
            'c': [5.0, 5.0, 5.0, 7.0, 7.0, nan],
        })
        assert_frame_equal(filled, expected)

        # With ``limit=1``.
        filled = source.fillna(method='bfill', limit=1)
        expected = de.DataFrame({
            'a': [4.0, nan, nan, 3.0, 3.0, 2.0],
            'b': ['a', 'a', 'd', None, 'er', 'er'],
            'c': [nan, 5.0, 5.0, 7.0, 7.0, nan],
        })
        assert_frame_equal(filled, expected)

        # Rejected argument combinations.
        with pytest.raises(ValueError):
            source.fillna(method='bfill', limit=0)
        with pytest.raises(ValueError):
            source.fillna(method='bfill', limit=1, fill_function='mean')
        with pytest.raises(ValueError):
            source.fillna(values=10, method='bfill')
Beispiel #12
0
    def test_drop_rows_and_cols(self):
        """``drop`` with both a row and a column selector.

        Covers integer, negative-integer, label-list and mixed
        integer/label selectors.
        """
        frame = de.DataFrame({
            'a': [0, 0, 5, 9],
            'b': [0, 1.5, 8, 9],
            'c': [''] + list('efs'),
            'd': [False, False, True, False],
            'e': [0, 20, 30, 4],
            'f': ['a', nan, 'ad', 'effd'],
            'g': [np.nan] * 4,
        })

        # Row 1 and column 1 ('b') are absent from the expected frame.
        trimmed = frame.drop(1, 1)
        expected = de.DataFrame({'a': [0, 5, 9],
                                 'c': ['', 'f', 's'],
                                 'd': [False, True, False],
                                 'e': [0, 30, 4],
                                 'f': ['a', 'ad', 'effd'],
                                 'g': [nan, nan, nan]})
        assert_frame_equal(trimmed, expected)

        # Negative row position together with a list of column labels.
        trimmed = frame.drop(-2, list('abc'))
        expected = de.DataFrame({'d': [False, False, False],
                                 'e': [0, 20, 4],
                                 'f': ['a', None, 'effd'],
                                 'g': [nan, nan, nan]})
        assert_frame_equal(trimmed, expected)

        # Column selector mixing positions and a label.
        trimmed = frame.drop([0, 3], [3, 'a', -2])
        expected = de.DataFrame({'b': [1.5, 8.0],
                                 'c': ['e', 'f'],
                                 'e': [20, 30],
                                 'g': [nan, nan]})
        assert_frame_equal(trimmed, expected)
Beispiel #13
0
    def test_mod_float(self):
        """Exercise ``%`` with a float divisor.

        The ``self.df`` fixture (defined outside this snippet), its string
        columns, and a string operand must raise ``TypeError``; a frame of
        float data is reduced element by element.
        """
        with pytest.raises(TypeError):
            self.df % 5.
        with pytest.raises(TypeError):
            self.df.select_dtypes('str') % 10.
        with pytest.raises(TypeError):
            self.df % 'asdf'

        dividend = dx.DataFrame(
            {'a': [6., 7, 10],
             'b': [0., 2, nan],
             'f': [0., 10, 3],
             'g': np.zeros(3, dtype='float64'),
             'h': [nan, nan, nan]},
            columns=list('abfgh'))
        remainder = dividend % 3.

        expected = dx.DataFrame(
            {'a': [0., 1, 1],
             'b': [0., 2, nan],
             'f': [0., 1, 0],
             'g': np.zeros(3, dtype='float64'),
             'h': [nan, nan, nan]},
            columns=list('abfgh'))
        assert_frame_equal(remainder, expected)
Beispiel #14
0
    def test_truediv_bool(self):
        """Exercise ``/`` with a boolean operand on either side.

        The ``self.df`` fixture (defined outside this snippet) and its string
        columns must raise ``TypeError``.  Its 'number' columns are divided
        by ``True``, and ``True`` is divided by them; the expected answer for
        the latter has ``inf`` where the other frame's expected answer has 0.
        """
        some_bool = True

        with pytest.raises(TypeError):
            self.df / some_bool

        with pytest.raises(TypeError):
            self.df.select_dtypes('str') / some_bool

        # The original repeated the ``self.df / some_bool`` check a second
        # time verbatim; the duplicate added no coverage and was removed.

        df1 = self.df.select_dtypes('number') / some_bool
        df2 = dx.DataFrame(
            {
                'a': [0., nan, 5],
                'b': [0, 1.5, nan],
                'f': [0., 4, 5],
                'g': np.zeros(3, dtype='float64'),
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))
        assert_frame_equal(df1, df2)

        df1 = some_bool / self.df.select_dtypes('number')
        df2 = dx.DataFrame(
            {
                'a': [np.inf, nan, .2],
                'b': [np.inf, 1 / 1.5, nan],
                'f': [np.inf, .25, .2],
                'g': [np.inf] * 3,
                'h': [nan, nan, nan]
            },
            columns=list('abfgh'))
        assert_frame_equal(df1, df2)
Beispiel #15
0
    def test_truediv_float(self):
        """Exercise ``/`` with a float operand on either side.

        The ``self.df`` fixture (defined outside this snippet), its string
        columns, and a string operand must raise ``TypeError``.  Its 'number'
        columns are then divided by ``2.`` and used as the divisor of ``10.``.
        """
        with pytest.raises(TypeError):
            self.df / 5.
        with pytest.raises(TypeError):
            self.df.select_dtypes('str') / 10.
        with pytest.raises(TypeError):
            self.df / 'asdf'

        # frame / float
        halved = self.df.select_dtypes('number') / 2.
        expected = dx.DataFrame(
            {'a': [0, nan, 2.5],
             'b': [0, .75, nan],
             'f': [0, 2, 2.5],
             'g': np.zeros(3),
             'h': [nan, nan, nan]},
            columns=list('abfgh'))
        assert_frame_equal(halved, expected)

        # float / frame
        reciprocal = 10. / self.df.select_dtypes('number')
        expected = dx.DataFrame(
            {'a': [np.inf, nan, 2],
             'b': [np.inf, 10 / 1.5, nan],
             'f': [np.inf, 2.5, 2],
             'g': [np.inf] * 3,
             'h': [nan, nan, nan]},
            columns=list('abfgh'))
        assert_frame_equal(reciprocal, expected)
Beispiel #16
0
    def test_scalar_column_with_list_slice_row_selection(self):
        """Select one column together with a list or slice of rows.

        ``df`` is a frame defined outside this snippet.
        """
        # Row list with an integer column position.
        picked = df[[4, 6], 2]
        expected = dx.DataFrame({'c': ['e', 'g']})
        assert_frame_equal(picked, expected)

        # Single-element row list.
        picked = df[[4], 2]
        expected = dx.DataFrame({'c': ['e']})
        assert_frame_equal(picked, expected)

        # Row list with a column label.
        picked = df[[5, 2], 'f']
        expected = dx.DataFrame({'f': [4.0, 3.0]})
        assert_frame_equal(picked, expected)

        # Open-ended row slice.
        picked = df[3:, 'f']
        expected = dx.DataFrame({'f': [3.0, 11.0, 4.0, 5.0, 1.0]})
        assert_frame_equal(picked, expected)

        # Row slice with a negative step.
        picked = df[5::-2, 'b']
        expected = dx.DataFrame({'b': [3.0, 1.0, 8.0]})
        assert_frame_equal(picked, expected)
Beispiel #17
0
    def test_drop_rows(self):
        """``drop(rows=...)`` with one position, a bad position, and a list."""
        frame = de.DataFrame({
            'a': [0, 0, 5, 9],
            'b': [0, 1.5, 8, 9],
            'c': [''] + list('efs'),
            'd': [False, False, True, False],
            'e': [0, 20, 30, 4],
            'f': ['a', nan, 'ad', 'effd'],
            'g': [np.nan] * 4,
        })

        # Dropping position 3 leaves the first three rows.
        remaining = frame.drop(rows=3)
        expected = de.DataFrame({'a': [0, 0, 5],
                                 'b': [0.0, 1.5, 8.0],
                                 'c': ['', 'e', 'f'],
                                 'd': [False, False, True],
                                 'e': [0, 20, 30],
                                 'f': ['a', None, 'ad'],
                                 'g': [nan, nan, nan]})
        assert_frame_equal(remaining, expected)

        # Position 5 does not exist in a four-row frame.
        with pytest.raises(IndexError):
            frame.drop(rows=5)

        # A mix of negative and non-negative positions leaves only row 2.
        remaining = frame.drop(rows=[-1, 0, -3])
        expected = de.DataFrame({'a': [5],
                                 'b': [8.0],
                                 'c': ['f'],
                                 'd': [True],
                                 'e': [30],
                                 'f': ['ad'],
                                 'g': [nan]})
        assert_frame_equal(remaining, expected)
Beispiel #18
0
    def test_where_array_xy(self):
        """``where`` on columns 'c' and 'f' with array-valued replacements.

        The condition is ``e > 9``.  Combining an integer array with a string
        replacement is expected to raise ``TypeError``.
        """
        frame = de.DataFrame({
            'a': [9, 10, 9, 9, 10],
            'b': [0, nan, nan, 0, 1],
            'c': [''] + list('eeaz'),
            'd': [False, False, True, False, True],
            'e': [0, 20, 30, 4, 4],
            'f': ['a', nan, 'ad', None, 'ad'],
            'g': [np.nan] * 5,
        })
        e_above_nine = frame[:, 'e'] > 9

        # Arrays for both replacements.
        result = frame[:, ['c', 'f']].where(
            e_above_nine, np.arange(5), np.arange(10, 15))
        expected = de.DataFrame({'c': [10, 1, 2, 13, 14],
                                 'f': [10, 1, 2, 13, 14]})
        assert_frame_equal(result, expected)

        # Array for the first replacement, scalar for the second.
        result = frame[:, ['c', 'f']].where(e_above_nine, np.arange(5), 99)
        expected = de.DataFrame({'c': [99, 1, 2, 99, 99],
                                 'f': [99, 1, 2, 99, 99]})
        assert_frame_equal(result, expected)

        # Integer array paired with a string replacement is rejected.
        with pytest.raises(TypeError):
            frame[:, ['c', 'f']].where(e_above_nine, np.arange(5), 'er')

        # Only ``y`` supplied.
        result = frame[:, ['c', 'f']].where(e_above_nine, y='er')
        expected = de.DataFrame({'c': ['er', 'e', 'e', 'er', 'er'],
                                 'f': ['er', None, 'ad', 'er', 'er']})
        assert_frame_equal(result, expected)
Beispiel #19
0
class TestValues:
    """Tests for ``values``, ``shape``, ``size``, ``to_dict`` and ``copy``.

    ``df1`` is built from one integer-valued and one float-valued list;
    ``df2`` is the same data plus a list of strings.
    """

    df1 = dx.DataFrame({'a': [1, 5, 7, 11], 'b': [nan, 5.4, -1.1, .045]})
    df2 = dx.DataFrame({
        'a': [1, 5, 7, 11],
        'b': [nan, 5.4, -1.1, .045],
        'c': ['ted', 'fred', 'ted', 'fred']
    })

    def test_get_values(self):
        """``values`` must equal the column data laid out as a 2-D array."""
        values1 = self.df1.values
        values2 = np.array([[1, 5, 7, 11], [nan, 5.4, -1.1, .045]]).T
        assert_array_equal(values1, values2)

        # A frame built from a 2-D array must hand the same numbers back.
        a = np.random.rand(100, 5)
        df = dx.DataFrame(a)
        assert_array_equal(df.values, a)

        # With a string column present the comparison uses an object array.
        values1 = self.df2.values
        values2 = np.array([[1, 5, 7, 11], [nan, 5.4, -1.1, .045],
                            ['ted', 'fred', 'ted', 'fred']],
                           dtype='O').T
        assert_array_equal(values1, values2)

    def test_shape(self):
        """``shape`` must be ``(rows, columns)``."""
        shape = self.df1.shape
        assert shape == (4, 2)

        a = np.random.rand(100, 5)
        df = dx.DataFrame(a)
        assert df.shape == (100, 5)

    def test_size(self):
        """``size`` must be the total number of cells."""
        assert self.df1.size == 8

        a = np.random.rand(100, 5)
        df = dx.DataFrame(a)
        assert df.size == 500

    def test_to_dict(self):
        """``to_dict`` must return every column, as arrays or as lists."""
        d1 = self.df1.to_dict('array')
        d2 = {
            'a': np.array([1, 5, 7, 11]),
            'b': np.array([nan, 5.4, -1.1, .045])
        }
        # The loop below only visits keys present in ``d1``; without this
        # check a missing column (or an empty dict) would pass unnoticed.
        assert set(d1) == set(d2)
        for key, arr in d1.items():
            assert_array_equal(arr, d2[key])

        d1 = self.df1.to_dict('list')
        d2 = {'a': [1, 5, 7, 11], 'b': [nan, 5.4, -1.1, .045]}
        assert_dict_list(d1, d2)

    def test_copy(self):
        """``copy`` must produce a frame equal to the original."""
        df2 = self.df1.copy()
        assert_frame_equal(self.df1, df2)
Beispiel #20
0
    def test_list_of_string_col_selection(self):
        """Select columns by a list of labels, keeping the requested order.

        ``df`` is a frame defined outside this snippet.
        """
        picked = df[:, ['b', 'd', 'a']]
        expected = dx.DataFrame(
            {'a': [1, 2, 5, 9, 3, 4, 5, 1],
             'b': [1.5, 8.0, 9.0, 1.0, 2.0, 3.0, 2.0, 8.0],
             'd': [True, False, True, False, True, False, True, False]},
            columns=['b', 'd', 'a'])
        assert_frame_equal(picked, expected)

        # A one-element label list.
        picked = df[:, ['a']]
        expected = dx.DataFrame({'a': [1, 2, 5, 9, 3, 4, 5, 1]})
        assert_frame_equal(picked, expected)
Beispiel #21
0
    def test_sub_float(self):
        """Exercise ``-`` with a float on either side.

        The ``self.df`` fixture (defined outside this snippet) and its string
        columns must raise ``TypeError``.  The 'int' selection and the
        'number' + 'bool' selection are checked in both operand orders.
        """
        some_float = 5.0
        with pytest.raises(TypeError):
            self.df - some_float
        with pytest.raises(TypeError):
            self.df.select_dtypes('str') - some_float

        # 'int' columns: frame - float
        result = self.df.select_dtypes('int') - some_float
        expected = dx.DataFrame(
            {'a': [-5., nan, 0],
             'f': [-5., -1, 0],
             'g': [-5., -5, -5]},
            columns=['a', 'f', 'g'])
        assert_frame_equal(result, expected)

        # 'int' columns: float - frame
        result = some_float - self.df.select_dtypes('int')
        expected = dx.DataFrame(
            {'a': [5., nan, 0],
             'f': [5., 1, 0],
             'g': 5. - np.zeros(3, dtype='int')},
            columns=['a', 'f', 'g'])
        assert_frame_equal(result, expected)

        # 'number' and 'bool' columns: frame - float
        result = self.df.select_dtypes(['number', 'bool']) - some_float
        expected = dx.DataFrame(
            {'a': [-5., nan, 0],
             'b': [-5., -3.5, nan],
             'd': [nan, -5., -4],
             'f': [-5., -1, 0],
             'g': np.zeros(3, dtype='int') - some_float,
             'h': [nan, nan, nan]},
            columns=list('abdfgh'))
        assert_frame_equal(result, expected)

        # 'number' and 'bool' columns: float - frame
        result = some_float - self.df.select_dtypes(['number', 'bool'])
        expected = dx.DataFrame(
            {'a': [5., nan, 0],
             'b': [5., 3.5, nan],
             'd': [nan, 5., 4],
             'f': [5., 1, 0],
             'g': 5. - np.zeros(3, dtype='int'),
             'h': [nan, nan, nan]},
            columns=list('abdfgh'))
        assert_frame_equal(result, expected)
Beispiel #22
0
    def test_list_of_integer_col_selection(self):
        """Select columns by a list of positions, keeping the requested order.

        ``df`` is a frame defined outside this snippet.
        """
        picked = df[:, [4, 6, 1]]
        expected = dx.DataFrame(
            {'b': [1.5, 8.0, 9.0, 1.0, 2.0, 3.0, 2.0, 8.0],
             'e': [10, 20, 30, 4, 5, 6, 7, 8],
             'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y']},
            columns=['e', 'g', 'b'])
        assert_frame_equal(picked, expected)

        # A one-element position list.
        picked = df[:, [3]]
        expected = dx.DataFrame(
            {'d': [True, False, True, False, True, False, True, False]})
        assert_frame_equal(picked, expected)
Beispiel #23
0
    def test_sub_int(self):
        """Exercise ``-`` with an integer on either side.

        The ``self.df`` fixture (defined outside this snippet) and its string
        columns must raise ``TypeError``.  The 'int' selection and the
        'number' + 'bool' selection are checked in both operand orders.
        """
        with pytest.raises(TypeError):
            self.df - 5
        with pytest.raises(TypeError):
            self.df.select_dtypes('str') - 10

        # 'int' columns: frame - int
        result = self.df.select_dtypes('int') - 5
        expected = dx.DataFrame(
            {'a': [-5, nan, 0],
             'f': [-5, -1, 0],
             'g': [-5, -5, -5]},
            columns=['a', 'f', 'g'])
        assert_frame_equal(result, expected)

        # 'int' columns: int - frame
        result = 5 - self.df.select_dtypes('int')
        expected = dx.DataFrame(
            {'a': [5, nan, 0],
             'f': [5, 1, 0],
             'g': 5 - np.zeros(3, dtype='int')},
            columns=['a', 'f', 'g'])
        assert_frame_equal(result, expected)

        # 'number' and 'bool' columns: frame - int
        result = self.df.select_dtypes(['number', 'bool']) - 5
        expected = dx.DataFrame(
            {'a': [-5, nan, 0],
             'b': [-5, -3.5, nan],
             'd': [nan, -5, -4],
             'f': [-5, -1, 0],
             'g': np.zeros(3, dtype='int') - 5,
             'h': [nan, nan, nan]},
            columns=list('abdfgh'))
        assert_frame_equal(result, expected)

        # 'number' and 'bool' columns: int - frame
        result = 5 - self.df.select_dtypes(['number', 'bool'])
        expected = dx.DataFrame(
            {'a': [5, nan, 0],
             'b': [5, 3.5, nan],
             'd': [nan, 5, 4],
             'f': [5, 1, 0],
             'g': 5 - np.zeros(3, dtype='int'),
             'h': [nan, nan, nan]},
            columns=list('abdfgh'))
        assert_frame_equal(result, expected)
Beispiel #24
0
    def test_slice_with_integers_col_selection(self):
        """Select columns with integer slices.

        ``df`` is a frame defined outside this snippet.
        """
        picked = df[:, 3:6]
        expected = dx.DataFrame(
            {'d': [True, False, True, False, True, False, True, False],
             'e': [10, 20, 30, 4, 5, 6, 7, 8],
             'f': [1.0, 3.0, 3.0, 3.0, 11.0, 4.0, 5.0, 1.0]},
            columns=['d', 'e', 'f'])
        assert_frame_equal(picked, expected)

        # Negative start combined with a step.
        picked = df[:, -4::2]
        expected = dx.DataFrame(
            {'e': [10, 20, 30, 4, 5, 6, 7, 8],
             'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y']},
            columns=['e', 'g'])
        assert_frame_equal(picked, expected)
Beispiel #25
0
    def test_add_string(self):
        """Concatenate ``'aaa'`` after and before the string columns.

        ``self.df`` is a fixture defined outside this snippet.  The expected
        frames keep ``None`` entries as ``None``.
        """
        # Suffix: frame + str
        suffixed = self.df.select_dtypes('str') + 'aaa'
        expected = dx.DataFrame({'c': ['aaa', 'baaa', 'gaaa'],
                                 'e': ['aaa', None, 'adaaa']})
        assert_frame_equal(suffixed, expected)

        # Prefix: str + frame
        prefixed = 'aaa' + self.df.select_dtypes('str')
        expected = dx.DataFrame({'c': ['aaa', 'aaab', 'aaag'],
                                 'e': ['aaa', None, 'aaaad']})
        assert_frame_equal(prefixed, expected)
Beispiel #26
0
    def test_nlargest_float(self):
        """``nlargest`` on float columns containing NaN, for each ``keep`` mode.

        With no ``keep`` argument the expected frame for ``n=2`` has three
        rows, because two rows tie at 0.0.
        """
        frame = de.DataFrame({
            'a': [9, 10, 9, 9, 10],
            'b': [0, nan, nan, 0, 1],
            'c': [''] + list('eeaz'),
            'd': [False, False, True, False, True],
            'e': [0, 20, 30, 4, 4],
            'f': ['a', nan, 'ad', None, 'ad'],
            'g': [np.nan] * 5,
        })

        # No ``keep`` argument: both tied rows are expected.
        top = frame.nlargest(2, 'b')
        expected = de.DataFrame({'a': [10, 9, 9],
                                 'b': [1.0, 0.0, 0.0],
                                 'c': ['z', '', 'a'],
                                 'd': [True, False, False],
                                 'e': [4, 0, 4],
                                 'f': ['ad', 'a', None],
                                 'g': [nan, nan, nan]})
        assert_frame_equal(top, expected)

        # keep='first': the earlier of the tied rows is expected.
        top = frame.nlargest(2, 'b', keep='first')
        expected = de.DataFrame({'a': [10, 9],
                                 'b': [1.0, 0.0],
                                 'c': ['z', ''],
                                 'd': [True, False],
                                 'e': [4, 0],
                                 'f': ['ad', 'a'],
                                 'g': [nan, nan]})
        assert_frame_equal(top, expected)

        # keep='last': the later of the tied rows is expected.
        top = frame.nlargest(2, 'b', keep='last')
        expected = de.DataFrame({'a': [10, 9],
                                 'b': [1.0, 0.0],
                                 'c': ['z', 'a'],
                                 'd': [True, False],
                                 'e': [4, 4],
                                 'f': ['ad', None],
                                 'g': [nan, nan]})
        assert_frame_equal(top, expected)

        # Column 'g' is entirely NaN; a single row (the first) is expected.
        top = frame.nlargest(1, 'g', keep='all')
        expected = de.DataFrame({'a': [9],
                                 'b': [0.0],
                                 'c': [''],
                                 'd': [False],
                                 'e': [0],
                                 'f': ['a'],
                                 'g': [nan]})
        assert_frame_equal(top, expected)
Beispiel #27
0
    def test_slice_labels_and_integer_col_selection(self):
        """Select columns with slices that mix a label and an integer bound.

        ``df`` is a frame defined outside this snippet.
        """
        # Label start, integer stop.
        picked = df[:, 'c':5]
        expected = dx.DataFrame(
            {'c': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
             'd': [True, False, True, False, True, False, True, False],
             'e': [10, 20, 30, 4, 5, 6, 7, 8]},
            columns=['c', 'd', 'e'])
        assert_frame_equal(picked, expected)

        # Integer start, label stop, step of -1; the expected frame
        # includes the stop label 'd'.
        picked = df[:, 6:'d':-1]
        expected = dx.DataFrame(
            {'d': [True, False, True, False, True, False, True, False],
             'e': [10, 20, 30, 4, 5, 6, 7, 8],
             'f': [1.0, 3.0, 3.0, 3.0, 11.0, 4.0, 5.0, 1.0],
             'g': ['x', 'y', 'x', 'x', 'y', 'y', 'x', 'y']},
            columns=['g', 'f', 'e', 'd'])
        assert_frame_equal(picked, expected)
Beispiel #28
0
    def test_add_float(self):
        """Exercise ``+`` with a float on either side.

        Adding to the whole ``self.df`` fixture (defined outside this
        snippet) must raise ``TypeError``.  For each dtype selection both
        operand orders are compared against the same expected frame.
        """
        some_float = 5.0
        with pytest.raises(TypeError):
            self.df + some_float

        # 'int' columns.
        expected = dx.DataFrame(
            {'a': [5., nan, 10],
             'f': [5., 9, 10],
             'g': [5., 5, 5]},
            columns=['a', 'f', 'g'])
        assert_frame_equal(self.df.select_dtypes('int') + some_float, expected)
        assert_frame_equal(some_float + self.df.select_dtypes('int'), expected)

        # 'number' columns.
        expected = dx.DataFrame(
            {'a': [5., nan, 10],
             'b': [5., 6.5, nan],
             'f': [5., 9, 10],
             'g': [5., 5, 5],
             'h': [nan, nan, nan]},
            columns=list('abfgh'))
        assert_frame_equal(self.df.select_dtypes('number') + some_float,
                           expected)
        assert_frame_equal(some_float + self.df.select_dtypes('number'),
                           expected)

        # 'number' and 'bool' columns.
        expected = dx.DataFrame(
            {'a': [5., nan, 10],
             'b': [5., 6.5, nan],
             'd': [nan, 5., 6],
             'f': [5., 9, 10],
             'g': [5., 5, 5],
             'h': [nan, nan, nan]},
            columns=list('abdfgh'))
        assert_frame_equal(
            self.df.select_dtypes(['number', 'bool']) + some_float, expected)
        assert_frame_equal(
            some_float + self.df.select_dtypes(['number', 'bool']), expected)
Beispiel #29
0
    def test_integer_condition(self):
        """Filter rows with boolean frames built from comparisons.

        ``self.df`` is a fixture defined outside this snippet.  Each filtered
        result is compared with the same rows picked by explicit position.
        """
        # Single comparisons.
        a_above_four = self.df[:, 'a'] > 4
        assert_frame_equal(self.df[a_above_four, :], self.df[[2, 3, 6], :])

        a_is_zero = self.df[:, 'a'] == 0
        assert_frame_equal(self.df[a_is_zero, :], self.df[[0, 1], :])

        # Conjunction.
        both = (self.df[:, 'a'] > 2) & (self.df[:, 'i'] < 6)
        assert_frame_equal(self.df[both, :], self.df[[3, 4, 5, 6], :])

        # Disjunction.
        either = (self.df[:, 'a'] > 2) | (self.df[:, 'i'] < 6)
        assert_frame_equal(self.df[either, :],
                           self.df[[0, 2, 3, 4, 5, 6], :])

        # Negated disjunction.
        neither = ~((self.df[:, 'a'] > 2) | (self.df[:, 'i'] < 6))
        assert_frame_equal(self.df[neither, :], self.df[[1, 7], :])

        # Negated disjunction together with a column-label list.
        neither = ~((self.df[:, 'a'] > 2) | (self.df[:, 'i'] < 6))
        picked = self.df[neither, ['d', 'b']]
        expected = dx.DataFrame({'b': [1.512344353, 8],
                                 'd': [False, False]},
                                columns=['d', 'b'])
        assert_frame_equal(picked, expected)
Beispiel #30
0
    def test_factorize(self):
        """``factorize`` on int, float-with-NaN, string and bool columns.

        Each call returns two arrays, compared here against integer codes
        and the distinct values in order of first appearance.
        """
        frame = de.DataFrame({
            'a': [9, 10, 9, 9, 10],
            'b': [0, nan, nan, 0, 1],
            'c': [''] + list('eeaz'),
            'd': [False, False, True, False, True],
            'e': [0, 20, 30, 4, 4],
            'f': ['a', nan, 'ad', None, 'ad'],
            'g': [np.nan] * 5,
        })

        # Integer column.
        codes, uniques = frame.factorize('a')
        assert_array_equal(codes, array([0, 1, 0, 0, 1]))
        assert_array_equal(uniques, array([9, 10]))

        # Float column: the expected codes give both NaN entries code 1.
        codes, uniques = frame.factorize('b')
        assert_array_equal(codes, array([0, 1, 1, 0, 2]))
        assert_array_equal(uniques, array([0., nan, 1.]))

        # String column.
        codes, uniques = frame.factorize('c')
        assert_array_equal(codes, array([0, 1, 1, 2, 3]))
        assert_array_equal(uniques,
                           array(['', 'e', 'a', 'z'], dtype=object))

        # Boolean column.
        codes, uniques = frame.factorize('d')
        assert_array_equal(codes, array([0, 0, 1, 0, 1]))
        assert_array_equal(uniques, array([False, True]))