Example #1
0
    def test_stack_mixed_levels(self):
        columns = MultiIndex.from_tuples(
            [("A", "cat", "long"), ("B", "cat", "long"), ("A", "dog", "short"), ("B", "dog", "short")],
            names=["exp", "animal", "hair_length"],
        )
        df = DataFrame(randn(4, 4), columns=columns)

        animal_hair_stacked = df.stack(level=["animal", "hair_length"])
        exp_hair_stacked = df.stack(level=["exp", "hair_length"])

        # GH #8584: Need to check that stacking works when a number
        # is passed that is both a level name and in the range of
        # the level numbers
        df2 = df.copy()
        df2.columns.names = ["exp", "animal", 1]
        assert_frame_equal(df2.stack(level=["animal", 1]), animal_hair_stacked, check_names=False)
        assert_frame_equal(df2.stack(level=["exp", 1]), exp_hair_stacked, check_names=False)

        # When mixed types are passed and the ints are not level
        # names, raise
        self.assertRaises(ValueError, df2.stack, level=["animal", 0])

        # GH #8584: Having 0 in the level names could raise a
        # strange error about lexsort depth
        df3 = df.copy()
        df3.columns.names = ["exp", "animal", 0]
        assert_frame_equal(df3.stack(level=["animal", 0]), animal_hair_stacked, check_names=False)
    def test_drop_names(self):
        df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]],
                       index=['a', 'b', 'c'],
                       columns=['d', 'e', 'f'])
        df.index.name, df.columns.name = 'first', 'second'
        df_dropped_b = df.drop('b')
        df_dropped_e = df.drop('e', axis=1)
        df_inplace_b, df_inplace_e = df.copy(), df.copy()
        df_inplace_b.drop('b', inplace=True)
        df_inplace_e.drop('e', axis=1, inplace=True)
        for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e):
            self.assertEqual(obj.index.name, 'first')
            self.assertEqual(obj.columns.name, 'second')
        self.assertEqual(list(df.columns), ['d', 'e', 'f'])

        self.assertRaises(ValueError, df.drop, ['g'])
        self.assertRaises(ValueError, df.drop, ['g'], 1)

        # errors = 'ignore'
        dropped = df.drop(['g'], errors='ignore')
        expected = Index(['a', 'b', 'c'], name='first')
        self.assert_index_equal(dropped.index, expected)

        dropped = df.drop(['b', 'g'], errors='ignore')
        expected = Index(['a', 'c'], name='first')
        self.assert_index_equal(dropped.index, expected)

        dropped = df.drop(['g'], axis=1, errors='ignore')
        expected = Index(['d', 'e', 'f'], name='second')
        self.assert_index_equal(dropped.columns, expected)

        dropped = df.drop(['d', 'g'], axis=1, errors='ignore')
        expected = Index(['e', 'f'], name='second')
        self.assert_index_equal(dropped.columns, expected)
    def test_drop_names(self):
        df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], index=["a", "b", "c"], columns=["d", "e", "f"])
        df.index.name, df.columns.name = "first", "second"
        df_dropped_b = df.drop("b")
        df_dropped_e = df.drop("e", axis=1)
        df_inplace_b, df_inplace_e = df.copy(), df.copy()
        df_inplace_b.drop("b", inplace=True)
        df_inplace_e.drop("e", axis=1, inplace=True)
        for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e):
            self.assertEqual(obj.index.name, "first")
            self.assertEqual(obj.columns.name, "second")
        self.assertEqual(list(df.columns), ["d", "e", "f"])

        self.assertRaises(ValueError, df.drop, ["g"])
        self.assertRaises(ValueError, df.drop, ["g"], 1)

        # errors = 'ignore'
        dropped = df.drop(["g"], errors="ignore")
        expected = Index(["a", "b", "c"], name="first")
        self.assert_index_equal(dropped.index, expected)

        dropped = df.drop(["b", "g"], errors="ignore")
        expected = Index(["a", "c"], name="first")
        self.assert_index_equal(dropped.index, expected)

        dropped = df.drop(["g"], axis=1, errors="ignore")
        expected = Index(["d", "e", "f"], name="second")
        self.assert_index_equal(dropped.columns, expected)

        dropped = df.drop(["d", "g"], axis=1, errors="ignore")
        expected = Index(["e", "f"], name="second")
        self.assert_index_equal(dropped.columns, expected)
Example #4
0
class Append(object):

    goal_time = 0.2

    def setup(self):
        self.df1 = DataFrame(np.random.randn(10000, 4),
                             columns=['A', 'B', 'C', 'D'])
        self.df2 = self.df1.copy()
        self.df2.index = np.arange(10000, 20000)
        self.mdf1 = self.df1.copy()
        self.mdf1['obj1'] = 'bar'
        self.mdf1['obj2'] = 'bar'
        self.mdf1['int1'] = 5
        try:
            with warnings.catch_warnings(record=True):
                self.mdf1.consolidate(inplace=True)
        except:
            pass
        self.mdf2 = self.mdf1.copy()
        self.mdf2.index = self.df2.index

    def time_append_homogenous(self):
        self.df1.append(self.df2)

    def time_append_mixed(self):
        self.mdf1.append(self.mdf2)
Example #5
0
    def test_stack_int_level_names(self):
        columns = MultiIndex.from_tuples(
            [('A', 'cat', 'long'), ('B', 'cat', 'long'),
             ('A', 'dog', 'short'), ('B', 'dog', 'short')],
            names=['exp', 'animal', 'hair_length']
        )
        df = DataFrame(randn(4, 4), columns=columns)

        exp_animal_stacked = df.stack(level=['exp', 'animal'])
        animal_hair_stacked = df.stack(level=['animal', 'hair_length'])
        exp_hair_stacked = df.stack(level=['exp', 'hair_length'])

        df2 = df.copy()
        df2.columns.names = [0, 1, 2]
        assert_frame_equal(df2.stack(level=[1, 2]), animal_hair_stacked,
                           check_names=False)
        assert_frame_equal(df2.stack(level=[0, 1]), exp_animal_stacked,
                           check_names=False)
        assert_frame_equal(df2.stack(level=[0, 2]), exp_hair_stacked,
                           check_names=False)

        # Out-of-order int column names
        df3 = df.copy()
        df3.columns.names = [2, 0, 1]
        assert_frame_equal(df3.stack(level=[0, 1]), animal_hair_stacked,
                           check_names=False)
        assert_frame_equal(df3.stack(level=[2, 0]), exp_animal_stacked,
                           check_names=False)
        assert_frame_equal(df3.stack(level=[2, 1]), exp_hair_stacked,
                           check_names=False)
Example #6
0
    def test_interp_alt_scipy(self):
        tm._skip_if_no_scipy()
        df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7],
                        'C': [1, 2, 3, 5, 8, 13, 21]})
        result = df.interpolate(method='barycentric')
        expected = df.copy()
        expected['A'].iloc[2] = 3
        expected['A'].iloc[5] = 6
        assert_frame_equal(result, expected)

        result = df.interpolate(method='barycentric', downcast='infer')
        assert_frame_equal(result, expected.astype(np.int64))

        result = df.interpolate(method='krogh')
        expectedk = df.copy()
        # expectedk['A'].iloc[2] = 3
        # expectedk['A'].iloc[5] = 6
        expectedk['A'] = expected['A']
        assert_frame_equal(result, expectedk)

        _skip_if_no_pchip()
        result = df.interpolate(method='pchip')
        expected['A'].iloc[2] = 3
        expected['A'].iloc[5] = 6.125
        assert_frame_equal(result, expected)
Example #7
0
    def test_dropna(self):
        df = DataFrame(np.random.randn(6, 4))
        df[2][:2] = np.nan

        dropped = df.dropna(axis=1)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0)
        expected = df.loc[lrange(2, 6)]
        inp = df.copy()
        inp.dropna(axis=0, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        # threshold
        dropped = df.dropna(axis=1, thresh=5)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, thresh=5, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0, thresh=4)
        expected = df.loc[lrange(2, 6)]
        inp = df.copy()
        inp.dropna(axis=0, thresh=4, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=1, thresh=4)
        assert_frame_equal(dropped, df)

        dropped = df.dropna(axis=1, thresh=3)
        assert_frame_equal(dropped, df)

        # subset
        dropped = df.dropna(axis=0, subset=[0, 1, 3])
        inp = df.copy()
        inp.dropna(axis=0, subset=[0, 1, 3], inplace=True)
        assert_frame_equal(dropped, df)
        assert_frame_equal(inp, df)

        # all
        dropped = df.dropna(axis=1, how='all')
        assert_frame_equal(dropped, df)

        df[2] = np.nan
        dropped = df.dropna(axis=1, how='all')
        expected = df.loc[:, [0, 1, 3]]
        assert_frame_equal(dropped, expected)

        # bad input
        msg = ("No axis named 3 for object type"
               " <class 'pandas.core.frame.DataFrame'>")
        with pytest.raises(ValueError, match=msg):
            df.dropna(axis=3)
Example #8
0
    def test_copy(self):
        """Check inplace/copy behavior of link_df, link_df_iter"""
        # One 1D stepper
        N = 5
        f = DataFrame({'x': np.arange(N), 'y': np.ones(N), 'frame': np.arange(N)})
        f_inplace = f.copy()
        expected = f.copy()
        expected['particle'] = np.zeros(N)

        # Should add particle column in-place
        # UNLESS diagnostics are enabled (or input dataframe is not writeable)
        actual = self.link_df(f_inplace, 5)
        assert_frame_equal(actual, expected)
        if self.do_diagnostics:
            assert 'particle' not in f_inplace.columns
        else:
            assert_frame_equal(actual, f_inplace)

        # When DataFrame is actually a view, link_df should produce a warning
        # and then copy the DataFrame. This only happens for pandas >= 0.16.
        if is_pandas_recent:
            with assert_produces_warning(UserWarning):
                actual = self.link_df(f[f['frame'] > 0], 5)
            assert 'particle' not in f.columns

        # Should copy
        actual = self.link_df(f, 5, copy_features=True)
        assert_frame_equal(actual, expected)
        assert 'particle' not in f.columns

        # Should copy
        actual_iter = self.link_df_iter(f, 5, hash_size=(10, 2))
        assert_frame_equal(actual_iter, expected)
        assert 'particle' not in f.columns
    def test_append_empty_dataframe(self):

        # Empty df append empty df
        df1 = DataFrame([])
        df2 = DataFrame([])
        result = df1.append(df2)
        expected = df1.copy()
        assert_frame_equal(result, expected)

        # Non-empty df append empty df
        df1 = DataFrame(np.random.randn(5, 2))
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        assert_frame_equal(result, expected)

        # Empty df with columns append empty df
        df1 = DataFrame(columns=['bar', 'foo'])
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        assert_frame_equal(result, expected)

        # Non-Empty df with columns append empty df
        df1 = DataFrame(np.random.randn(5, 2), columns=['bar', 'foo'])
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        assert_frame_equal(result, expected)
Example #10
0
class Equals(object):

    def setup(self):
        N = 10**3
        self.float_df = DataFrame(np.random.randn(N, N))
        self.float_df_nan = self.float_df.copy()
        self.float_df_nan.iloc[-1, -1] = np.nan

        self.object_df = DataFrame('foo', index=range(N), columns=range(N))
        self.object_df_nan = self.object_df.copy()
        self.object_df_nan.iloc[-1, -1] = np.nan

        self.nonunique_cols = self.object_df.copy()
        self.nonunique_cols.columns = ['A'] * len(self.nonunique_cols.columns)
        self.nonunique_cols_nan = self.nonunique_cols.copy()
        self.nonunique_cols_nan.iloc[-1, -1] = np.nan

    def time_frame_float_equal(self):
        self.float_df.equals(self.float_df)

    def time_frame_float_unequal(self):
        self.float_df.equals(self.float_df_nan)

    def time_frame_nonunique_equal(self):
        self.nonunique_cols.equals(self.nonunique_cols)

    def time_frame_nonunique_unequal(self):
        self.nonunique_cols.equals(self.nonunique_cols_nan)

    def time_frame_object_equal(self):
        self.object_df.equals(self.object_df)

    def time_frame_object_unequal(self):
        self.object_df.equals(self.object_df_nan)
Example #11
0
    def test_assign(self):
        df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
        original = df.copy()
        result = df.assign(C=df.B / df.A)
        expected = df.copy()
        expected['C'] = [4, 2.5, 2]
        assert_frame_equal(result, expected)

        # lambda syntax
        result = df.assign(C=lambda x: x.B / x.A)
        assert_frame_equal(result, expected)

        # original is unmodified
        assert_frame_equal(df, original)

        # Non-Series array-like
        result = df.assign(C=[4, 2.5, 2])
        assert_frame_equal(result, expected)
        # original is unmodified
        assert_frame_equal(df, original)

        result = df.assign(B=df.B / df.A)
        expected = expected.drop('B', axis=1).rename(columns={'C': 'B'})
        assert_frame_equal(result, expected)

        # overwrite
        result = df.assign(A=df.A + df.B)
        expected = df.copy()
        expected['A'] = [5, 7, 9]
        assert_frame_equal(result, expected)

        # lambda
        result = df.assign(A=lambda x: x.A + x.B)
        assert_frame_equal(result, expected)
Example #12
0
    def test_interp_alt_scipy(self):
        tm._skip_if_no_scipy()
        df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7],
                        'C': [1, 2, 3, 5, 8, 13, 21]})
        result = df.interpolate(method='barycentric')
        expected = df.copy()
        expected.loc[2, 'A'] = 3
        expected.loc[5, 'A'] = 6
        assert_frame_equal(result, expected)

        result = df.interpolate(method='barycentric', downcast='infer')
        assert_frame_equal(result, expected.astype(np.int64))

        result = df.interpolate(method='krogh')
        expectedk = df.copy()
        expectedk['A'] = expected['A']
        assert_frame_equal(result, expectedk)

        _skip_if_no_pchip()
        import scipy
        result = df.interpolate(method='pchip')
        expected.loc[2, 'A'] = 3

        if LooseVersion(scipy.__version__) >= '0.17.0':
            expected.loc[5, 'A'] = 6.0
        else:
            expected.loc[5, 'A'] = 6.125

        assert_frame_equal(result, expected)
    def test_copy(self):
        """Check inplace/copy behavior of link_df, link_df_iter"""
        # One 1D stepper
        N = 5
        f = DataFrame({'x': np.arange(N), 'y': np.ones(N), 'frame': np.arange(N)})
        f_inplace = f.copy()
        expected = f.copy()
        expected['particle'] = np.zeros(N)

        # Should add particle column in-place
        # UNLESS diagnostics are enabled (or input dataframe is not writeable)
        actual = self.link_df(f_inplace, 5)
        assert_traj_equal(actual, expected)
        if self.do_diagnostics:
            assert 'particle' not in f_inplace.columns
        else:
            assert_traj_equal(actual, f_inplace)

        # Should copy
        actual = self.link_df(f, 5, copy_features=True)
        assert_traj_equal(actual, expected)
        assert 'particle' not in f.columns

        # Should copy
        actual_iter = self.link_df_iter(f, 5, hash_size=(10, 2))
        assert_traj_equal(actual_iter, expected)
        assert 'particle' not in f.columns
Example #14
0
    def test_frame_to_period(self):
        K = 5

        dr = date_range('1/1/2000', '1/1/2001')
        pr = period_range('1/1/2000', '1/1/2001')
        df = DataFrame(np.random.randn(len(dr), K), index=dr)
        df['mix'] = 'a'

        pts = df.to_period()
        exp = df.copy()
        exp.index = pr
        assert_frame_equal(pts, exp)

        pts = df.to_period('M')
        tm.assert_index_equal(pts.index, exp.index.asfreq('M'))

        df = df.T
        pts = df.to_period(axis=1)
        exp = df.copy()
        exp.columns = pr
        assert_frame_equal(pts, exp)

        pts = df.to_period('M', axis=1)
        tm.assert_index_equal(pts.columns, exp.columns.asfreq('M'))

        msg = ("No axis named 2 for object type"
               " <class 'pandas.core.frame.DataFrame'>")
        with pytest.raises(ValueError, match=msg):
            df.to_period(axis=2)
Example #15
0
    def test_frame_to_period(self):
        K = 5
        from pandas.tseries.period import period_range

        dr = date_range('1/1/2000', '1/1/2001')
        pr = period_range('1/1/2000', '1/1/2001')
        df = DataFrame(randn(len(dr), K), index=dr)
        df['mix'] = 'a'

        pts = df.to_period()
        exp = df.copy()
        exp.index = pr
        assert_frame_equal(pts, exp)

        pts = df.to_period('M')
        tm.assert_index_equal(pts.index, exp.index.asfreq('M'))

        df = df.T
        pts = df.to_period(axis=1)
        exp = df.copy()
        exp.columns = pr
        assert_frame_equal(pts, exp)

        pts = df.to_period('M', axis=1)
        tm.assert_index_equal(pts.columns, exp.columns.asfreq('M'))

        self.assertRaises(ValueError, df.to_period, axis=2)
Example #16
0
    def test_setitem_with_datetime_tz(self):
        # 16889
        # support .loc with alignment and tz-aware DatetimeIndex
        mask = np.array([True, False, True, False])

        idx = date_range('20010101', periods=4, tz='UTC')
        df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64')

        result = df.copy()
        result.loc[mask, :] = df.loc[mask, :]
        tm.assert_frame_equal(result, df)

        result = df.copy()
        result.loc[mask] = df.loc[mask]
        tm.assert_frame_equal(result, df)

        idx = date_range('20010101', periods=4)
        df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64')

        result = df.copy()
        result.loc[mask, :] = df.loc[mask, :]
        tm.assert_frame_equal(result, df)

        result = df.copy()
        result.loc[mask] = df.loc[mask]
        tm.assert_frame_equal(result, df)
Example #17
0
    def test_to_period(self):
        from pandas.tseries.period import period_range

        ts = _simple_ts('1/1/2000', '1/1/2001')

        pts = ts.to_period()
        exp = ts.copy()
        exp.index = period_range('1/1/2000', '1/1/2001')
        assert_series_equal(pts, exp)

        pts = ts.to_period('M')
        exp.index = exp.index.asfreq('M')
        tm.assert_index_equal(pts.index, exp.index.asfreq('M'))
        assert_series_equal(pts, exp)

        # GH 7606 without freq
        idx = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
                             '2011-01-04'])
        exp_idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03',
                                  '2011-01-04'], freq='D')

        s = Series(np.random.randn(4), index=idx)
        expected = s.copy()
        expected.index = exp_idx
        assert_series_equal(s.to_period(), expected)

        df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)
        expected = df.copy()
        expected.index = exp_idx
        assert_frame_equal(df.to_period(), expected)

        expected = df.copy()
        expected.columns = exp_idx
        assert_frame_equal(df.to_period(axis=1), expected)
Example #18
0
    def test_stack_mixed_levels(self):
        columns = MultiIndex.from_tuples(
            [('A', 'cat', 'long'), ('B', 'cat', 'long'),
             ('A', 'dog', 'short'), ('B', 'dog', 'short')],
            names=['exp', 'animal', 'hair_length']
        )
        df = DataFrame(randn(4, 4), columns=columns)

        animal_hair_stacked = df.stack(level=['animal', 'hair_length'])
        exp_hair_stacked = df.stack(level=['exp', 'hair_length'])

        # GH #8584: Need to check that stacking works when a number
        # is passed that is both a level name and in the range of
        # the level numbers
        df2 = df.copy()
        df2.columns.names = ['exp', 'animal', 1]
        assert_frame_equal(df2.stack(level=['animal', 1]),
                           animal_hair_stacked, check_names=False)
        assert_frame_equal(df2.stack(level=['exp', 1]),
                           exp_hair_stacked, check_names=False)

        # When mixed types are passed and the ints are not level
        # names, raise
        pytest.raises(ValueError, df2.stack, level=['animal', 0])

        # GH #8584: Having 0 in the level names could raise a
        # strange error about lexsort depth
        df3 = df.copy()
        df3.columns.names = ['exp', 'animal', 0]
        assert_frame_equal(df3.stack(level=['animal', 0]),
                           animal_hair_stacked, check_names=False)
Example #19
0
    def test_frame_setitem_multi_column(self):
        df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'],
                                              [0, 1, 0, 1]])

        cp = df.copy()
        cp['a'] = cp['b']
        tm.assert_frame_equal(cp['a'], cp['b'])

        # set with ndarray
        cp = df.copy()
        cp['a'] = cp['b'].values
        tm.assert_frame_equal(cp['a'], cp['b'])

        # ---------------------------------------
        # #1803
        columns = MultiIndex.from_tuples([('A', '1'), ('A', '2'), ('B', '1')])
        df = DataFrame(index=[1, 3, 5], columns=columns)

        # Works, but adds a column instead of updating the two existing ones
        df['A'] = 0.0  # Doesn't work
        assert (df['A'].values == 0).all()

        # it broadcasts
        df['B', '1'] = [1, 2, 3]
        df['A'] = df['B', '1']

        sliced_a1 = df['A', '1']
        sliced_a2 = df['A', '2']
        sliced_b1 = df['B', '1']
        tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
        tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
        assert sliced_a1.name == ('A', '1')
        assert sliced_a2.name == ('A', '2')
        assert sliced_b1.name == ('B', '1')
Example #20
0
    def test_inplace_ops_identity(self):

        # GH 5104
        # make sure that we are actually changing the object
        s_orig = Series([1, 2, 3])
        df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5))

        # no dtype change
        s = s_orig.copy()
        s2 = s
        s += 1
        assert_series_equal(s, s2)
        assert_series_equal(s_orig + 1, s)
        assert s is s2
        assert s._data is s2._data

        df = df_orig.copy()
        df2 = df
        df += 1
        assert_frame_equal(df, df2)
        assert_frame_equal(df_orig + 1, df)
        assert df is df2
        assert df._data is df2._data

        # dtype change
        s = s_orig.copy()
        s2 = s
        s += 1.5
        assert_series_equal(s, s2)
        assert_series_equal(s_orig + 1.5, s)

        df = df_orig.copy()
        df2 = df
        df += 1.5
        assert_frame_equal(df, df2)
        assert_frame_equal(df_orig + 1.5, df)
        assert df is df2
        assert df._data is df2._data

        # mixed dtype
        arr = np.random.randint(0, 10, size=5)
        df_orig = DataFrame({'A': arr.copy(), 'B': 'foo'})
        df = df_orig.copy()
        df2 = df
        df['A'] += 1
        expected = DataFrame({'A': arr.copy() + 1, 'B': 'foo'})
        assert_frame_equal(df, expected)
        assert_frame_equal(df2, expected)
        assert df._data is df2._data

        df = df_orig.copy()
        df2 = df
        df['A'] += 1.5
        expected = DataFrame({'A': arr.copy() + 1.5, 'B': 'foo'})
        assert_frame_equal(df, expected)
        assert_frame_equal(df2, expected)
        assert df._data is df2._data
Example #21
0
    def test_dropna(self):
        df = DataFrame(np.random.randn(6, 4))
        df[2][:2] = nan

        dropped = df.dropna(axis=1)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0)
        expected = df.loc[lrange(2, 6)]
        inp = df.copy()
        inp.dropna(axis=0, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        # threshold
        dropped = df.dropna(axis=1, thresh=5)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, thresh=5, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0, thresh=4)
        expected = df.loc[lrange(2, 6)]
        inp = df.copy()
        inp.dropna(axis=0, thresh=4, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=1, thresh=4)
        assert_frame_equal(dropped, df)

        dropped = df.dropna(axis=1, thresh=3)
        assert_frame_equal(dropped, df)

        # subset
        dropped = df.dropna(axis=0, subset=[0, 1, 3])
        inp = df.copy()
        inp.dropna(axis=0, subset=[0, 1, 3], inplace=True)
        assert_frame_equal(dropped, df)
        assert_frame_equal(inp, df)

        # all
        dropped = df.dropna(axis=1, how='all')
        assert_frame_equal(dropped, df)

        df[2] = nan
        dropped = df.dropna(axis=1, how='all')
        expected = df.loc[:, [0, 1, 3]]
        assert_frame_equal(dropped, expected)

        # bad input
        pytest.raises(ValueError, df.dropna, axis=3)
Example #22
0
    def test_inplace_ops_identity(self):

        # GH 5104
        # make sure that we are actually changing the object
        s_orig = Series([1, 2, 3])
        df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5))

        # no dtype change
        s = s_orig.copy()
        s2 = s
        s += 1
        assert_series_equal(s, s2)
        assert_series_equal(s_orig + 1, s)
        self.assertIs(s, s2)
        self.assertIs(s._data, s2._data)

        df = df_orig.copy()
        df2 = df
        df += 1
        assert_frame_equal(df, df2)
        assert_frame_equal(df_orig + 1, df)
        self.assertIs(df, df2)
        self.assertIs(df._data, df2._data)

        # dtype change
        s = s_orig.copy()
        s2 = s
        s += 1.5
        assert_series_equal(s, s2)
        assert_series_equal(s_orig + 1.5, s)

        df = df_orig.copy()
        df2 = df
        df += 1.5
        assert_frame_equal(df, df2)
        assert_frame_equal(df_orig + 1.5, df)
        self.assertIs(df, df2)
        self.assertIs(df._data, df2._data)

        # mixed dtype
        arr = np.random.randint(0, 10, size=5)
        df_orig = DataFrame({"A": arr.copy(), "B": "foo"})
        df = df_orig.copy()
        df2 = df
        df["A"] += 1
        expected = DataFrame({"A": arr.copy() + 1, "B": "foo"})
        assert_frame_equal(df, expected)
        assert_frame_equal(df2, expected)
        self.assertIs(df._data, df2._data)

        df = df_orig.copy()
        df2 = df
        df["A"] += 1.5
        expected = DataFrame({"A": arr.copy() + 1.5, "B": "foo"})
        assert_frame_equal(df, expected)
        assert_frame_equal(df2, expected)
        self.assertIs(df._data, df2._data)
Example #23
0
    def test_regex_replace_dict_mixed(self):
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
        dfmix = DataFrame(mix)

        # dicts
        # single dict {re1: v1}, search the whole frame
        # need test for this...

        # list of dicts {re1: v1, re2: v2, ..., re3: v3}, search the whole
        # frame
        res = dfmix.replace({'b': r'\s*\.\s*'}, {'b': np.nan}, regex=True)
        res2 = dfmix.copy()
        res2.replace({'b': r'\s*\.\s*'}, {'b': np.nan},
                     inplace=True, regex=True)
        expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', np.nan, np.nan], 'c':
                           mix['c']})
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the
        # whole frame
        res = dfmix.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, regex=True)
        res2 = dfmix.copy()
        res2.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, inplace=True,
                     regex=True)
        expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', '.ty', '.ty'], 'c':
                           mix['c']})
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        res = dfmix.replace(regex={'b': r'\s*(\.)\s*'}, value={'b': r'\1ty'})
        res2 = dfmix.copy()
        res2.replace(regex={'b': r'\s*(\.)\s*'}, value={'b': r'\1ty'},
                     inplace=True)
        expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', '.ty', '.ty'], 'c':
                           mix['c']})
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # scalar -> dict
        # to_replace regex, {value: value}
        expec = DataFrame({'a': mix['a'], 'b': [np.nan, 'b', '.', '.'], 'c':
                           mix['c']})
        res = dfmix.replace('a', {'b': np.nan}, regex=True)
        res2 = dfmix.copy()
        res2.replace('a', {'b': np.nan}, regex=True, inplace=True)
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        res = dfmix.replace('a', {'b': np.nan}, regex=True)
        res2 = dfmix.copy()
        res2.replace(regex='a', value={'b': np.nan}, inplace=True)
        expec = DataFrame({'a': mix['a'], 'b': [np.nan, 'b', '.', '.'], 'c':
                           mix['c']})
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)
Example #24
0
    def test_interp_inplace(self):
        df = DataFrame({'a': [1., 2., np.nan, 4.]})
        expected = DataFrame({'a': [1., 2., 3., 4.]})
        result = df.copy()
        result['a'].interpolate(inplace=True)
        assert_frame_equal(result, expected)

        result = df.copy()
        result['a'].interpolate(inplace=True, downcast='infer')
        assert_frame_equal(result, expected.astype('int64'))
Example #25
0
    def test_replace_datetimetz(self):

        # GH 11326
        # behaving poorly when presented with a datetime64[ns, tz]
        df = DataFrame({'A': date_range('20130101', periods=3,
                                        tz='US/Eastern'),
                        'B': [0, np.nan, 2]})
        result = df.replace(np.nan, 1)
        expected = DataFrame({'A': date_range('20130101', periods=3,
                                              tz='US/Eastern'),
                              'B': Series([0, 1, 2], dtype='float64')})
        assert_frame_equal(result, expected)

        result = df.fillna(1)
        assert_frame_equal(result, expected)

        result = df.replace(0, np.nan)
        expected = DataFrame({'A': date_range('20130101', periods=3,
                                              tz='US/Eastern'),
                              'B': [np.nan, np.nan, 2]})
        assert_frame_equal(result, expected)

        result = df.replace(Timestamp('20130102', tz='US/Eastern'),
                            Timestamp('20130104', tz='US/Eastern'))
        expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
                                    Timestamp('20130104', tz='US/Eastern'),
                                    Timestamp('20130103', tz='US/Eastern')],
                              'B': [0, np.nan, 2]})
        assert_frame_equal(result, expected)

        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace(
            {'A': pd.NaT}, Timestamp('20130104', tz='US/Eastern'))
        assert_frame_equal(result, expected)

        # coerce to object
        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace(
            {'A': pd.NaT}, Timestamp('20130104', tz='US/Pacific'))
        expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
                                    Timestamp('20130104', tz='US/Pacific'),
                                    Timestamp('20130103', tz='US/Eastern')],
                              'B': [0, np.nan, 2]})
        assert_frame_equal(result, expected)

        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace({'A': np.nan}, Timestamp('20130104'))
        expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
                                    Timestamp('20130104'),
                                    Timestamp('20130103', tz='US/Eastern')],
                              'B': [0, np.nan, 2]})
        assert_frame_equal(result, expected)
Example #26
0
    def test_replace_input_formats(self):
        # both dicts
        to_rep = {'A': np.nan, 'B': 0, 'C': ''}
        values = {'A': 0, 'B': -1, 'C': 'missing'}
        df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5],
                        'C': ['', 'asdf', 'fd']})
        filled = df.replace(to_rep, values)
        expected = {}
        for k, v in compat.iteritems(df):
            expected[k] = v.replace(to_rep[k], values[k])
        assert_frame_equal(filled, DataFrame(expected))

        result = df.replace([0, 2, 5], [5, 2, 0])
        expected = DataFrame({'A': [np.nan, 5, np.inf], 'B': [5, 2, 0],
                              'C': ['', 'asdf', 'fd']})
        assert_frame_equal(result, expected)

        # dict to scalar
        filled = df.replace(to_rep, 0)
        expected = {}
        for k, v in compat.iteritems(df):
            expected[k] = v.replace(to_rep[k], 0)
        assert_frame_equal(filled, DataFrame(expected))

        self.assertRaises(TypeError, df.replace, to_rep, [np.nan, 0, ''])

        # scalar to dict
        values = {'A': 0, 'B': -1, 'C': 'missing'}
        df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5],
                        'C': ['', 'asdf', 'fd']})
        filled = df.replace(np.nan, values)
        expected = {}
        for k, v in compat.iteritems(df):
            expected[k] = v.replace(np.nan, values[k])
        assert_frame_equal(filled, DataFrame(expected))

        # list to list
        to_rep = [np.nan, 0, '']
        values = [-2, -1, 'missing']
        result = df.replace(to_rep, values)
        expected = df.copy()
        for i in range(len(to_rep)):
            expected.replace(to_rep[i], values[i], inplace=True)
        assert_frame_equal(result, expected)

        self.assertRaises(ValueError, df.replace, to_rep, values[1:])

        # list to scalar
        to_rep = [np.nan, 0, '']
        result = df.replace(to_rep, -1)
        expected = df.copy()
        for i in range(len(to_rep)):
            expected.replace(to_rep[i], -1, inplace=True)
        assert_frame_equal(result, expected)
Example #27
0
    def test_multi_assign(self):

        # GH 3626, an assignment of a sub-df to a df
        df = DataFrame({'FC': ['a', 'b', 'a', 'b', 'a', 'b'],
                        'PF': [0, 0, 0, 0, 1, 1],
                        'col1': list(range(6)),
                        'col2': list(range(6, 12)),
                        })
        df.iloc[1, 0] = np.nan
        df2 = df.copy()

        mask = ~df2.FC.isna()
        cols = ['col1', 'col2']

        dft = df2 * 2
        dft.iloc[3, 3] = np.nan

        expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
                              'PF': [0, 0, 0, 0, 1, 1],
                              'col1': Series([0, 1, 4, 6, 8, 10]),
                              'col2': [12, 7, 16, np.nan, 20, 22]})

        # frame on rhs
        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        # with an ndarray on rhs
        # coerces to float64 because values has float64 dtype
        # GH 14001
        expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
                              'PF': [0, 0, 0, 0, 1, 1],
                              'col1': [0., 1., 4., 6., 8., 10.],
                              'col2': [12, 7, 16, np.nan, 20, 22]})
        df2 = df.copy()
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)

        # broadcasting on the rhs is required
        df = DataFrame(dict(A=[1, 2, 0, 0, 0], B=[0, 0, 0, 10, 11], C=[
                       0, 0, 0, 10, 11], D=[3, 4, 5, 6, 7]))

        expected = df.copy()
        mask = expected['A'] == 0
        for col in ['A', 'B']:
            expected.loc[mask, col] = df['D']

        df.loc[df['A'] == 0, ['A', 'B']] = df['D']
        tm.assert_frame_equal(df, expected)
Example #28
0
    def test_ix_loc_setitem_consistency(self):

        # GH 5771
        # loc with slice and series
        s = Series(0, index=[4, 5, 6])
        s.loc[4:5] += 1
        expected = Series([1, 1, 0], index=[4, 5, 6])
        tm.assert_series_equal(s, expected)

        # GH 5928
        # chained indexing assignment
        df = DataFrame({'a': [0, 1, 2]})
        expected = df.copy()
        with catch_warnings(record=True):
            expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a']

        with catch_warnings(record=True):
            df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]]
        tm.assert_frame_equal(df, expected)

        df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]})
        with catch_warnings(record=True):
            df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype(
                'float64') + 0.5
        expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]})
        tm.assert_frame_equal(df, expected)

        # GH 8607
        # ix setitem consistency
        df = DataFrame({'delta': [1174, 904, 161],
                        'elapsed': [7673, 9277, 1470],
                        'timestamp': [1413840976, 1413842580, 1413760580]})
        expected = DataFrame({'delta': [1174, 904, 161],
                              'elapsed': [7673, 9277, 1470],
                              'timestamp': pd.to_datetime(
                                  [1413840976, 1413842580, 1413760580],
                                  unit='s')
                              })

        df2 = df.copy()
        df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
        tm.assert_frame_equal(df2, expected)

        df2 = df.copy()
        df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
        tm.assert_frame_equal(df2, expected)

        df2 = df.copy()
        with catch_warnings(record=True):
            df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s')
        tm.assert_frame_equal(df2, expected)
Example #29
0
 def test_regex_replace_str_to_numeric(self):
     # what happens when you try to replace a numeric value with a regex?
     mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
     df = DataFrame(mix)
     res = df.replace(r'\s*\.\s*', 0, regex=True)
     res2 = df.copy()
     res2.replace(r'\s*\.\s*', 0, inplace=True, regex=True)
     res3 = df.copy()
     res3.replace(regex=r'\s*\.\s*', value=0, inplace=True)
     expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', 0, 0], 'c':
                        mix['c']})
     assert_frame_equal(res, expec)
     assert_frame_equal(res2, expec)
     assert_frame_equal(res3, expec)
Example #30
0
 def test_regex_replace_regex_list_to_numeric(self):
     mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
     df = DataFrame(mix)
     res = df.replace([r'\s*\.\s*', 'b'], 0, regex=True)
     res2 = df.copy()
     res2.replace([r'\s*\.\s*', 'b'], 0, regex=True, inplace=True)
     res3 = df.copy()
     res3.replace(regex=[r'\s*\.\s*', 'b'], value=0, inplace=True)
     expec = DataFrame({'a': mix['a'], 'b': ['a', 0, 0, 0], 'c': ['a', 0,
                                                                  nan,
                                                                  'd']})
     assert_frame_equal(res, expec)
     assert_frame_equal(res2, expec)
     assert_frame_equal(res3, expec)