def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame,
                                mixed_float_frame):
    """Compare a dunder arithmetic method with its ``operator`` function.

    ``op`` is a dunder name; the dtype branches below handle ``'__add__'``,
    ``'__sub__'`` and ``'__mul__'``.  For the mixed-int, mixed-float and
    plain-int frames, the method looked up by name on the frame must give
    the same result as the ``operator`` function of the same name.
    """
    binary_op = getattr(operator, op)

    # Expected integer dtypes (no overflow in the uint column).
    if op == '__sub__':
        int_dtypes = {'B': 'uint64', 'C': None}
    elif op in ('__add__', '__mul__'):
        int_dtypes = {'C': None}
    else:
        int_dtypes = None

    # mixed integer dtypes
    from_method = getattr(mixed_int_frame, op)(2 + mixed_int_frame)
    from_operator = binary_op(mixed_int_frame, 2 + mixed_int_frame)
    tm.assert_frame_equal(from_method, from_operator)
    _check_mixed_int(from_method, dtype=int_dtypes)

    # mixed float dtypes
    from_method = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
    from_operator = binary_op(mixed_float_frame, 2 * mixed_float_frame)
    tm.assert_frame_equal(from_method, from_operator)
    _check_mixed_float(from_method, dtype={'C': None})

    # plain int frame
    from_method = getattr(int_frame, op)(2 * int_frame)
    from_operator = binary_op(int_frame, 2 * int_frame)
    tm.assert_frame_equal(from_method, from_operator)
def test_combineFrame(self):
    """Check ``DataFrame + DataFrame`` alignment, corner cases and upcasting.

    Covers: adding a frame to a row-subsampled copy that lacks column 'D'
    and has NaNs in 'C'; adding a frame to itself; adding empty frames;
    adding frames whose columns are in reverse order; and adding the float
    frame to mixed-float / mixed-int frames.
    """
    frame_copy = self.frame.reindex(self.frame.index[::2])
    del frame_copy['D']
    # Write the first five rows of 'C' in one positional step on the frame
    # itself.  The chained form ``frame_copy['C'][:5] = nan`` may assign
    # into a temporary copy and leave ``frame_copy`` untouched.
    frame_copy.iloc[:5, frame_copy.columns.get_loc('C')] = nan

    added = self.frame + frame_copy
    tm.assert_dict_equal(added['A'].valid(),
                         self.frame['A'] * 2,
                         compare_keys=False)

    # NaNs written into 'C' propagate through the addition.
    self.assertTrue(
        np.isnan(added['C'].reindex(frame_copy.index)[:5]).all())

    # 'D' exists on one side only, so the sum is all-NaN.
    self.assertTrue(np.isnan(added['D']).all())

    self_added = self.frame + self.frame
    self.assertTrue(self_added.index.equals(self.frame.index))

    added_rev = frame_copy + self.frame
    # NOTE(review): the first assertion re-checks ``added``, not
    # ``added_rev``; kept as in the original.
    self.assertTrue(np.isnan(added['D']).all())
    self.assertTrue(np.isnan(added_rev['D']).all())

    # corner cases

    # empty
    plus_empty = self.frame + self.empty
    self.assertTrue(np.isnan(plus_empty.values).all())

    empty_plus = self.empty + self.frame
    self.assertTrue(np.isnan(empty_plus.values).all())

    empty_empty = self.empty + self.empty
    self.assertTrue(empty_empty.empty)

    # out of order
    reverse = self.frame.reindex(columns=self.frame.columns[::-1])
    assert_frame_equal(reverse + self.frame, self.frame * 2)

    # mix vs float64, upcast
    added = self.frame + self.mixed_float
    _check_mixed_float(added, dtype='float64')
    added = self.mixed_float + self.frame
    _check_mixed_float(added, dtype='float64')

    # mix vs mix
    added = self.mixed_float + self.mixed_float2
    _check_mixed_float(added, dtype=dict(C=None))
    added = self.mixed_float2 + self.mixed_float
    _check_mixed_float(added, dtype=dict(C=None))

    # with int
    added = self.frame + self.mixed_int
    _check_mixed_float(added, dtype='float64')
def test_fillna_mixed_float(self, mixed_float_frame):
    """Fill NaNs in a mixed-float frame by value and by padding.

    Works on columns 'A', 'B' and 'D' only (mixed numeric, but no float16),
    writes NaN into the last ten rows of 'A', and passes each filled frame
    to ``_check_mixed_float``.
    """
    subset = mixed_float_frame.reindex(columns=['A', 'B', 'D'])
    tail_labels = subset.index[-10:]
    subset.loc[tail_labels, 'A'] = np.nan

    for fill_kwargs in ({'value': 0}, {'method': 'pad'}):
        filled = subset.fillna(**fill_kwargs)
        _check_mixed_float(filled, dtype={'C': None})
def test_combineFunc(self):
    """Multiply frames by a scalar and compare with the raw ndarray result."""
    # plain float frame
    doubled = self.frame * 2
    self.assert_numpy_array_equal(doubled.values, self.frame.values * 2)

    # mixed float dtypes, compared column by column
    doubled = self.mixed_float * 2
    for label, column in compat.iteritems(doubled):
        source_values = self.mixed_float[label].values
        self.assert_numpy_array_equal(column.values, source_values * 2)
    _check_mixed_float(doubled, dtype={'C': None})

    # empty frame: the index object is reused and no columns appear
    doubled = self.empty * 2
    self.assertIs(doubled.index, self.empty.index)
    self.assertEqual(len(doubled.columns), 0)
def test_combineFunc(self, float_frame, mixed_float_frame):
    """Multiply frames by a scalar and compare with the raw ndarray result."""
    # plain float frame
    doubled = float_frame * 2
    tm.assert_numpy_array_equal(doubled.values, float_frame.values * 2)

    # mixed float dtypes, compared column by column
    doubled = mixed_float_frame * 2
    for label, column in compat.iteritems(doubled):
        source_values = mixed_float_frame[label].values
        tm.assert_numpy_array_equal(column.values, source_values * 2)
    _check_mixed_float(doubled, dtype={'C': None})

    # empty frame: index equals that of a fresh empty frame, no columns
    doubled = DataFrame() * 2
    assert doubled.index.equals(DataFrame().index)
    assert len(doubled.columns) == 0
def test_combineFrame(self, float_frame, mixed_float_frame,
                      mixed_int_frame):
    """Check ``DataFrame + DataFrame`` alignment, corner cases and upcasting.

    Covers: adding a frame to a row-subsampled copy that lacks column 'D'
    and has NaNs in 'C'; adding a frame to itself; adding empty frames;
    adding frames whose columns are in reverse order; and adding the float
    frame to mixed-float / mixed-int frames.
    """
    frame_copy = float_frame.reindex(float_frame.index[::2])
    del frame_copy['D']
    # Write the first five rows of 'C' in one positional step on the frame
    # itself.  The chained form ``frame_copy['C'][:5] = np.nan`` may assign
    # into a temporary copy and leave ``frame_copy`` untouched.
    frame_copy.iloc[:5, frame_copy.columns.get_loc('C')] = np.nan

    added = float_frame + frame_copy

    # Rows present in both operands hold 2 * A; all other rows are NaN.
    indexer = added['A'].dropna().index
    exp = (float_frame['A'] * 2).copy()

    tm.assert_series_equal(added['A'].dropna(), exp.loc[indexer])

    exp.loc[~exp.index.isin(indexer)] = np.nan
    tm.assert_series_equal(added['A'], exp.loc[added['A'].index])

    # NaNs written into 'C' propagate through the addition.
    assert np.isnan(added['C'].reindex(frame_copy.index)[:5]).all()

    # 'D' exists on one side only, so the sum is all-NaN.
    assert np.isnan(added['D']).all()

    self_added = float_frame + float_frame
    tm.assert_index_equal(self_added.index, float_frame.index)

    added_rev = frame_copy + float_frame
    # NOTE(review): the first assertion re-checks ``added``, not
    # ``added_rev``; kept as in the original.
    assert np.isnan(added['D']).all()
    assert np.isnan(added_rev['D']).all()

    # corner cases

    # empty
    plus_empty = float_frame + DataFrame()
    assert np.isnan(plus_empty.values).all()

    empty_plus = DataFrame() + float_frame
    assert np.isnan(empty_plus.values).all()

    empty_empty = DataFrame() + DataFrame()
    assert empty_empty.empty

    # out of order
    reverse = float_frame.reindex(columns=float_frame.columns[::-1])
    assert_frame_equal(reverse + float_frame, float_frame * 2)

    # mix vs float64, upcast
    added = float_frame + mixed_float_frame
    _check_mixed_float(added, dtype='float64')
    added = mixed_float_frame + float_frame
    _check_mixed_float(added, dtype='float64')

    # mix vs mix
    added = mixed_float_frame + mixed_float_frame
    _check_mixed_float(added, dtype=dict(C=None))

    # with int
    added = float_frame + mixed_int_frame
    _check_mixed_float(added, dtype='float64')
def test_arith_flex_frame(self, all_arithmetic_operators, float_frame,
                          mixed_float_frame):
    """Compare a dunder arithmetic method with the ``operator`` equivalent.

    ``all_arithmetic_operators`` supplies one dunder name per test
    instance.  Reflected names (those starting with ``'__r'``) are checked
    against the non-reflected ``operator`` function with swapped operands.
    """
    # one instance of parametrized fixture
    op = all_arithmetic_operators

    def expected_for(left, right):
        # r-versions are not in the operator stdlib module: strip the "r"
        # and swap the operands instead.
        reflected = op.startswith('__r')
        if not reflected:
            return getattr(operator, op)(left, right)
        forward = getattr(operator, op.replace('__r', '__'))
        return forward(right, left)

    # plain float frame
    from_method = getattr(float_frame, op)(2 * float_frame)
    tm.assert_frame_equal(from_method,
                          expected_for(float_frame, 2 * float_frame))

    # mixed float dtypes
    from_method = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
    tm.assert_frame_equal(
        from_method, expected_for(mixed_float_frame, 2 * mixed_float_frame))
    _check_mixed_float(from_method, dtype={'C': None})
def test_combineSeries(self, float_frame, mixed_float_frame,
                       mixed_int_frame, datetime_frame):
    """Exercise frame-with-Series addition and flex ``add`` / ``mul``.

    The first half adds a row Series to float and mixed-float frames; the
    second half broadcasts a time series along the index with
    ``axis='index'``, including truncated and empty operands.
    """
    # --- Series ---
    first_row = float_frame.xs(float_frame.index[0])

    row_sum = float_frame + first_row
    for label, column in compat.iteritems(row_sum):
        assert_series_equal(column, float_frame[label] + first_row[label])

    # A Series carrying an extra label 'E' produces an all-NaN 'E' column.
    extended = Series(dict(first_row.to_dict(), E=1))
    extended_sum = float_frame + extended
    for label, column in compat.iteritems(float_frame):
        assert_series_equal(extended_sum[label], column + first_row[label])
    assert 'E' in extended_sum
    assert np.isnan(extended_sum['E']).all()

    # no upcast needed
    _check_mixed_float(mixed_float_frame + first_row)

    # vs mix (upcast) as needed
    for narrow in ('float32', 'float16'):
        upcast = mixed_float_frame + first_row.astype(narrow)
        _check_mixed_float(upcast, dtype={'C': None})

    # these raise with numexpr.....as we are adding an int64 to an
    # uint64....weird vs int

    # added = mixed_int_frame + (100*series).astype('int64')
    # _check_mixed_int(added, dtype = dict(A = 'int64', B = 'float64', C =
    # 'int64', D = 'int64'))
    # added = mixed_int_frame + (100*series).astype('int32')
    # _check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C =
    # 'int32', D = 'int64'))

    # --- TimeSeries ---
    ts = datetime_frame['A']

    # 10890
    # we no longer allow auto timeseries broadcasting
    # and require explicit broadcasting
    broadcast = datetime_frame.add(ts, axis='index')

    for label, column in compat.iteritems(datetime_frame):
        column_sum = column + ts
        assert_series_equal(broadcast[label], column_sum, check_names=False)
        assert broadcast[label].name == label
        if column.name != ts.name:
            assert column_sum.name is None
        else:
            assert column_sum.name == 'A'

    # Truncating the frame or the series gives the same aligned result.
    truncated_frame_sum = datetime_frame[:-5].add(ts, axis='index')
    tm.assert_index_equal(truncated_frame_sum.index, datetime_frame.index)

    truncated_ts_sum = datetime_frame.add(ts[:-5], axis='index')
    assert_frame_equal(truncated_frame_sum, truncated_ts_sum)

    # length 0, result is all-nan
    empty_ts_sum = datetime_frame.add(ts[:0], axis='index')
    all_nan = DataFrame(np.nan, index=datetime_frame.index,
                        columns=datetime_frame.columns)
    assert_frame_equal(empty_ts_sum, all_nan)

    # Frame is all-nan
    empty_frame_sum = datetime_frame[:0].add(ts, axis='index')
    all_nan = DataFrame(np.nan, index=datetime_frame.index,
                        columns=datetime_frame.columns)
    assert_frame_equal(empty_frame_sum, all_nan)

    # empty but with non-empty index
    no_columns = datetime_frame[:1].reindex(columns=[])
    product = no_columns.mul(ts, axis='index')
    assert len(product) == len(ts)
def test_combineFrame(self):
    """Check ``DataFrame + DataFrame`` alignment, corner cases and upcasting.

    Covers: adding a frame to a row-subsampled copy that lacks column 'D'
    and has NaNs in 'C'; adding a frame to itself; adding empty frames;
    adding frames whose columns are in reverse order; and adding the float
    frame to mixed-float / mixed-int frames.
    """
    frame_copy = self.frame.reindex(self.frame.index[::2])
    del frame_copy['D']
    # Write the first five rows of 'C' in one positional step on the frame
    # itself.  The chained form ``frame_copy['C'][:5] = nan`` may assign
    # into a temporary copy and leave ``frame_copy`` untouched.
    frame_copy.iloc[:5, frame_copy.columns.get_loc('C')] = nan

    added = self.frame + frame_copy

    # Rows present in both operands hold 2 * A; all other rows are NaN.
    indexer = added['A'].valid().index
    exp = (self.frame['A'] * 2).copy()

    tm.assert_series_equal(added['A'].valid(), exp.loc[indexer])

    exp.loc[~exp.index.isin(indexer)] = np.nan
    tm.assert_series_equal(added['A'], exp.loc[added['A'].index])

    # NaNs written into 'C' propagate through the addition.
    self.assertTrue(
        np.isnan(added['C'].reindex(frame_copy.index)[:5]).all())

    # 'D' exists on one side only, so the sum is all-NaN.
    self.assertTrue(np.isnan(added['D']).all())

    self_added = self.frame + self.frame
    self.assert_index_equal(self_added.index, self.frame.index)

    added_rev = frame_copy + self.frame
    # NOTE(review): the first assertion re-checks ``added``, not
    # ``added_rev``; kept as in the original.
    self.assertTrue(np.isnan(added['D']).all())
    self.assertTrue(np.isnan(added_rev['D']).all())

    # corner cases

    # empty
    plus_empty = self.frame + self.empty
    self.assertTrue(np.isnan(plus_empty.values).all())

    empty_plus = self.empty + self.frame
    self.assertTrue(np.isnan(empty_plus.values).all())

    empty_empty = self.empty + self.empty
    self.assertTrue(empty_empty.empty)

    # out of order
    reverse = self.frame.reindex(columns=self.frame.columns[::-1])
    assert_frame_equal(reverse + self.frame, self.frame * 2)

    # mix vs float64, upcast
    added = self.frame + self.mixed_float
    _check_mixed_float(added, dtype='float64')
    added = self.mixed_float + self.frame
    _check_mixed_float(added, dtype='float64')

    # mix vs mix
    added = self.mixed_float + self.mixed_float2
    _check_mixed_float(added, dtype=dict(C=None))
    added = self.mixed_float2 + self.mixed_float
    _check_mixed_float(added, dtype=dict(C=None))

    # with int
    added = self.frame + self.mixed_int
    _check_mixed_float(added, dtype='float64')
def test_arith_flex_frame(self):
    """Compare flex arithmetic methods with the ``operator`` functions.

    For every name in ``ops`` the frame method of that name is checked
    against the ``operator`` function of the same name (``div`` is looked
    up as ``truediv`` when ``compat.PY3`` is true) on the float,
    mixed-float, int and mixed-int fixtures.  Operands with extra
    dimensions must raise ``ValueError``, and passing ``fill_value``
    together with a row of the frame must raise ``NotImplementedError``.
    """
    ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod']
    # Map a method name to the operator-module name used as the reference.
    if not compat.PY3:
        aliases = {}
    else:
        aliases = {'div': 'truediv'}

    for op in ops:
        try:
            alias = aliases.get(op, op)
            f = getattr(operator, alias)
            result = getattr(self.frame, op)(2 * self.frame)
            exp = f(self.frame, 2 * self.frame)
            assert_frame_equal(result, exp)

            # vs mix float
            result = getattr(self.mixed_float, op)(2 * self.mixed_float)
            exp = f(self.mixed_float, 2 * self.mixed_float)
            assert_frame_equal(result, exp)
            _check_mixed_float(result, dtype=dict(C=None))

            # vs mix int
            if op in ['add', 'sub', 'mul']:
                result = getattr(self.mixed_int, op)(2 + self.mixed_int)
                exp = f(self.mixed_int, 2 + self.mixed_int)

                # no overflow in the uint
                dtype = None
                if op in ['sub']:
                    dtype = dict(B='uint64', C=None)
                elif op in ['add', 'mul']:
                    dtype = dict(C=None)
                assert_frame_equal(result, exp)
                _check_mixed_int(result, dtype=dtype)

                # rops
                # r_f swaps the operands so it can serve as the reference
                # for the reflected method ('r' + op).
                r_f = lambda x, y: f(y, x)
                result = getattr(self.frame, 'r' + op)(2 * self.frame)
                exp = r_f(self.frame, 2 * self.frame)
                assert_frame_equal(result, exp)

                # vs mix float
                # NOTE(review): from here to the end of the try body the
                # forward method ``op`` and ``f`` are used again, not
                # ``'r' + op`` / ``r_f`` -- confirm whether reflected ops
                # were intended.
                result = getattr(self.mixed_float, op)(
                    2 * self.mixed_float)
                exp = f(self.mixed_float, 2 * self.mixed_float)
                assert_frame_equal(result, exp)
                _check_mixed_float(result, dtype=dict(C=None))

                result = getattr(self.intframe, op)(2 * self.intframe)
                exp = f(self.intframe, 2 * self.intframe)
                assert_frame_equal(result, exp)

                # vs mix int
                if op in ['add', 'sub', 'mul']:
                    result = getattr(self.mixed_int, op)(
                        2 + self.mixed_int)
                    exp = f(self.mixed_int, 2 + self.mixed_int)

                    # no overflow in the uint
                    dtype = None
                    if op in ['sub']:
                        dtype = dict(B='uint64', C=None)
                    elif op in ['add', 'mul']:
                        dtype = dict(C=None)
                    assert_frame_equal(result, exp)
                    _check_mixed_int(result, dtype=dtype)
        except:
            # Report which operation failed, then re-raise unchanged.
            printing.pprint_thing("Failing operation %r" % op)
            raise

        # ndim >= 3
        ndim_5 = np.ones(self.frame.shape + (3, 4, 5))
        msg = "Unable to coerce to Series/DataFrame"
        with assertRaisesRegexp(ValueError, msg):
            f(self.frame, ndim_5)

        with assertRaisesRegexp(ValueError, msg):
            getattr(self.frame, op)(ndim_5)

    # res_add = self.frame.add(self.frame)
    # res_sub = self.frame.sub(self.frame)
    # res_mul = self.frame.mul(self.frame)
    # res_div = self.frame.div(2 * self.frame)

    # assert_frame_equal(res_add, self.frame + self.frame)
    # assert_frame_equal(res_sub, self.frame - self.frame)
    # assert_frame_equal(res_mul, self.frame * self.frame)
    # assert_frame_equal(res_div, self.frame / (2 * self.frame))

    # A scalar operand behaves like the ``+`` operator.
    const_add = self.frame.add(1)
    assert_frame_equal(const_add, self.frame + 1)

    # corner cases
    # Adding a zero-row slice (either side) must equal frame * NaN.
    result = self.frame.add(self.frame[:0])
    assert_frame_equal(result, self.frame * np.nan)

    result = self.frame[:0].add(self.frame)
    assert_frame_equal(result, self.frame * np.nan)
    # fill_value with a row operand must raise, with or without axis.
    with assertRaisesRegexp(NotImplementedError, 'fill_value'):
        self.frame.add(self.frame.iloc[0], fill_value=3)
    with assertRaisesRegexp(NotImplementedError, 'fill_value'):
        self.frame.add(self.frame.iloc[0], axis='index', fill_value=3)
def test_fillna(self):
    """Exercise ``fillna`` with values, methods, dicts, limits and dates.

    The first two sections write NaNs into the ``tsframe`` and
    ``mixed_frame`` attributes in place before filling them.
    """
    # --- time-series frame: NaNs at both ends of column 'A' ---
    ts_frame = self.tsframe
    for edge_labels in (ts_frame.index[:5], ts_frame.index[-5:]):
        ts_frame.loc[edge_labels, 'A'] = nan

    filled_zero = self.tsframe.fillna(0)
    assert (filled_zero.loc[filled_zero.index[:5], 'A'] == 0).all()

    ffilled = self.tsframe.fillna(method='pad')
    # The leading rows stay NaN after padding.
    assert np.isnan(ffilled.loc[ffilled.index[:5], 'A']).all()
    # The last five rows all equal the value at position -5.
    tail = ffilled.loc[ffilled.index[-5:], 'A']
    assert (tail == ffilled.loc[ffilled.index[-5], 'A']).all()

    # --- mixed type: the results are not inspected ---
    mixed = self.mixed_frame
    mixed.loc[mixed.index[5:20], 'foo'] = nan
    mixed.loc[mixed.index[-10:], 'A'] = nan
    self.mixed_frame.fillna(value=0)
    self.mixed_frame.fillna(method='pad')

    # No arguments at all, or both a value and a method, must raise.
    with pytest.raises(ValueError):
        self.tsframe.fillna()
    with pytest.raises(ValueError):
        self.tsframe.fillna(5, method='ffill')

    # --- mixed numeric (but no float16) ---
    numeric = self.mixed_float.reindex(columns=['A', 'B', 'D'])
    numeric.loc[numeric.index[-10:], 'A'] = nan
    for fill_kwargs in ({'value': 0}, {'method': 'pad'}):
        filled = numeric.fillna(**fill_kwargs)
        _check_mixed_float(filled, dtype={'C': None})

    # --- empty frame (GH #2778) ---
    empty = DataFrame(columns=['x'])
    for how in ('pad', 'backfill'):
        empty.x.fillna(method=how, inplace=True)
        empty.x.fillna(method=how)

    # --- with different dtype (GH3386) ---
    letters = 'abc'
    obj_frame = DataFrame([[ch, ch, np.nan, ch] for ch in letters])
    filled = obj_frame.fillna({2: 'foo'})
    wanted = DataFrame([[ch, ch, 'foo', ch] for ch in letters])
    assert_frame_equal(filled, wanted)

    obj_frame.fillna({2: 'foo'}, inplace=True)
    assert_frame_equal(obj_frame, wanted)

    # --- limit and value ---
    rand = DataFrame(np.random.randn(10, 3))
    rand.iloc[2:7, 0] = np.nan
    rand.iloc[3:5, 2] = np.nan

    # With limit=1 only the first NaN of each run is replaced.
    wanted = rand.copy()
    for row, col in ((2, 0), (3, 2)):
        wanted.iloc[row, col] = 999

    assert_frame_equal(rand.fillna(999, limit=1), wanted)

    # --- with datelike (GH 6344) ---
    dates = DataFrame({
        'Date': [pd.NaT, Timestamp("2014-1-1")],
        'Date2': [Timestamp("2013-1-1"), pd.NaT],
    })

    wanted = dates.copy()
    first_date2 = dates.loc[dates.index[0], 'Date2']
    wanted['Date'] = wanted['Date'].fillna(first_date2)
    filled = dates.fillna(value={'Date': dates['Date2']})
    assert_frame_equal(filled, wanted)

    # --- with timezone (GH 15855) ---
    stamp = pd.Timestamp('2012-11-11 00:00:00+01:00')
    for values, how in (([stamp, pd.NaT], 'pad'),
                        ([pd.NaT, stamp], 'bfill')):
        tz_frame = pd.DataFrame({'A': values})
        wanted = pd.DataFrame({'A': [stamp, stamp]})
        assert_frame_equal(tz_frame.fillna(method=how), wanted)
def test_fillna(self):
    """Exercise ``fillna`` with values, methods, dicts, limits and dates.

    The first two sections write NaNs into the ``tsframe`` and
    ``mixed_frame`` attributes in place before filling them.
    """
    # --- time-series frame: NaNs at both ends of column 'A' ---
    ts_frame = self.tsframe
    for edge_labels in (ts_frame.index[:5], ts_frame.index[-5:]):
        ts_frame.loc[edge_labels, 'A'] = nan

    filled_zero = self.tsframe.fillna(0)
    assert (filled_zero.loc[filled_zero.index[:5], 'A'] == 0).all()

    ffilled = self.tsframe.fillna(method='pad')
    # The leading rows stay NaN after padding.
    assert np.isnan(ffilled.loc[ffilled.index[:5], 'A']).all()
    # The last five rows all equal the value at position -5.
    tail = ffilled.loc[ffilled.index[-5:], 'A']
    assert (tail == ffilled.loc[ffilled.index[-5], 'A']).all()

    # --- mixed type: the results are not inspected ---
    mixed = self.mixed_frame
    mixed.loc[mixed.index[5:20], 'foo'] = nan
    mixed.loc[mixed.index[-10:], 'A'] = nan
    self.mixed_frame.fillna(value=0)
    self.mixed_frame.fillna(method='pad')

    # No arguments at all, or both a value and a method, must raise.
    with pytest.raises(ValueError):
        self.tsframe.fillna()
    with pytest.raises(ValueError):
        self.tsframe.fillna(5, method='ffill')

    # --- mixed numeric (but no float16) ---
    numeric = self.mixed_float.reindex(columns=['A', 'B', 'D'])
    numeric.loc[numeric.index[-10:], 'A'] = nan
    for fill_kwargs in ({'value': 0}, {'method': 'pad'}):
        filled = numeric.fillna(**fill_kwargs)
        _check_mixed_float(filled, dtype={'C': None})

    # --- empty frame (GH #2778) ---
    empty = DataFrame(columns=['x'])
    for how in ('pad', 'backfill'):
        empty.x.fillna(method=how, inplace=True)
        empty.x.fillna(method=how)

    # --- with different dtype (GH3386) ---
    letters = 'abc'
    obj_frame = DataFrame([[ch, ch, np.nan, ch] for ch in letters])
    filled = obj_frame.fillna({2: 'foo'})
    wanted = DataFrame([[ch, ch, 'foo', ch] for ch in letters])
    assert_frame_equal(filled, wanted)

    obj_frame.fillna({2: 'foo'}, inplace=True)
    assert_frame_equal(obj_frame, wanted)

    # --- limit and value ---
    rand = DataFrame(np.random.randn(10, 3))
    rand.iloc[2:7, 0] = np.nan
    rand.iloc[3:5, 2] = np.nan

    # With limit=1 only the first NaN of each run is replaced.
    wanted = rand.copy()
    for row, col in ((2, 0), (3, 2)):
        wanted.iloc[row, col] = 999

    assert_frame_equal(rand.fillna(999, limit=1), wanted)

    # --- with datelike (GH 6344) ---
    dates = DataFrame({
        'Date': [pd.NaT, Timestamp("2014-1-1")],
        'Date2': [Timestamp("2013-1-1"), pd.NaT],
    })

    wanted = dates.copy()
    first_date2 = dates.loc[dates.index[0], 'Date2']
    wanted['Date'] = wanted['Date'].fillna(first_date2)
    filled = dates.fillna(value={'Date': dates['Date2']})
    assert_frame_equal(filled, wanted)

    # --- with timezone (GH 15855) ---
    stamp = pd.Timestamp('2012-11-11 00:00:00+01:00')
    for values, how in (([stamp, pd.NaT], 'pad'),
                        ([pd.NaT, stamp], 'bfill')):
        tz_frame = pd.DataFrame({'A': values})
        wanted = pd.DataFrame({'A': [stamp, stamp]})
        assert_frame_equal(tz_frame.fillna(method=how), wanted)

    # --- with timezone in another column (GH 15522) ---
    tz_other = pd.DataFrame({
        'A': pd.date_range('20130101', periods=4, tz='US/Eastern'),
        'B': [1, 2, np.nan, np.nan],
    })
    filled = tz_other.fillna(method='pad')
    wanted = pd.DataFrame({
        'A': pd.date_range('20130101', periods=4, tz='US/Eastern'),
        'B': [1., 2., 2., 2.],
    })
    assert_frame_equal(filled, wanted)
def test_combine_series(
    self, float_frame, mixed_float_frame, mixed_int_frame, datetime_frame
):
    """Exercise frame-with-Series addition and flex ``add`` / ``mul``.

    The first half adds a row Series to float and mixed-float frames; the
    second half broadcasts a time series along the index with
    ``axis="index"``, including truncated and empty operands.
    """
    # --- Series ---
    first_row = float_frame.xs(float_frame.index[0])

    row_sum = float_frame + first_row
    for label, column in row_sum.items():
        tm.assert_series_equal(column, float_frame[label] + first_row[label])

    # A Series carrying an extra label "E" produces an all-NaN "E" column.
    extended = Series(dict(first_row.to_dict(), E=1))
    extended_sum = float_frame + extended
    for label, column in float_frame.items():
        tm.assert_series_equal(extended_sum[label], column + first_row[label])
    assert "E" in extended_sum
    assert np.isnan(extended_sum["E"]).all()

    # no upcast needed
    _check_mixed_float(mixed_float_frame + first_row)

    # vs mix (upcast) as needed
    for narrow in ("float32", "float16"):
        upcast = mixed_float_frame + first_row.astype(narrow)
        _check_mixed_float(upcast, dtype={"C": None})

    # FIXME: don't leave commented-out
    # these raise with numexpr.....as we are adding an int64 to an
    # uint64....weird vs int

    # added = mixed_int_frame + (100*series).astype('int64')
    # _check_mixed_int(added, dtype = dict(A = 'int64', B = 'float64', C =
    # 'int64', D = 'int64'))
    # added = mixed_int_frame + (100*series).astype('int32')
    # _check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C =
    # 'int32', D = 'int64'))

    # --- TimeSeries ---
    ts = datetime_frame["A"]

    # 10890
    # we no longer allow auto timeseries broadcasting
    # and require explicit broadcasting
    broadcast = datetime_frame.add(ts, axis="index")

    for label, column in datetime_frame.items():
        column_sum = column + ts
        tm.assert_series_equal(broadcast[label], column_sum, check_names=False)
        assert broadcast[label].name == label
        if column.name != ts.name:
            assert column_sum.name is None
        else:
            assert column_sum.name == "A"

    # Truncating the frame or the series gives the same aligned result.
    truncated_frame_sum = datetime_frame[:-5].add(ts, axis="index")
    tm.assert_index_equal(truncated_frame_sum.index, datetime_frame.index)

    truncated_ts_sum = datetime_frame.add(ts[:-5], axis="index")
    tm.assert_frame_equal(truncated_frame_sum, truncated_ts_sum)

    # length 0, result is all-nan
    empty_ts_sum = datetime_frame.add(ts[:0], axis="index")
    all_nan = DataFrame(
        np.nan, index=datetime_frame.index, columns=datetime_frame.columns
    )
    tm.assert_frame_equal(empty_ts_sum, all_nan)

    # Frame is all-nan
    empty_frame_sum = datetime_frame[:0].add(ts, axis="index")
    all_nan = DataFrame(
        np.nan, index=datetime_frame.index, columns=datetime_frame.columns
    )
    tm.assert_frame_equal(empty_frame_sum, all_nan)

    # empty but with non-empty index
    no_columns = datetime_frame[:1].reindex(columns=[])
    product = no_columns.mul(ts, axis="index")
    assert len(product) == len(ts)
def test_combineSeries(self):
    """Exercise frame-with-Series addition and flex ``add`` / ``mul``.

    The first half adds a row Series to the float and mixed-float frames;
    the second half broadcasts a time series along the index with
    ``axis="index"``, including truncated and empty operands.
    """
    # --- Series ---
    first_row = self.frame.xs(self.frame.index[0])

    row_sum = self.frame + first_row
    for label, column in compat.iteritems(row_sum):
        assert_series_equal(column, self.frame[label] + first_row[label])

    # A Series carrying an extra label "E" produces an all-NaN "E" column.
    extended = Series(dict(first_row.to_dict(), E=1))
    extended_sum = self.frame + extended
    for label, column in compat.iteritems(self.frame):
        assert_series_equal(extended_sum[label], column + first_row[label])
    self.assertIn("E", extended_sum)
    self.assertTrue(np.isnan(extended_sum["E"]).all())

    # vs mix (upcast) as needed
    _check_mixed_float(self.mixed_float + first_row, dtype="float64")
    for narrow in ("float32", "float16"):
        upcast = self.mixed_float + first_row.astype(narrow)
        _check_mixed_float(upcast, dtype={"C": None})

    # these raise with numexpr.....as we are adding an int64 to an
    # uint64....weird vs int

    # added = self.mixed_int + (100*series).astype('int64')
    # _check_mixed_int(added, dtype = dict(A = 'int64', B = 'float64', C =
    # 'int64', D = 'int64'))
    # added = self.mixed_int + (100*series).astype('int32')
    # _check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C =
    # 'int32', D = 'int64'))

    # --- TimeSeries ---
    ts = self.tsframe["A"]

    # 10890
    # we no longer allow auto timeseries broadcasting
    # and require explicit broadcasting
    broadcast = self.tsframe.add(ts, axis="index")

    for label, column in compat.iteritems(self.tsframe):
        column_sum = column + ts
        assert_series_equal(broadcast[label], column_sum, check_names=False)
        self.assertEqual(broadcast[label].name, label)
        if column.name != ts.name:
            self.assertTrue(column_sum.name is None)
        else:
            self.assertEqual(column_sum.name, "A")

    # Truncating the frame or the series gives the same aligned result.
    truncated_frame_sum = self.tsframe[:-5].add(ts, axis="index")
    self.assertTrue(truncated_frame_sum.index.equals(self.tsframe.index))

    truncated_ts_sum = self.tsframe.add(ts[:-5], axis="index")
    assert_frame_equal(truncated_frame_sum, truncated_ts_sum)

    # length 0, result is all-nan
    empty_ts_sum = self.tsframe.add(ts[:0], axis="index")
    all_nan = DataFrame(np.nan, index=self.tsframe.index,
                        columns=self.tsframe.columns)
    assert_frame_equal(empty_ts_sum, all_nan)

    # Frame is all-nan
    empty_frame_sum = self.tsframe[:0].add(ts, axis="index")
    all_nan = DataFrame(np.nan, index=self.tsframe.index,
                        columns=self.tsframe.columns)
    assert_frame_equal(empty_frame_sum, all_nan)

    # empty but with non-empty index
    no_columns = self.tsframe[:1].reindex(columns=[])
    product = no_columns.mul(ts, axis="index")
    self.assertEqual(len(product), len(ts))
def test_arith_flex_frame(self):
    """Compare flex arithmetic methods with the ``operator`` functions.

    Builds float, mixed-float, int and mixed-int frames from
    ``tm.getSeriesData()``.  For every name in ``ops`` the frame method of
    that name is checked against the ``operator`` function of the same
    name (``div`` is looked up as ``truediv`` when ``PY3`` is true).
    Operands with extra dimensions must raise ``ValueError``, and passing
    ``fill_value`` together with a row of the frame must raise
    ``NotImplementedError``.
    """
    seriesd = tm.getSeriesData()
    frame = pd.DataFrame(seriesd).copy()

    # Same columns as ``frame`` cast to float32 / float32 / float16 / float64.
    mixed_float = pd.DataFrame({'A': frame['A'].copy().astype('float32'),
                                'B': frame['B'].copy().astype('float32'),
                                'C': frame['C'].copy().astype('float16'),
                                'D': frame['D'].copy().astype('float64')})

    intframe = pd.DataFrame({k: v.astype(int)
                             for k, v in seriesd.items()})
    # Column 'B' is all ones (uint64); the others are casts of ``intframe``.
    mixed_int = pd.DataFrame({'A': intframe['A'].copy().astype('int32'),
                              'B': np.ones(len(intframe), dtype='uint64'),
                              'C': intframe['C'].copy().astype('uint8'),
                              'D': intframe['D'].copy().astype('int64')})

    # force these all to int64 to avoid platform testing issues
    intframe = pd.DataFrame({c: s for c, s in intframe.items()},
                            dtype=np.int64)

    ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod']
    # Map a method name to the operator-module name used as the reference.
    if not PY3:
        aliases = {}
    else:
        aliases = {'div': 'truediv'}

    for op in ops:
        try:
            alias = aliases.get(op, op)
            f = getattr(operator, alias)
            result = getattr(frame, op)(2 * frame)
            exp = f(frame, 2 * frame)
            tm.assert_frame_equal(result, exp)

            # vs mix float
            result = getattr(mixed_float, op)(2 * mixed_float)
            exp = f(mixed_float, 2 * mixed_float)
            tm.assert_frame_equal(result, exp)
            _check_mixed_float(result, dtype=dict(C=None))

            # vs mix int
            if op in ['add', 'sub', 'mul']:
                result = getattr(mixed_int, op)(2 + mixed_int)
                exp = f(mixed_int, 2 + mixed_int)

                # no overflow in the uint
                dtype = None
                if op in ['sub']:
                    dtype = dict(B='uint64', C=None)
                elif op in ['add', 'mul']:
                    dtype = dict(C=None)
                tm.assert_frame_equal(result, exp)
                _check_mixed_int(result, dtype=dtype)

                # rops
                # r_f swaps the operands so it can serve as the reference
                # for the reflected method ('r' + op).
                r_f = lambda x, y: f(y, x)
                result = getattr(frame, 'r' + op)(2 * frame)
                exp = r_f(frame, 2 * frame)
                tm.assert_frame_equal(result, exp)

                # vs mix float
                # NOTE(review): from here to the end of the try body the
                # forward method ``op`` and ``f`` are used again, not
                # ``'r' + op`` / ``r_f`` -- confirm whether reflected ops
                # were intended.
                result = getattr(mixed_float, op)(2 * mixed_float)
                exp = f(mixed_float, 2 * mixed_float)
                tm.assert_frame_equal(result, exp)
                _check_mixed_float(result, dtype=dict(C=None))

                result = getattr(intframe, op)(2 * intframe)
                exp = f(intframe, 2 * intframe)
                tm.assert_frame_equal(result, exp)

                # vs mix int
                if op in ['add', 'sub', 'mul']:
                    result = getattr(mixed_int, op)(2 + mixed_int)
                    exp = f(mixed_int, 2 + mixed_int)

                    # no overflow in the uint
                    dtype = None
                    if op in ['sub']:
                        dtype = dict(B='uint64', C=None)
                    elif op in ['add', 'mul']:
                        dtype = dict(C=None)
                    tm.assert_frame_equal(result, exp)
                    _check_mixed_int(result, dtype=dtype)
        except:
            # Report which operation failed, then re-raise unchanged.
            printing.pprint_thing("Failing operation %r" % op)
            raise

        # ndim >= 3
        ndim_5 = np.ones(frame.shape + (3, 4, 5))
        msg = "Unable to coerce to Series/DataFrame"
        with tm.assert_raises_regex(ValueError, msg):
            f(frame, ndim_5)

        with tm.assert_raises_regex(ValueError, msg):
            getattr(frame, op)(ndim_5)

    # res_add = frame.add(frame)
    # res_sub = frame.sub(frame)
    # res_mul = frame.mul(frame)
    # res_div = frame.div(2 * frame)

    # tm.assert_frame_equal(res_add, frame + frame)
    # tm.assert_frame_equal(res_sub, frame - frame)
    # tm.assert_frame_equal(res_mul, frame * frame)
    # tm.assert_frame_equal(res_div, frame / (2 * frame))

    # A scalar operand behaves like the ``+`` operator.
    const_add = frame.add(1)
    tm.assert_frame_equal(const_add, frame + 1)

    # corner cases
    # Adding a zero-row slice (either side) must equal frame * NaN.
    result = frame.add(frame[:0])
    tm.assert_frame_equal(result, frame * np.nan)

    result = frame[:0].add(frame)
    tm.assert_frame_equal(result, frame * np.nan)
    # fill_value with a row operand must raise, with or without axis.
    with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
        frame.add(frame.iloc[0], fill_value=3)
    with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
        frame.add(frame.iloc[0], axis='index', fill_value=3)
def test_arith_flex_frame(self):
    """Compare flex arithmetic methods with the ``operator`` functions.

    For every name in ``ops`` the frame method of that name is checked
    against the ``operator`` function of the same name (``div`` is looked
    up as ``truediv`` when ``compat.PY3`` is true) on the float,
    mixed-float, int and mixed-int fixtures.  Operands with extra
    dimensions must raise ``ValueError``, and passing ``fill_value``
    together with a row of the frame must raise ``NotImplementedError``.
    """
    ops = ["add", "sub", "mul", "div", "truediv", "pow", "floordiv", "mod"]
    # Map a method name to the operator-module name used as the reference.
    if not compat.PY3:
        aliases = {}
    else:
        aliases = {"div": "truediv"}

    for op in ops:
        try:
            alias = aliases.get(op, op)
            f = getattr(operator, alias)
            result = getattr(self.frame, op)(2 * self.frame)
            exp = f(self.frame, 2 * self.frame)
            assert_frame_equal(result, exp)

            # vs mix float
            result = getattr(self.mixed_float, op)(2 * self.mixed_float)
            exp = f(self.mixed_float, 2 * self.mixed_float)
            assert_frame_equal(result, exp)
            _check_mixed_float(result, dtype=dict(C=None))

            # vs mix int
            if op in ["add", "sub", "mul"]:
                result = getattr(self.mixed_int, op)(2 + self.mixed_int)
                exp = f(self.mixed_int, 2 + self.mixed_int)

                # overflow in the uint
                # (for "sub", column B is expected to have object dtype)
                dtype = None
                if op in ["sub"]:
                    dtype = dict(B="object", C=None)
                elif op in ["add", "mul"]:
                    dtype = dict(C=None)
                assert_frame_equal(result, exp)
                _check_mixed_int(result, dtype=dtype)

                # rops
                # r_f swaps the operands so it can serve as the reference
                # for the reflected method ("r" + op).
                r_f = lambda x, y: f(y, x)
                result = getattr(self.frame, "r" + op)(2 * self.frame)
                exp = r_f(self.frame, 2 * self.frame)
                assert_frame_equal(result, exp)

                # vs mix float
                # NOTE(review): from here to the end of the try body the
                # forward method ``op`` and ``f`` are used again, not
                # ``"r" + op`` / ``r_f`` -- confirm whether reflected ops
                # were intended.
                result = getattr(self.mixed_float, op)(2 * self.mixed_float)
                exp = f(self.mixed_float, 2 * self.mixed_float)
                assert_frame_equal(result, exp)
                _check_mixed_float(result, dtype=dict(C=None))

                result = getattr(self.intframe, op)(2 * self.intframe)
                exp = f(self.intframe, 2 * self.intframe)
                assert_frame_equal(result, exp)

                # vs mix int
                if op in ["add", "sub", "mul"]:
                    result = getattr(self.mixed_int, op)(2 + self.mixed_int)
                    exp = f(self.mixed_int, 2 + self.mixed_int)

                    # overflow in the uint
                    dtype = None
                    if op in ["sub"]:
                        dtype = dict(B="object", C=None)
                    elif op in ["add", "mul"]:
                        dtype = dict(C=None)
                    assert_frame_equal(result, exp)
                    _check_mixed_int(result, dtype=dtype)
        except:
            # Report which operation failed, then re-raise unchanged.
            printing.pprint_thing("Failing operation %r" % op)
            raise

        # ndim >= 3
        ndim_5 = np.ones(self.frame.shape + (3, 4, 5))
        with assertRaisesRegexp(ValueError, "shape"):
            f(self.frame, ndim_5)

        with assertRaisesRegexp(ValueError, "shape"):
            getattr(self.frame, op)(ndim_5)

    # res_add = self.frame.add(self.frame)
    # res_sub = self.frame.sub(self.frame)
    # res_mul = self.frame.mul(self.frame)
    # res_div = self.frame.div(2 * self.frame)

    # assert_frame_equal(res_add, self.frame + self.frame)
    # assert_frame_equal(res_sub, self.frame - self.frame)
    # assert_frame_equal(res_mul, self.frame * self.frame)
    # assert_frame_equal(res_div, self.frame / (2 * self.frame))

    # A scalar operand behaves like the ``+`` operator.
    const_add = self.frame.add(1)
    assert_frame_equal(const_add, self.frame + 1)

    # corner cases
    # Adding a zero-row slice (either side) must equal frame * NaN.
    result = self.frame.add(self.frame[:0])
    assert_frame_equal(result, self.frame * np.nan)

    result = self.frame[:0].add(self.frame)
    assert_frame_equal(result, self.frame * np.nan)
    # fill_value with a row operand must raise, with or without axis.
    with assertRaisesRegexp(NotImplementedError, "fill_value"):
        self.frame.add(self.frame.iloc[0], fill_value=3)
    with assertRaisesRegexp(NotImplementedError, "fill_value"):
        self.frame.add(self.frame.iloc[0], axis="index", fill_value=3)
def test_combineFrame(self):
    """Check ``DataFrame + DataFrame`` alignment, corner cases and upcasting.

    Covers: adding a frame to a row-subsampled copy that lacks column 'D'
    and has NaNs in 'C'; adding a frame to itself; adding empty frames;
    adding frames whose columns are in reverse order; and adding the float
    frame to mixed-float / mixed-int frames.
    """
    frame_copy = self.frame.reindex(self.frame.index[::2])
    del frame_copy['D']
    # Write the first five rows of 'C' in one positional step on the frame
    # itself.  The chained form ``frame_copy['C'][:5] = nan`` may assign
    # into a temporary copy and leave ``frame_copy`` untouched.
    frame_copy.iloc[:5, frame_copy.columns.get_loc('C')] = nan

    added = self.frame + frame_copy

    # Rows present in both operands hold 2 * A; all other rows are NaN.
    indexer = added['A'].valid().index
    exp = (self.frame['A'] * 2).copy()

    tm.assert_series_equal(added['A'].valid(), exp.loc[indexer])

    exp.loc[~exp.index.isin(indexer)] = np.nan
    tm.assert_series_equal(added['A'], exp.loc[added['A'].index])

    # NaNs written into 'C' propagate through the addition.
    assert np.isnan(added['C'].reindex(frame_copy.index)[:5]).all()

    # 'D' exists on one side only, so the sum is all-NaN.
    assert np.isnan(added['D']).all()

    self_added = self.frame + self.frame
    tm.assert_index_equal(self_added.index, self.frame.index)

    added_rev = frame_copy + self.frame
    # NOTE(review): the first assertion re-checks ``added``, not
    # ``added_rev``; kept as in the original.
    assert np.isnan(added['D']).all()
    assert np.isnan(added_rev['D']).all()

    # corner cases

    # empty
    plus_empty = self.frame + self.empty
    assert np.isnan(plus_empty.values).all()

    empty_plus = self.empty + self.frame
    assert np.isnan(empty_plus.values).all()

    empty_empty = self.empty + self.empty
    assert empty_empty.empty

    # out of order
    reverse = self.frame.reindex(columns=self.frame.columns[::-1])
    assert_frame_equal(reverse + self.frame, self.frame * 2)

    # mix vs float64, upcast
    added = self.frame + self.mixed_float
    _check_mixed_float(added, dtype='float64')
    added = self.mixed_float + self.frame
    _check_mixed_float(added, dtype='float64')

    # mix vs mix
    added = self.mixed_float + self.mixed_float2
    _check_mixed_float(added, dtype=dict(C=None))
    added = self.mixed_float2 + self.mixed_float
    _check_mixed_float(added, dtype=dict(C=None))

    # with int
    added = self.frame + self.mixed_int
    _check_mixed_float(added, dtype='float64')
def test_arith_flex_frame(self):
    """Check DataFrame flex arithmetic methods against ``operator`` functions.

    For every method name in ``ops`` the result of ``frame.<op>(other)`` is
    compared with the matching ``operator`` function on the ``frame``,
    ``mixed_float`` and ``intframe`` fixtures, and on ``mixed_int`` for
    add/sub/mul. The reflected method (``'r' + op``) is checked on
    ``self.frame``. Each op must also reject an ndarray operand with three
    extra trailing dimensions with ``ValueError``.

    After the loop: scalar ``add``, addition with a zero-row slice, and the
    ``NotImplementedError`` raised when ``fill_value`` is combined with a
    Series operand.
    """
    ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod']
    # On Python 3 the 'div' method is compared against operator.truediv.
    aliases = {'div': 'truediv'} if compat.PY3 else {}

    # Loop-invariant inputs for the "ndim >= 3" rejection check below.
    ndim_5 = np.ones(self.frame.shape + (3, 4, 5))
    msg = "Unable to coerce to Series/DataFrame"

    for op in ops:
        try:
            f = getattr(operator, aliases.get(op, op))

            result = getattr(self.frame, op)(2 * self.frame)
            exp = f(self.frame, 2 * self.frame)
            assert_frame_equal(result, exp)

            # vs mix float
            result = getattr(self.mixed_float, op)(2 * self.mixed_float)
            exp = f(self.mixed_float, 2 * self.mixed_float)
            assert_frame_equal(result, exp)
            _check_mixed_float(result, dtype=dict(C=None))

            # vs plain int
            result = getattr(self.intframe, op)(2 * self.intframe)
            exp = f(self.intframe, 2 * self.intframe)
            assert_frame_equal(result, exp)

            # vs mix int
            if op in ('add', 'sub', 'mul'):
                result = getattr(self.mixed_int, op)(2 + self.mixed_int)
                exp = f(self.mixed_int, 2 + self.mixed_int)

                # no overflow in the uint
                if op == 'sub':
                    dtype = dict(B='uint64', C=None)
                else:
                    dtype = dict(C=None)
                assert_frame_equal(result, exp)
                _check_mixed_int(result, dtype=dtype)

            # rops: the reflected method must equal the operator applied
            # with its operands swapped.
            # NOTE(review): only self.frame goes through the reflected
            # method; the mixed-dtype fixtures are not exercised with it.
            result = getattr(self.frame, 'r' + op)(2 * self.frame)
            exp = f(2 * self.frame, self.frame)
            assert_frame_equal(result, exp)
        except Exception:
            # Name the failing op before re-raising, since all ops share
            # this one test.
            printing.pprint_thing("Failing operation %r" % op)
            raise

        # ndim >= 3
        with tm.assert_raises_regex(ValueError, msg):
            f(self.frame, ndim_5)

        with tm.assert_raises_regex(ValueError, msg):
            getattr(self.frame, op)(ndim_5)

    const_add = self.frame.add(1)
    assert_frame_equal(const_add, self.frame + 1)

    # corner cases
    result = self.frame.add(self.frame[:0])
    assert_frame_equal(result, self.frame * np.nan)

    result = self.frame[:0].add(self.frame)
    assert_frame_equal(result, self.frame * np.nan)

    with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
        self.frame.add(self.frame.iloc[0], fill_value=3)

    with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
        self.frame.add(self.frame.iloc[0], axis='index', fill_value=3)
def test_combineSeries(self):
    """Check ``DataFrame + Series`` arithmetic.

    First part: a Series taken from the first row of ``self.frame`` is added
    to the frame, including a variant with an extra key 'E' and the dtype
    outcome on the mixed-float fixture. Second part: column 'A' of
    ``self.tsframe`` is combined with the frame using explicit
    ``axis='index'``, including shortened and empty operands.
    """
    # Series
    first_row = self.frame.xs(self.frame.index[0])

    row_sum = self.frame + first_row
    for label, summed_col in compat.iteritems(row_sum):
        assert_series_equal(summed_col, self.frame[label] + first_row[label])

    # Same row plus an extra key 'E' that is not a column of the frame.
    row_dict = first_row.to_dict()
    row_dict['E'] = 1
    wider_row = Series(row_dict)
    wider_sum = self.frame + wider_row

    for label, orig_col in compat.iteritems(self.frame):
        assert_series_equal(wider_sum[label], orig_col + first_row[label])
    assert 'E' in wider_sum
    assert np.isnan(wider_sum['E']).all()

    # no upcast needed
    _check_mixed_float(self.mixed_float + first_row)

    # vs mix (upcast) as needed
    for narrow in ('float32', 'float16'):
        _check_mixed_float(self.mixed_float + first_row.astype(narrow),
                           dtype=dict(C=None))

    # these raise with numexpr.....as we are adding an int64 to an
    # uint64....weird vs int

    # added = self.mixed_int + (100*series).astype('int64')
    # _check_mixed_int(added, dtype = dict(A = 'int64', B = 'float64', C =
    # 'int64', D = 'int64'))
    # added = self.mixed_int + (100*series).astype('int32')
    # _check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C =
    # 'int32', D = 'int64'))

    # TimeSeries
    ts = self.tsframe['A']

    # 10890
    # we no longer allow auto timeseries broadcasting
    # and require explicit broadcasting
    ts_sum = self.tsframe.add(ts, axis='index')

    for label, column in compat.iteritems(self.tsframe):
        col_plus_ts = column + ts
        assert_series_equal(ts_sum[label], col_plus_ts, check_names=False)
        assert ts_sum[label].name == label
        # The name survives only when both operands carry the same name.
        if column.name != ts.name:
            assert col_plus_ts.name is None
        else:
            assert col_plus_ts.name == 'A'

    # Dropping the last five rows from either operand gives the same frame.
    short_frame_sum = self.tsframe[:-5].add(ts, axis='index')
    tm.assert_index_equal(short_frame_sum.index, self.tsframe.index)

    short_ts_sum = self.tsframe.add(ts[:-5], axis='index')
    assert_frame_equal(short_frame_sum, short_ts_sum)

    all_nan = DataFrame(np.nan, index=self.tsframe.index,
                        columns=self.tsframe.columns)

    # length 0, result is all-nan
    assert_frame_equal(self.tsframe.add(ts[:0], axis='index'), all_nan)

    # Frame is all-nan
    assert_frame_equal(self.tsframe[:0].add(ts, axis='index'), all_nan)

    # empty but with non-empty index
    no_columns = self.tsframe[:1].reindex(columns=[])
    product = no_columns.mul(ts, axis='index')
    assert len(product) == len(ts)
def test_fillna(self):
    """Check ``DataFrame.fillna`` with scalar, method, dict, limit and dates.

    Covers the ``tsframe``, ``mixed_frame`` and ``mixed_float`` fixtures, the
    argument-validation errors, an empty frame (GH #2778), dict fill on
    object data (GH3386), ``limit`` combined with a value, and datetime
    columns (GH 6344).
    """
    ts_frame = self.tsframe
    ts_frame.loc[ts_frame.index[:5], 'A'] = nan
    ts_frame.loc[ts_frame.index[-5:], 'A'] = nan

    zero_filled = self.tsframe.fillna(0)
    leading_zeros = zero_filled.loc[zero_filled.index[:5], 'A'] == 0
    self.assertTrue(leading_zeros.all())

    padded = self.tsframe.fillna(method='pad')
    # The leading NaNs have nothing before them to carry forward.
    self.assertTrue(np.isnan(padded.loc[padded.index[:5], 'A']).all())
    # Each of the last five values must equal the fifth-from-last value.
    tail_matches = (padded.loc[padded.index[-5:], 'A'] ==
                    padded.loc[padded.index[-5], 'A'])
    self.assertTrue(tail_matches.all())

    # mixed type
    mixed = self.mixed_frame
    mixed.loc[mixed.index[5:20], 'foo'] = nan
    mixed.loc[mixed.index[-10:], 'A'] = nan
    # Results are not inspected; the calls only have to complete.
    self.mixed_frame.fillna(value=0)
    self.mixed_frame.fillna(method='pad')

    # Neither value nor method given, then both given.
    with self.assertRaises(ValueError):
        self.tsframe.fillna()
    with self.assertRaises(ValueError):
        self.tsframe.fillna(5, method='ffill')

    # mixed numeric (but no float16)
    numeric = self.mixed_float.reindex(columns=['A', 'B', 'D'])
    numeric.loc[numeric.index[-10:], 'A'] = nan
    _check_mixed_float(numeric.fillna(value=0), dtype=dict(C=None))
    _check_mixed_float(numeric.fillna(method='pad'), dtype=dict(C=None))

    # empty frame (GH #2778)
    empty = DataFrame(columns=['x'])
    for fill_method in ('pad', 'backfill'):
        empty.x.fillna(method=fill_method, inplace=True)
        empty.x.fillna(method=fill_method)

    # with different dtype (GH3386)
    letters = DataFrame([[ch, ch, np.nan, ch] for ch in 'abc'])
    letters_filled = DataFrame([[ch, ch, 'foo', ch] for ch in 'abc'])

    assert_frame_equal(letters.fillna({2: 'foo'}), letters_filled)

    letters.fillna({2: 'foo'}, inplace=True)
    assert_frame_equal(letters, letters_filled)

    # limit and value
    random_df = DataFrame(np.random.randn(10, 3))
    random_df.iloc[2:7, 0] = np.nan
    random_df.iloc[3:5, 2] = np.nan

    # Expect only the first NaN in each column to be replaced.
    limited = random_df.copy()
    limited.iloc[2, 0] = 999
    limited.iloc[3, 2] = 999
    assert_frame_equal(random_df.fillna(999, limit=1), limited)

    # with datelike
    # GH 6344
    dates = DataFrame({
        'Date': [pd.NaT, Timestamp("2014-1-1")],
        'Date2': [Timestamp("2013-1-1"), pd.NaT]
    })

    dates_filled = dates.copy()
    first_date2 = dates.loc[dates.index[0], 'Date2']
    dates_filled['Date'] = dates_filled['Date'].fillna(first_date2)
    assert_frame_equal(dates.fillna(value={'Date': dates['Date2']}),
                       dates_filled)