def test_thorough_mangle_names(self):
    """Duplicate ``names`` that already carry ``.N`` suffixes (gh-17095).

    Every scenario reads a tiny CSV with user-supplied ``names`` that
    contain duplicates, expects a ``UserWarning`` and checks that the
    resulting column labels are all distinct.
    """
    scenarios = [
        (
            "a,b,b\n1,2,3",
            ["a.1", "a.1", "a.1.1"],
            ["a.1", "a.1.1", "a.1.1.1"],
        ),
        (
            "a,b,c,d,e,f\n1,2,3,4,5,6",
            ["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"],
            ["a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1", "a.1.1.1.1.1"],
        ),
        (
            "a,b,c,d,e,f,g\n1,2,3,4,5,6,7",
            ["a", "a", "a.3", "a.1", "a.2", "a", "a"],
            ["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"],
        ),
    ]

    for data, names, expected_columns in scenarios:
        with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
            df = self.read_csv(StringIO(data), sep=",", names=names,
                               mangle_dupe_cols=True)
            assert list(df.columns) == expected_columns
def test_excel_deprecated_options(self):
    """``to_excel`` must warn for ``cols=`` and stay quiet for ``columns=``."""
    with ensure_clean(self.ext) as path:
        frame = self.frame

        # legacy keyword: a FutureWarning is required
        with tm.assert_produces_warning(FutureWarning):
            frame.to_excel(path, 'test1', cols=['A', 'B'])

        # current keyword: the helper is told to expect no warning
        with tm.assert_produces_warning(False):
            frame.to_excel(path, 'test1', columns=['A', 'B'])
def test_scatter_matrix_axis(self):
    """Check the first panel's y tick labels of ``scatter_matrix`` (GH 5662).

    The matrix is drawn twice: on the raw random frame, and again after
    column 0 has been shifted and rescaled.  Which labels are expected
    depends on ``self.mpl_ge_2_0_0``.
    """
    scatter_matrix = plotting.scatter_matrix

    def draw_and_check(frame, labels_if_flag, labels_otherwise):
        # we are plotting multiples on a sub-plot
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(scatter_matrix,
                                     filterwarnings='always',
                                     frame=frame, range_padding=.1)
        axes0_labels = axes[0][0].yaxis.get_majorticklabels()

        expected = labels_if_flag if self.mpl_ge_2_0_0 else labels_otherwise
        self._check_text_labels(axes0_labels, expected)
        self._check_ticks_props(axes, xlabelsize=8, xrot=90,
                                ylabelsize=8, yrot=0)

    with tm.RNGContext(42):
        df = DataFrame(randn(100, 3))

    draw_and_check(df,
                   ['-2', '0', '2'],
                   ['-2', '-1', '0', '1', '2'])

    df[0] = ((df[0] - 2) / 3)

    draw_and_check(df,
                   ['-1.0', '-0.5', '0.0'],
                   ['-1.2', '-1.0', '-0.8', '-0.6', '-0.4', '-0.2', '0.0'])
def test_pi_ops_nat(self):
    """Integer add/sub on a PeriodIndex holding NaT: FutureWarning, NaT kept."""

    def check_integer_shift(idx, shifted, n):
        # every integer add/sub spelling below runs inside one warning check
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False,
                                        clear=[pd.core.arrays.datetimelike]):
            self._check(idx, lambda x: x + n, shifted)
            self._check(idx, lambda x: n + x, shifted)
            self._check(idx, lambda x: np.add(x, n), shifted)

            # and the way back
            self._check(idx + n, lambda x: x - n, idx)
            self._check(idx + n, lambda x: np.subtract(x, n), idx)

    raw = ['2011-01', '2011-02', 'NaT', '2011-04']

    check_integer_shift(
        PeriodIndex(raw, freq='M', name='idx'),
        PeriodIndex(['2011-03', '2011-04', 'NaT', '2011-06'],
                    freq='M', name='idx'),
        2)

    # freq with mult
    check_integer_shift(
        PeriodIndex(raw, freq='2M', name='idx'),
        PeriodIndex(['2011-07', '2011-08', 'NaT', '2011-10'],
                    freq='2M', name='idx'),
        3)
def test_sort_index_multicolumn(self):
    """``sort_index(by=...)`` warns; ``sort_values`` matches ``np.lexsort``."""
    import random

    A = np.arange(5).repeat(20)
    B = np.tile(np.arange(5), 20)
    random.shuffle(A)
    random.shuffle(B)
    frame = DataFrame({'A': A, 'B': B, 'C': np.random.randn(100)})

    def check(by, lexsort_keys, **sort_kwargs):
        # use .sort_values #9816
        with tm.assert_produces_warning(FutureWarning):
            frame.sort_index(by=list(by), **sort_kwargs)
        result = frame.sort_values(by=list(by), **sort_kwargs)

        # np.lexsort treats the LAST key as the primary one
        indexer = np.lexsort(lexsort_keys())
        expected = frame.take(indexer)
        assert_frame_equal(result, expected)

    check(['A', 'B'], lambda: (frame['B'], frame['A']))

    check(['A', 'B'],
          lambda: (frame['B'].rank(ascending=False),
                   frame['A'].rank(ascending=False)),
          ascending=False)

    check(['B', 'A'], lambda: (frame['A'], frame['B']))
def test_is_dtype_no_warning(check):
    """``check`` must be silent for a DataFrame and for one of its columns."""
    data = pd.DataFrame({"A": [1, 2]})

    # the selection itself is performed inside the warning check as well
    selectors = (lambda obj: obj, lambda obj: obj["A"])
    for select in selectors:
        with tm.assert_produces_warning(None):
            check(select(data))
def test_constructor_generic_timestamp_deprecated(self):
    """Unit-less datetime/timedelta dtypes warn; ``[ps]`` units raise.

    see gh-15524
    """
    generic_dtypes = [
        (np.timedelta64, 'm8[ns]'),
        (np.datetime64, 'M8[ns]'),
    ]
    for dtype, resolved in generic_dtypes:
        with tm.assert_produces_warning(FutureWarning):
            s = Series([], dtype=dtype)

            assert s.empty
            assert s.dtype == resolved

    # These timestamps have the wrong frequencies,
    # so an Exception should be raised now.
    rejected = [
        ("cannot convert timedeltalike", 'm8[ps]'),
        ("cannot convert datetimelike", 'M8[ps]'),
    ]
    for msg, bad_dtype in rejected:
        with tm.assert_raises_regex(TypeError, msg):
            Series([], dtype=bad_dtype)
def test_match(self):
    """``str.match`` returning booleans (behaviour introduced in 0.13)."""
    grouped_pat = '.*(BAD[_]+).*(BAD)'

    # plain strings, pattern with groups: explicit as_indexer=True warns
    values = Series(['fooBAD__barBAD', NA, 'foo'])
    with tm.assert_produces_warning():
        result = values.str.match(grouped_pat, as_indexer=True)
    tm.assert_series_equal(result, Series([True, NA, False]))

    # If no groups, use new behavior even when as_indexer is False.
    # (Old behavior is pretty much useless in this case.)
    values = Series(['fooBAD__barBAD', NA, 'foo'])
    result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False)
    tm.assert_series_equal(result, Series([True, NA, False]))

    # mixed
    mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(),
                    'foo', None, 1, 2.])
    with tm.assert_produces_warning():
        rs = Series(mixed).str.match(grouped_pat, as_indexer=True)
    xp = [True, NA, True, NA, NA, False, NA, NA, NA]
    tm.assert_isinstance(rs, Series)
    tm.assert_almost_equal(rs, xp)

    # unicode
    values = Series([u('fooBAD__barBAD'), NA, u('foo')])
    with tm.assert_produces_warning():
        result = values.str.match(grouped_pat, as_indexer=True)
    tm.assert_series_equal(result, Series([True, NA, False]))
def test_ndarray_compat_properties(self):
    """ndarray-like attributes of every object in ``self.objs``.

    Supported attributes must be non-None, deprecated ones must emit a
    ``FutureWarning``, and ``item()`` must reject objects of length > 1.
    """
    supported = ('shape', 'dtype', 'T', 'nbytes')
    deprecated = ('flags', 'strides', 'itemsize')

    for obj in self.objs:
        # Check that we work.
        for attr in supported:
            assert getattr(obj, attr, None) is not None

        # deprecated properties
        for attr in deprecated:
            with tm.assert_produces_warning(FutureWarning):
                assert getattr(obj, attr, None) is not None

        with tm.assert_produces_warning(FutureWarning):
            assert hasattr(obj, 'base')

        # If we have a datetime-like dtype then needs a view to work
        # but the user is responsible for that
        try:
            with tm.assert_produces_warning(FutureWarning):
                assert obj.data is not None
        except ValueError:
            # deliberately tolerated, see the note above
            pass

        with pytest.raises(ValueError):
            obj.item()  # len > 1

        assert obj.ndim == 1
        assert obj.size == len(obj)

    # single-element containers can be unboxed
    assert Index([1]).item() == 1
    assert Series([1]).item() == 1
def test_c_engine(self):
    """Check how options unsupported by the C parser engine are handled.

    Covers (see gh-6607):

    * C-unsupported options together with a python-unsupported option, or
      together with an explicit ``engine='c'``: ``ValueError``;
    * C-unsupported options on their own: ``ParserWarning``;
    * whitespace-delimited text whose rows have differing field counts:
      ``CParserError``;
    * ``thousands`` / ``lineterminator`` values that are not exactly one
      character long: ``ValueError``.

    Regular-expression separators are written as raw strings so that
    ``\\s`` is not an invalid escape sequence in a normal string literal.
    """
    data = 'a b c\n1 2 3'
    msg = 'does not support'

    # specify C-unsupported options with python-unsupported option
    # (options will be ignored on fallback, raise)
    for kwargs in (dict(sep=None, delim_whitespace=False),
                   dict(sep=r'\s'),
                   dict(skipfooter=1)):
        with tm.assertRaisesRegexp(ValueError, msg):
            read_table(StringIO(data), dtype={'a': float}, **kwargs)

    # specify C engine with unsupported options (raise)
    for kwargs in (dict(sep=None, delim_whitespace=False),
                   dict(sep=r'\s'),
                   dict(sep='§'),
                   dict(skipfooter=1)):
        with tm.assertRaisesRegexp(ValueError, msg):
            read_table(StringIO(data), engine='c', **kwargs)

    # specify C-unsupported options without python-unsupported options
    for kwargs in (dict(sep=None, delim_whitespace=False),
                   dict(sep=r'\s'),
                   dict(skipfooter=1)):
        with tm.assert_produces_warning(parsers.ParserWarning):
            read_table(StringIO(data), **kwargs)

    # One row per line; the rows deliberately carry different numbers of
    # fields, which is what makes the tokenizer give up.
    text = (" A B C D E\n"
            "one two three four\n"
            "a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640\n"
            "a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744\n"
            "x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838")
    msg = 'Error tokenizing data'

    with tm.assertRaisesRegexp(CParserError, msg):
        read_table(StringIO(text), sep=r'\s+')
    with tm.assertRaisesRegexp(CParserError, msg):
        read_table(StringIO(text), engine='c', sep=r'\s+')

    msg = "Only length-1 thousands markers supported"
    data = ("A|B|C\n"
            "1|2,334|5\n"
            "10|13|10.\n")
    for thousands in (',,', ''):
        with tm.assertRaisesRegexp(ValueError, msg):
            read_csv(StringIO(data), thousands=thousands)

    msg = "Only length-1 line terminators supported"
    data = 'a,b,c~~1,2,3~~4,5,6'
    with tm.assertRaisesRegexp(ValueError, msg):
        read_csv(StringIO(data), lineterminator='~~')
def test_deprecated_match(self):
    """Old ``str.match`` behaviour: returns the groups and warns.

    Deprecated (but still default) in 0.13.
    """
    grouped_pat = '.*(BAD[_]+).*(BAD)'

    values = Series(['fooBAD__barBAD', NA, 'foo'])
    with tm.assert_produces_warning():
        result = values.str.match(grouped_pat)
    tm.assert_series_equal(result, Series([('BAD__', 'BAD'), NA, []]))

    # mixed
    mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(),
                    'foo', None, 1, 2.])
    with tm.assert_produces_warning():
        rs = Series(mixed).str.match(grouped_pat)
    xp = [('BAD_', 'BAD'), NA, ('BAD_', 'BAD'), NA, NA, [], NA, NA, NA]
    tm.assert_isinstance(rs, Series)
    tm.assert_almost_equal(rs, xp)

    # unicode
    values = Series([u('fooBAD__barBAD'), NA, u('foo')])
    with tm.assert_produces_warning():
        result = values.str.match(grouped_pat)
    tm.assert_series_equal(result,
                           Series([(u('BAD__'), u('BAD')), NA, []]))
def test_wls_panel(self):
    """Weighted ``ols`` on a Panel must agree with the fit on stacked data."""
    y = tm.makeTimeDataFrame()
    x = Panel({'x1': tm.makeTimeDataFrame(),
               'x2': tm.makeTimeDataFrame()})

    # knock two observations out of every column of y
    holes = [([1, 7], 'A'), ([6, 15], 'B'), ([3, 20], 'C'), ([5, 11], 'D')]
    for rows, col in holes:
        y.ix[rows, col] = np.nan

    stack_y = y.stack()
    stack_x = DataFrame({k: v.stack() for k, v in x.iteritems()})

    weights = x.std('items')
    stack_weights = weights.stack()

    # all stacked inputs are re-indexed by plain tuples
    for stacked in (stack_y, stack_x, stack_weights):
        stacked.index = stacked.index._tuple_index

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=y, x=x, weights=1 / weights)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for attr in ['resid', 'y_fitted']:
        panel_values = getattr(result, attr).stack().values
        stacked_values = getattr(expected, attr).values
        assert_almost_equal(panel_values, stacked_values)
def checkMovingOLS(self, x, y, window_type='rolling', **kwds):
    """Compare a moving ``ols`` fit with static fits on each window.

    For every valid index of the moving regression a static regression is
    fitted on the matching truncated slice of ``x`` / ``y`` and handed to
    ``self.compare``.  Every ``ols`` call is expected to emit a
    ``FutureWarning``.
    """
    window = 25  # must be larger than rank of x

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        moving = ols(y=y, x=x, window_type=window_type,
                     window=window, **kwds)

    index = moving._index
    rolling = window_type == 'rolling'

    for n, i in enumerate(moving._valid_indices):
        # a full rolling window starts `window - 1` positions back;
        # otherwise the slice starts at the first index entry
        first = i - window + 1 if (rolling and i >= window) else 0
        prior_date = index[first]
        date = index[i]

        x_iter = {k: v.truncate(before=prior_date, after=date)
                  for k, v in compat.iteritems(x)}
        y_iter = y.truncate(before=prior_date, after=date)

        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            static = ols(y=y_iter, x=x_iter, **kwds)

        self.compare(static, moving, event_index=i, result_index=n)

    _check_non_raw_results(moving)
def test_check_label_or_level_ambiguity_df(df_ambig, axis):
    """``_check_label_or_level_ambiguity`` warns only for ambiguous names."""
    # Transpose frame if axis == 1
    if axis == 1:
        df_ambig = df_ambig.T

    # wording of the warning depends on the axis being checked
    if axis == 0:
        expected_prefix = ("'L1' is both an index level "
                           "and a column label")
    else:
        expected_prefix = ("'L1' is both a column level "
                           "and an index label")

    # df_ambig has both an on-axis level and off-axis label named L1
    # Therefore L1 is ambiguous
    with tm.assert_produces_warning(FutureWarning,
                                    clear=True,
                                    check_stacklevel=False) as w:
        assert df_ambig._check_label_or_level_ambiguity('L1', axis=axis)
        warning_msg = w[0].message.args[0]
        assert warning_msg.startswith(expected_prefix)

    # df_ambig has an on-axis level named L2 and it is not ambiguous
    # No warning should be raised
    with tm.assert_produces_warning(None):
        assert not df_ambig._check_label_or_level_ambiguity('L2', axis=axis)

    # df_ambig has an off-axis label named L3 and it is not ambiguous
    with tm.assert_produces_warning(None):
        assert not df_ambig._is_level_reference('L3', axis=axis)
def test_deprecate_kwarg(self):
    """``self.f1`` warns for ``old=`` (returning the value) but not ``new=``."""
    value = 78

    # deprecated keyword: warning expected, the argument comes back as-is
    with tm.assert_produces_warning(FutureWarning):
        returned = self.f1(old=value)
    assert returned is value

    # replacement keyword: no warning expected
    with tm.assert_produces_warning(None):
        self.f1(new=value)
def test_boxplot_return_type_legacy(self):
    """``DataFrame.boxplot`` return types, with and without ``return_type``.

    API change in https://github.com/pydata/pandas/pull/7096
    """
    import matplotlib as mpl  # noqa

    df = DataFrame(randn(6, 4),
                   index=list(string.ascii_letters[:6]),
                   columns=['one', 'two', 'three', 'four'])

    with tm.assertRaises(ValueError):
        df.boxplot(return_type='NOTATYPE')

    # omitting return_type warns and currently yields the 'dict' flavour
    with tm.assert_produces_warning(FutureWarning):
        result = df.boxplot()
    # change to Axes in future
    self._check_box_return_type(result, 'dict')

    # an explicit return_type must be silent and honoured
    for return_type in ('dict', 'axes', 'both'):
        with tm.assert_produces_warning(False):
            result = df.boxplot(return_type=return_type)
        self._check_box_return_type(result, return_type)
def test_pi_add_offset_array(self, box):
    """Adding an array of offsets to a PeriodIndex (GH#18849)."""
    pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')])
    offs = box([pd.offsets.QuarterEnd(n=1, startingMonth=12),
                pd.offsets.QuarterEnd(n=-2, startingMonth=12)])
    expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')])

    # addition is checked with the PeriodIndex on either side
    for left, right in ((pi, offs), (offs, pi)):
        with tm.assert_produces_warning(PerformanceWarning):
            summed = left + right
        tm.assert_index_equal(summed, expected)

    unanchored = np.array([pd.offsets.Hour(n=1),
                           pd.offsets.Minute(n=-2)])

    # addition with incompatible offsets should issue a
    # PerformanceWarning and _then_ raise IncompatibleFrequency.
    for left, right in ((pi, unanchored), (unanchored, pi)):
        with pytest.raises(IncompatibleFrequency):
            with tm.assert_produces_warning(PerformanceWarning):
                left + right
def _test_small_strings_no_warn(self, compress):
    """Round-trip empty and one-byte arrays without any warning.

    The decoded arrays must be equal to the input and writeable, and
    writing into the one-byte result must not affect ``b'a'``.
    """
    # zero-length payload
    empty = np.array([], dtype='uint8')
    with tm.assert_produces_warning(None):
        empty_unpacked = self.encode_decode(empty, compress=compress)

    tm.assert_numpy_array_equal(empty_unpacked, empty)
    assert empty_unpacked.flags.writeable

    # single-byte payload
    char = np.array([ord(b'a')], dtype='uint8')
    with tm.assert_produces_warning(None):
        char_unpacked = self.encode_decode(char, compress=compress)

    tm.assert_numpy_array_equal(char_unpacked, char)
    assert char_unpacked.flags.writeable

    # if this test fails I am sorry because the interpreter is now in a
    # bad state where b'a' points to 98 == ord(b'b').
    char_unpacked[0] = ord(b'b')

    # we compare the ord of bytes b'a' with unicode 'a' because they should
    # always be the same (unless we were able to mutate the shared
    # character singleton, in which case ord(b'a') == ord(b'b')).
    assert ord(b'a') == ord('a')
    mutated = np.array([ord(b'b')], dtype='uint8')
    tm.assert_numpy_array_equal(char_unpacked, mutated)
def test_grouper_column_index_level_precedence(frame, key_strs,
                                               key_groupers,
                                               level_groupers):
    """Column beats index level when a ``by`` string matches both.

    GH 5677, when a string passed as the `by` parameter
    matches a column and an index level the column takes
    precedence and a FutureWarning is raised
    """
    # Add 'inner' column to frame
    # (frame already has an 'inner' index)
    frame['inner'] = [1] * 6

    # Performing a groupby with strings should produce warning
    with tm.assert_produces_warning(FutureWarning):
        by_strings = frame.groupby(key_strs).mean()

    # Grouping with key Grouper should produce the same result and no warning
    with tm.assert_produces_warning(False):
        by_key_groupers = frame.groupby(key_groupers).mean()

    assert_frame_equal(by_strings, by_key_groupers)

    # Grouping with level Grouper should produce a different result but
    # still no warning
    with tm.assert_produces_warning(False):
        by_level_groupers = frame.groupby(level_groupers).mean()

    assert not by_strings.index.equals(by_level_groupers.index)
def test_wls_panel(self):
    """Weighted ``ols`` on a Panel must agree with the fit on stacked data."""
    y = tm.makeTimeDataFrame()
    x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

    # knock two observations out of every column of y
    holes = [([1, 7], "A"), ([6, 15], "B"), ([3, 20], "C"), ([5, 11], "D")]
    for rows, col in holes:
        y.ix[rows, col] = np.nan

    stack_y = y.stack()
    stack_x = DataFrame({k: v.stack() for k, v in compat.iteritems(x)})

    weights = x.std("items")
    stack_weights = weights.stack()

    # all stacked inputs are re-indexed by plain tuples
    for stacked in (stack_y, stack_x, stack_weights):
        stacked.index = stacked.index._tuple_index

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=y, x=x, weights=1 / weights)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for attr in ["resid", "y_fitted"]:
        panel_values = getattr(result, attr).stack().values
        stacked_values = getattr(expected, attr).values
        assert_almost_equal(panel_values, stacked_values)
def test_boxplot_legacy2(self):
    """``boxplot`` with ``by=`` / ``ax=``: warnings and axes that get reused."""
    df = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2'])
    df['X'] = Series(['A'] * 5 + ['B'] * 5)
    df['Y'] = Series(['A'] * 10)

    with tm.assert_produces_warning(UserWarning):
        _check_plot_works(df.boxplot, by='X')

    # When ax is supplied and required number of axes is 1,
    # passed ax should be used:
    fig, ax = self.plt.subplots()
    returned = df.boxplot('Col1', by='X', ax=ax)
    assert ax.axes is returned

    fig, ax = self.plt.subplots()
    returned = df.groupby('Y').boxplot(ax=ax, return_type='axes')
    assert ax.axes is returned['A']

    # Multiple columns with an ax argument should use same figure
    fig, ax = self.plt.subplots()
    with tm.assert_produces_warning(UserWarning):
        returned = df.boxplot(column=['Col1', 'Col2'],
                              by='X', ax=ax, return_type='axes')
    assert returned['Col1'].get_figure() is fig

    # When by is None, check that all relevant lines are present in the
    # dict
    fig, ax = self.plt.subplots()
    artists = df.boxplot(ax=ax, return_type='dict')
    lines = list(itertools.chain.from_iterable(artists.values()))
    assert len(ax.get_lines()) == len(lines)
def test_multi_function_flexible_mix(df):
    """Equivalent spellings of the 'D' aggregation give the same frame.

    GH #1268
    """
    grouped = df.groupby('A')

    def spec_with(d_agg):
        # 'C' is always renamed via a nested mapping; only 'D' varies
        return OrderedDict([['C', OrderedDict([['foo', 'mean'],
                                               ['bar', 'std']])],
                            ['D', d_agg]])

    def aggregate(spec):
        # this uses column selection & renaming
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            return grouped.aggregate(spec)

    # Expected
    expected = aggregate(spec_with({'sum': 'sum'}))

    # Test 1: bare function name, Test 2: one-element list
    for d_agg in ('sum', ['sum']):
        result = aggregate(spec_with(d_agg))
        tm.assert_frame_equal(result, expected)
def checkFamaMacBethExtended(self, window_type, x, y, **kwds):
    """Compare windowed ``fama_macbeth`` stats with per-window static fits.

    Column ``i`` of the windowed result's ``_stats`` must match the stats
    of a static fit on the ``i``-th window.  Every ``fama_macbeth`` call
    is expected to emit a ``FutureWarning``.
    """
    window = 25

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = fama_macbeth(y=y, x=x, window_type=window_type,
                              window=window, **kwds)
    self._check_stuff_works(result)

    index = result._index
    n_periods = len(index)

    for i in range(n_periods - window + 1):
        # rolling windows slide along the index, others stay anchored at 0
        start = index[i] if window_type == 'rolling' else index[0]
        end = index[i + window - 1]

        x2 = {k: v.truncate(start, end) for k, v in compat.iteritems(x)}
        y2 = y.truncate(start, end)

        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            reference = fama_macbeth(y=y2, x=x2, **kwds)
        assert_almost_equal(reference._stats, result._stats[:, i])

    # one more static fit on the data of the last window
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        static = fama_macbeth(y=y2, x=x2, **kwds)
    self._check_stuff_works(static)
def test_merge_index_column_precedence(df1, df2):
    """A column named like an index level wins in ``merge`` and warns."""
    keys = ['outer', 'inner']

    # Construct left_df with both an index and a column named 'outer'.
    # We make this 'outer' column equal to the 'inner' column so that we
    # can verify that the correct values are used by the merge operation
    left_df = df1.set_index('outer')
    left_df['outer'] = left_df['inner']

    # Construct right_df with an index level named 'outer'
    right_df = df2.set_index('outer')

    # Construct expected result.
    # The 'outer' column from left_df is chosen and the resulting
    # frame has no index levels
    left_flat = left_df.reset_index(level='outer', drop=True)
    expected = left_flat.merge(right_df.reset_index(),
                               on=['outer', 'inner'])

    # Merge left_df and right_df on 'outer' and 'inner'
    # 'outer' for left_df should refer to the 'outer' column, not the
    # 'outer' index level and a FutureWarning should be raised
    with tm.assert_produces_warning(FutureWarning):
        merged = left_df.merge(right_df, on=list(keys))

    # Check results
    assert_frame_equal(merged, expected)

    # Perform the same using the left_on and right_on parameters
    with tm.assert_produces_warning(FutureWarning):
        merged = left_df.merge(right_df,
                               left_on=list(keys),
                               right_on=list(keys))

    assert_frame_equal(merged, expected)
def test_astype_categorical_to_categorical(self, name, dtype_ordered,
                                           series_ordered):
    """Categorical -> categorical ``astype``, dtype object vs keyword form.

    GH 10696/18593.  The keyword spelling (``'category'`` plus
    ``categories`` / ``ordered``) must warn and give the same result as
    passing a ``CategoricalDtype``.
    """
    s_data = list('abcaacbab')
    s_dtype = CategoricalDtype(list('bac'), ordered=series_ordered)
    s = Series(s_data, dtype=s_dtype, name=name)

    # unspecified categories
    target = CategoricalDtype(ordered=dtype_ordered)
    result = s.astype(target)
    exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
    expected = Series(s_data, name=name, dtype=exp_dtype)
    tm.assert_series_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s.astype('category', ordered=dtype_ordered)
    tm.assert_series_equal(result, expected)

    # different categories
    target = CategoricalDtype(list('adc'), dtype_ordered)
    result = s.astype(target)
    expected = Series(s_data, name=name, dtype=target)
    tm.assert_series_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s.astype('category', categories=list('adc'),
                          ordered=dtype_ordered)
    tm.assert_series_equal(result, expected)

    if dtype_ordered is False:
        # not specifying ordered, so only test once
        tm.assert_series_equal(s.astype('category'), s)
def test_setattr_warnings():
    """Attribute-style assignment on a DataFrame warns only when it misleads.

    GH5904 - Suggestion: Warning for DataFrame colname-methodname clash
    GH7175 - GOTCHA: You can't use dot notation to add a column...

    The "no warning" sections use ``tm.assert_produces_warning(None)``,
    the same helper this test uses for its positive cases.  A bare
    ``catch_warnings(record=True)`` leaves the active warning filters
    untouched, so a warning hidden by an existing filter (or already
    registered as shown once for that location) never reaches the
    recorded list and a ``len(w) == 0`` check can pass vacuously.
    """
    d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
         'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
    df = pd.DataFrame(d)

    with tm.assert_produces_warning(None):
        # successfully add new column
        # this should not raise a warning
        df['three'] = df.two + 1
        assert df.three.sum() > df.two.sum()

    with tm.assert_produces_warning(None):
        # successfully modify column in place
        # this should not raise a warning
        df.one += 1
        assert df.one.iloc[0] == 2

    with tm.assert_produces_warning(None):
        # successfully add an attribute to a series
        # this should not raise a warning
        df.two.not_an_index = [1, 2]

    with tm.assert_produces_warning(UserWarning):
        # warn when setting column to nonexistent name
        df.four = df.two + 2
        assert df.four.sum() > df.two.sum()

    with tm.assert_produces_warning(UserWarning):
        # warn when column has same name as method
        df['sum'] = df.two
def test_agg_compat(self):
    """Dict-based renaming in ``SeriesGroupBy.agg`` still works but warns.

    GH 12334
    """
    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': np.random.randn(8) + 1.0,
                    'D': np.arange(8)})

    g = df.groupby(['A', 'B'])
    grouped_d = g['D']

    # one output name mapped to a list of functions -> two-level columns
    expected = pd.concat([grouped_d.sum(), grouped_d.std()], axis=1)
    expected.columns = MultiIndex.from_tuples([('C', 'sum'),
                                               ('C', 'std')])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = grouped_d.agg({'C': ['sum', 'std']})
    assert_frame_equal(result, expected, check_like=True)

    # one function per output name -> flat columns
    expected = pd.concat([grouped_d.sum(), grouped_d.std()], axis=1)
    expected.columns = ['C', 'D']
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = grouped_d.agg({'C': 'sum', 'D': 'std'})
    assert_frame_equal(result, expected, check_like=True)
def test_ambiguous_warns(self):
    """``rename`` with two positional mappers emits a ``FutureWarning``."""
    df = pd.DataFrame({"A": [1, 2]})

    positional_mappers = (
        (id, id),
        ({0: 10}, {"A": "B"}),
    )
    for mappers in positional_mappers:
        with tm.assert_produces_warning(FutureWarning):
            df.rename(*mappers)
def test_empty_with_nrows_chunksize(self):
    """Header-only CSV with ``nrows`` / ``chunksize`` gives an empty frame.

    see gh-9535
    """
    csv = 'foo,bar\n'
    expected = DataFrame([], columns=['foo', 'bar'])

    def frame_from_parts(parts):
        # rebuild a frame from the (index, columns, data) triple
        rebuilt = DataFrame(parts[2], columns=parts[1], index=parts[0])
        return DataFrame.from_records(rebuilt)

    result = self.read_csv(StringIO(csv), nrows=10)
    tm.assert_frame_equal(result, expected)

    first_chunk = next(iter(self.read_csv(StringIO(csv), chunksize=10)))
    tm.assert_frame_equal(first_chunk, expected)

    # the as_recarray flavour is expected to warn
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = self.read_csv(StringIO(csv), nrows=10, as_recarray=True)
        tm.assert_frame_equal(frame_from_parts(result), expected,
                              check_index_type=False)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = next(iter(self.read_csv(StringIO(csv), chunksize=10,
                                         as_recarray=True)))
        tm.assert_frame_equal(frame_from_parts(result), expected,
                              check_index_type=False)
def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
    """Compare a moving ``ols`` fit with its sparse twin and static fits.

    The window is twice the matrix rank of ``x.values``.  The moving fit
    is compared with the same fit on sparse inputs, then with a static
    fit on each truncated slice via ``self.compare``.  Every ``ols`` call
    is expected to emit a ``FutureWarning``.
    """
    window = np.linalg.matrix_rank(x.values) * 2

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        moving = ols(y=y, x=x, weights=weights, window_type=window_type,
                     window=window, **kwds)

    # check that sparse version is the same
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        sparse_moving = ols(y=y.to_sparse(), x=x.to_sparse(),
                            weights=weights, window_type=window_type,
                            window=window, **kwds)
    _compare_ols_results(moving, sparse_moving)

    index = moving._index
    rolling = window_type == "rolling"

    for n, i in enumerate(moving._valid_indices):
        # a full rolling window starts `window - 1` positions back;
        # otherwise the slice starts at the first index entry
        first = i - window + 1 if (rolling and i >= window) else 0
        prior_date = index[first]
        date = index[i]

        x_iter = {k: v.truncate(before=prior_date, after=date)
                  for k, v in compat.iteritems(x)}
        y_iter = y.truncate(before=prior_date, after=date)

        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            static = ols(y=y_iter, x=x_iter, weights=weights, **kwds)

        self.compare(static, moving, event_index=i, result_index=n)

    _check_non_raw_results(moving)
def test_dups_fancy_indexing(self):
    """Label-based selection on frames with duplicate labels.

    Covers GH 3455, GH 3561, GH5553 and GH 4619; ``.loc`` with a list
    that contains missing labels is expected to emit a ``FutureWarning``.
    """
    from pandas.util.testing import makeCustomDataframe as mkdf

    def loc_with_warning(frame, key):
        # list-likes with missing labels still reindex, but must warn
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            return frame.loc[key]

    # GH 3455
    df = mkdf(10, 3)
    df.columns = ['a', 'a', 'b']
    result = df[['b', 'a']].columns
    expected = Index(['b', 'a', 'a'])
    tm.assert_index_equal(result, expected)

    # across dtypes
    df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']],
                   columns=list('aaaaaaa'))
    df.head()
    str(df)
    result = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']])
    result.columns = list('aaaaaaa')

    # TODO(wesm): unused?
    df_v = df.iloc[:, 4]  # noqa
    res_v = result.iloc[:, 4]  # noqa

    tm.assert_frame_equal(df, result)

    # GH 3561, dups not in selected order
    df = DataFrame(
        {'test': [5, 7, 9, 11],
         'test1': [4., 5, 6, 7],
         'other': list('abcd')}, index=['A', 'A', 'B', 'C'])
    rows = ['C', 'B']
    expected = DataFrame(
        {'test': [11, 9],
         'test1': [7., 6],
         'other': ['d', 'c']}, index=rows)
    tm.assert_frame_equal(df.loc[rows], expected)
    tm.assert_frame_equal(df.loc[Index(rows)], expected)

    rows = ['C', 'B', 'E']
    expected = DataFrame(
        {'test': [11, 9, np.nan],
         'test1': [7., 6, np.nan],
         'other': ['d', 'c', np.nan]}, index=rows)
    tm.assert_frame_equal(loc_with_warning(df, rows), expected)

    # see GH5553, make sure we use the right indexer
    rows = ['F', 'G', 'H', 'C', 'B', 'E']
    expected = DataFrame(
        {'test': [np.nan, np.nan, np.nan, 11, 9, np.nan],
         'test1': [np.nan, np.nan, np.nan, 7., 6, np.nan],
         'other': [np.nan, np.nan, np.nan, 'd', 'c', np.nan]},
        index=rows)
    tm.assert_frame_equal(loc_with_warning(df, rows), expected)

    # List containing only missing label
    dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD'))
    with pytest.raises(KeyError):
        dfnu.loc[['E']]

    # ToDo: check_index_type can be True after GH 11497

    # GH 4619; duplicate indexer with missing label
    df = DataFrame({"A": [0, 1, 2]})
    result = loc_with_warning(df, [0, 8, 0])
    expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
    tm.assert_frame_equal(result, expected, check_index_type=False)

    df = DataFrame({"A": list('abc')})
    result = loc_with_warning(df, [0, 8, 0])
    expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0])
    tm.assert_frame_equal(result, expected, check_index_type=False)

    # non unique with non unique selector
    df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C'])
    expected = DataFrame(
        {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E'])
    result = loc_with_warning(df, ['A', 'A', 'E'])
    tm.assert_frame_equal(result, expected)
def test_asobject_deprecated(self):
    """``Series.asobject`` emits a ``FutureWarning`` and yields an ndarray."""
    series = Series(np.random.randn(5), name='foo')

    with tm.assert_produces_warning(FutureWarning):
        as_object = series.asobject

    assert isinstance(as_object, np.ndarray)
def test_dtype_str(indices):
    """``dtype_str`` warns and equals ``str()`` of the index dtype."""
    with tm.assert_produces_warning(FutureWarning):
        as_text = indices.dtype_str
        assert isinstance(as_text, str)
        assert as_text == str(indices.dtype)
def test_pairs(self):
    """Exercise ``lreshape`` on paired visit-date / weight columns.

    Checks the default behaviour (rows with missing values dropped),
    ``dropna=False`` (all rows kept), the warning raised when ``label`` is
    passed, and the ``ValueError`` for column groups of unequal length.
    """
    data = {
        "birthdt": ["08jan2009", "20dec2008", "30dec2008", "21dec2008",
                    "11jan2009"],
        "birthwt": [1766, 3301, 1454, 3139, 4133],
        "id": [101, 102, 103, 104, 105],
        "sex": ["Male", "Female", "Female", "Female", "Female"],
        "visitdt1": ["11jan2009", "22dec2008", "04jan2009", "29dec2008",
                     "20jan2009"],
        "visitdt2": ["21jan2009", np.nan, "22jan2009", "31dec2008",
                     "03feb2009"],
        "visitdt3": ["05feb2009", np.nan, np.nan, "02jan2009", "15feb2009"],
        "wt1": [1823, 3338, 1549, 3298, 4306],
        "wt2": [2011.0, np.nan, 1892.0, 3338.0, 4575.0],
        "wt3": [2293.0, np.nan, np.nan, 3377.0, 4805.0],
    }
    frame = DataFrame(data)

    visit_cols = ["visitdt{i:d}".format(i=i) for i in range(1, 4)]
    weight_cols = ["wt{i:d}".format(i=i) for i in range(1, 4)]
    groups = {"visitdt": visit_cols, "wt": weight_cols}

    # Default: rows whose visit/weight pair is missing are dropped.
    result = lreshape(frame, groups)
    dropped = {
        "birthdt": ["08jan2009", "20dec2008", "30dec2008", "21dec2008",
                    "11jan2009", "08jan2009", "30dec2008", "21dec2008",
                    "11jan2009", "08jan2009", "21dec2008", "11jan2009"],
        "birthwt": [1766, 3301, 1454, 3139, 4133, 1766, 1454, 3139, 4133,
                    1766, 3139, 4133],
        "id": [101, 102, 103, 104, 105, 101, 103, 104, 105, 101, 104, 105],
        "sex": ["Male", "Female", "Female", "Female", "Female", "Male",
                "Female", "Female", "Female", "Male", "Female", "Female"],
        "visitdt": ["11jan2009", "22dec2008", "04jan2009", "29dec2008",
                    "20jan2009", "21jan2009", "22jan2009", "31dec2008",
                    "03feb2009", "05feb2009", "02jan2009", "15feb2009"],
        "wt": [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0, 1892.0,
               3338.0, 4575.0, 2293.0, 3377.0, 4805.0],
    }
    expected = DataFrame(dropped, columns=result.columns)
    tm.assert_frame_equal(result, expected)

    # dropna=False: the five identifier rows repeat once per visit.
    result = lreshape(frame, groups, dropna=False)
    kept = {
        "birthdt": data["birthdt"] * 3,
        "birthwt": data["birthwt"] * 3,
        "id": data["id"] * 3,
        "sex": data["sex"] * 3,
        "visitdt": ["11jan2009", "22dec2008", "04jan2009", "29dec2008",
                    "20jan2009", "21jan2009", np.nan, "22jan2009",
                    "31dec2008", "03feb2009", "05feb2009", np.nan, np.nan,
                    "02jan2009", "15feb2009"],
        "wt": [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0, np.nan,
               1892.0, 3338.0, 4575.0, 2293.0, np.nan, np.nan, 3377.0,
               4805.0],
    }
    expected = DataFrame(kept, columns=result.columns)
    tm.assert_frame_equal(result, expected)

    # Passing ``label`` is expected to emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning):
        lreshape(frame, groups, dropna=False, label="foo")

    # Column groups of different lengths are rejected.
    uneven = {
        "visitdt": ["visitdt{i:d}".format(i=i) for i in range(1, 3)],
        "wt": ["wt{i:d}".format(i=i) for i in range(1, 4)],
    }
    msg = "All column lists must be same length"
    with pytest.raises(ValueError, match=msg):
        lreshape(frame, uneven)
def test_apply_broadcast_deprecated(self):
    """Passing ``broadcast=True`` to ``apply`` must emit a FutureWarning."""
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        self.frame.apply(np.mean, broadcast=True)
def test_apply_deprecate_reduce(self):
    """Passing ``reduce=True`` to ``apply`` must emit a FutureWarning."""
    collected = []
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        self.empty.apply(collected.append, axis=1, reduce=True)
def test_depreciate_tz_and_tzinfo_in_datetime_input(self, box):
    """``Timestamp`` must warn when given a tz-aware input plus ``tz``."""
    # GH 23579
    fields = dict(year=2018, month=1, day=1, tzinfo=utc)
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        Timestamp(box(**fields), tz='US/Pacific')
def test_alias_to_unit_raises(self):
    """Passing a full dtype alias as the first argument must warn."""
    # 23990
    alias = 'datetime64[ns, US/Central]'
    with tm.assert_produces_warning(FutureWarning):
        DatetimeTZDtype(alias)
def test_aggregate_api_consistency(self):
    """Check that the different spellings of groupby ``agg`` agree.

    The per-column ``mean``/``sum`` results are computed once and used to
    build the expected frame for each ``agg`` call style: list of names,
    list of functions, dict of names, dict of lists, and the renaming dict
    (which is expected to emit a ``FutureWarning``).
    """
    # GH 9052
    # make sure that the aggregates via dict
    # are consistent
    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': np.random.randn(8) + 1.0,
                    'D': np.arange(8)})

    grouped = df.groupby(['A', 'B'])
    c_mean = grouped['C'].mean()
    c_sum = grouped['C'].sum()
    d_mean = grouped['D'].mean()
    d_sum = grouped['D'].sum()

    # list of names on a single column
    result = grouped['D'].agg(['sum', 'mean'])
    expected = pd.concat([d_sum, d_mean], axis=1)
    expected.columns = ['sum', 'mean']
    assert_frame_equal(result, expected, check_like=True)

    # list of functions on every column
    result = grouped.agg([np.sum, np.mean])
    expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
    expected.columns = MultiIndex.from_product([['C', 'D'],
                                                ['sum', 'mean']])
    assert_frame_equal(result, expected, check_like=True)

    # same, with the columns selected in a different order
    result = grouped[['D', 'C']].agg([np.sum, np.mean])
    expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
    expected.columns = MultiIndex.from_product([['D', 'C'],
                                                ['sum', 'mean']])
    assert_frame_equal(result, expected, check_like=True)

    # dict mapping column -> single aggregation
    result = grouped.agg({'C': 'mean', 'D': 'sum'})
    expected = pd.concat([d_sum, c_mean], axis=1)
    assert_frame_equal(result, expected, check_like=True)

    # dict mapping column -> list of aggregations
    result = grouped.agg({'C': ['mean', 'sum'],
                          'D': ['mean', 'sum']})
    expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
    expected.columns = MultiIndex.from_product([['C', 'D'],
                                                ['mean', 'sum']])
    # This comparison was missing: result/expected were built and then
    # overwritten below without ever being checked.
    assert_frame_equal(result, expected, check_like=True)

    # renaming dict: new top-level names 'r'/'r2' applied to each column
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        result = grouped[['D', 'C']].agg({'r': np.sum,
                                          'r2': np.mean})
    expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
    expected.columns = MultiIndex.from_product([['r', 'r2'],
                                                ['D', 'C']])
    assert_frame_equal(result, expected, check_like=True)
def test_real_imag_deprecated(self):
    """Both ``Series.imag`` and ``Series.real`` must emit a FutureWarning.

    Each attribute gets its own ``assert_produces_warning`` block. A shared
    block is satisfied as soon as one access warns, so it would not notice
    if the other attribute stopped warning.
    """
    # GH 18262
    s = pd.Series([1])
    with tm.assert_produces_warning(FutureWarning):
        s.imag
    with tm.assert_produces_warning(FutureWarning):
        s.real
def test_order(self):
    """``order()`` must emit a FutureWarning for every index in the fixture."""
    # 9816 deprecated
    for index in self.indices.values():
        expect_warning = tm.assert_produces_warning(FutureWarning)
        with expect_warning:
            index.order()
def panel():
    """Return an empty ``pd.Panel``, asserting construction warns.

    The expected category is ``FutureWarning``; the stack level of the
    warning is not checked.
    """
    expect_warning = tm.assert_produces_warning(FutureWarning,
                                                check_stacklevel=False)
    with expect_warning:
        empty_panel = pd.Panel()
    return empty_panel
def test_tz_localize_errors_ambiguous(self):
    """``tz_localize(errors='coerce')`` on an ambiguous time warns and raises."""
    # GH#13057
    stamp = Timestamp('2015-11-1 01:00')
    # Order matters: the warning check is the inner context, the exception
    # check the outer one.
    with pytest.raises(AmbiguousTimeError), \
            tm.assert_produces_warning(FutureWarning):
        stamp.tz_localize('US/Pacific', errors='coerce')
def panel():
    """Return an empty ``pd.Panel``, asserting construction warns.

    The expected category is ``DeprecationWarning``; the stack level of the
    warning is not checked.
    """
    expect_warning = tm.assert_produces_warning(DeprecationWarning,
                                                check_stacklevel=False)
    with expect_warning:
        empty_panel = pd.Panel()
    return empty_panel
def test_missing_deprecate_kwarg(self):
    """The old keyword must warn, and 'bogus' must come back unchanged."""
    unmapped = 'bogus'
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        outcome = self.f2(old=unmapped)
    assert outcome == 'bogus'
def test_duplicated_drop_duplicates_index(self):
    """Check ``duplicated`` / ``drop_duplicates`` for each object in ``self.objs``.

    Index objects and non-Index objects (compared here as Series) take
    separate branches. Each branch first checks the duplicate-free
    original, then appends the elements at positions 5 and 3 and checks
    the default ``keep``, ``keep='last'``, the deprecated ``take_last=True``
    (expected to warn) and ``keep=False``.
    """
    # GH 4060
    for original in self.objs:
        if isinstance(original, Index):

            # special case
            if original.is_boolean():
                result = original.drop_duplicates()
                expected = Index([False, True], name='a')
                tm.assert_index_equal(result, expected)
                continue

            # original doesn't have duplicates
            expected = np.array([False] * len(original), dtype=bool)
            duplicated = original.duplicated()
            tm.assert_numpy_array_equal(duplicated, expected)
            self.assertTrue(duplicated.dtype == bool)
            result = original.drop_duplicates()
            tm.assert_index_equal(result, original)
            # drop_duplicates must return a new object, not the input
            self.assertFalse(result is original)

            # has_duplicates
            self.assertFalse(original.has_duplicates)

            # create repeated values: the elements at positions 5 and 3
            # are appended again at the end
            idx = original[list(range(len(original))) + [5, 3]]
            expected = np.array([False] * len(original) + [True, True],
                                dtype=bool)
            duplicated = idx.duplicated()
            tm.assert_numpy_array_equal(duplicated, expected)
            self.assertTrue(duplicated.dtype == bool)
            tm.assert_index_equal(idx.drop_duplicates(), original)

            # keep='last': the earlier occurrences (positions 3 and 5)
            # are the ones flagged
            base = [False] * len(idx)
            base[3] = True
            base[5] = True
            expected = np.array(base)

            duplicated = idx.duplicated(keep='last')
            tm.assert_numpy_array_equal(duplicated, expected)
            self.assertTrue(duplicated.dtype == bool)
            result = idx.drop_duplicates(keep='last')
            tm.assert_index_equal(result, idx[~expected])

            # deprecate take_last
            with tm.assert_produces_warning(FutureWarning):
                duplicated = idx.duplicated(take_last=True)
            tm.assert_numpy_array_equal(duplicated, expected)
            self.assertTrue(duplicated.dtype == bool)
            with tm.assert_produces_warning(FutureWarning):
                result = idx.drop_duplicates(take_last=True)
            tm.assert_index_equal(result, idx[~expected])

            # keep=False: every occurrence of a repeated value is flagged
            base = [False] * len(original) + [True, True]
            base[3] = True
            base[5] = True
            expected = np.array(base)

            duplicated = idx.duplicated(keep=False)
            tm.assert_numpy_array_equal(duplicated, expected)
            self.assertTrue(duplicated.dtype == bool)
            result = idx.drop_duplicates(keep=False)
            tm.assert_index_equal(result, idx[~expected])

            # inplace is expected to be rejected on the Index branch
            with tm.assertRaisesRegexp(
                    TypeError, r"drop_duplicates\(\) got an unexpected "
                    "keyword argument"):
                idx.drop_duplicates(inplace=True)

        else:
            expected = Series([False] * len(original),
                              index=original.index, name='a')
            tm.assert_series_equal(original.duplicated(), expected)
            result = original.drop_duplicates()
            tm.assert_series_equal(result, original)
            # drop_duplicates must return a new object, not the input
            self.assertFalse(result is original)

            # rebuild the series with the elements at positions 5 and 3
            # appended again at the end
            idx = original.index[list(range(len(original))) + [5, 3]]
            values = original._values[list(range(len(original))) + [5, 3]]
            s = Series(values, index=idx, name='a')

            expected = Series([False] * len(original) + [True, True],
                              index=idx, name='a')
            tm.assert_series_equal(s.duplicated(), expected)
            tm.assert_series_equal(s.drop_duplicates(), original)

            # keep='last': the earlier occurrences are the ones flagged
            base = [False] * len(idx)
            base[3] = True
            base[5] = True
            expected = Series(base, index=idx, name='a')

            tm.assert_series_equal(s.duplicated(keep='last'), expected)
            tm.assert_series_equal(s.drop_duplicates(keep='last'),
                                   s[~np.array(base)])

            # deprecate take_last
            with tm.assert_produces_warning(FutureWarning):
                tm.assert_series_equal(
                    s.duplicated(take_last=True), expected)
            with tm.assert_produces_warning(FutureWarning):
                tm.assert_series_equal(s.drop_duplicates(take_last=True),
                                       s[~np.array(base)])

            # keep=False: every occurrence of a repeated value is flagged
            base = [False] * len(original) + [True, True]
            base[3] = True
            base[5] = True
            expected = Series(base, index=idx, name='a')

            tm.assert_series_equal(s.duplicated(keep=False), expected)
            tm.assert_series_equal(s.drop_duplicates(keep=False),
                                   s[~np.array(base)])

            # in-place dropping should leave the original values behind
            s.drop_duplicates(inplace=True)
            tm.assert_series_equal(s, original)
def test_valid_deprecated(self):
    """``Series.valid()`` must emit a FutureWarning."""
    # GH18800
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        pd.Series([]).valid()
def test_warns_non_roundtrippable_names(self, idx):
    """``set_default_names`` must warn when the index is named 'index'."""
    # GH 19130
    frame = pd.DataFrame([[]], index=idx)
    frame.index.name = 'index'
    expect_warning = tm.assert_produces_warning()
    with expect_warning:
        set_default_names(frame)
def test_deprecation(self):
    """``pd.ordered_merge`` must emit a FutureWarning."""
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        pd.ordered_merge(self.left, self.right, on='key')
def test_dict_deprecate_kwarg(self):
    """The old keyword must warn and 'yes' must yield a truthy result."""
    old_value = 'yes'
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        outcome = self.f2(old=old_value)
    assert outcome
def test_to_dict_not_unique_warning(self):
    """``to_dict`` on a frame with repeated column names must warn."""
    # GH16927: When converting to a dict, if a column has a non-unique name
    # it will be dropped, throwing a warning.
    column_names = ["a", "a", "b"]
    frame = DataFrame([[1, 2, 3]], columns=column_names)
    expect_warning = tm.assert_produces_warning(UserWarning)
    with expect_warning:
        frame.to_dict()
def test_is_period_deprecated():
    """``com.is_period`` must warn on every call and still answer correctly.

    Each call gets its own ``assert_produces_warning`` block. A single
    shared block is satisfied by one warning, so it would not detect a call
    path that stopped warning.
    """
    # plain list: not a period
    with tm.assert_produces_warning(FutureWarning):
        assert not com.is_period([1, 2, 3])
    # integer Index: not a period
    with tm.assert_produces_warning(FutureWarning):
        assert not com.is_period(pd.Index([1, 2, 3]))
    # PeriodIndex: is a period
    with tm.assert_produces_warning(FutureWarning):
        assert com.is_period(pd.PeriodIndex(["2017-01-01"], freq="D"))
def test_convert_objects(self):
    """Exercise the deprecated ``Series.convert_objects`` conversions.

    Every call goes through ``convert``, which asserts that the call emits
    a ``FutureWarning``. Covers numeric coercion, date coercion, all-NaT
    preservation, non-object passthrough and single-letter strings.
    """

    def convert(obj, **kwargs):
        # Wrap each deprecated call so the warning is checked every time.
        with tm.assert_produces_warning(FutureWarning):
            return obj.convert_objects(**kwargs)

    s = Series([1., 2, 3], index=['a', 'b', 'c'])
    result = convert(s, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, s)

    # force numeric conversion
    r = s.copy().astype('O')
    r['a'] = '1'
    result = convert(r, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, s)

    r = s.copy().astype('O')
    r['a'] = '1.'
    result = convert(r, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, s)

    # an unparseable string becomes NaN
    r = s.copy().astype('O')
    r['a'] = 'garbled'
    expected = s.copy()
    expected['a'] = np.nan
    result = convert(r, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, expected)

    # GH 4119, not converting a mixed type (e.g.floats and object)
    s = Series([1, 'na', 3, 4])
    result = convert(s, convert_numeric=True)
    expected = Series([1, np.nan, 3, 4])
    assert_series_equal(result, expected)

    s = Series([1, '', 3, 4])
    result = convert(s, convert_numeric=True)
    expected = Series([1, np.nan, 3, 4])
    assert_series_equal(result, expected)

    # dates
    s = Series([
        datetime(2001, 1, 1, 0, 0),
        datetime(2001, 1, 2, 0, 0),
        datetime(2001, 1, 3, 0, 0),
    ])
    s2 = Series([
        datetime(2001, 1, 1, 0, 0),
        datetime(2001, 1, 2, 0, 0),
        datetime(2001, 1, 3, 0, 0),
        'foo', 1.0, 1, Timestamp('20010104'), '20010105',
    ], dtype='O')

    result = convert(s, convert_dates=True, convert_numeric=False)
    expected = Series([
        Timestamp('20010101'),
        Timestamp('20010102'),
        Timestamp('20010103'),
    ], dtype='M8[ns]')
    assert_series_equal(result, expected)

    result = convert(s, convert_dates='coerce', convert_numeric=False)
    # This result used to be overwritten without being checked.
    assert_series_equal(result, expected)
    result = convert(s, convert_dates='coerce', convert_numeric=True)
    assert_series_equal(result, expected)

    # non-date entries in the mixed series are coerced to NaT
    expected = Series([
        Timestamp('20010101'),
        Timestamp('20010102'),
        Timestamp('20010103'),
        lib.NaT, lib.NaT, lib.NaT,
        Timestamp('20010104'),
        Timestamp('20010105'),
    ], dtype='M8[ns]')
    result = convert(s2, convert_dates='coerce', convert_numeric=False)
    assert_series_equal(result, expected)
    result = convert(s2, convert_dates='coerce', convert_numeric=True)
    assert_series_equal(result, expected)

    # preserver all-nans (if convert_dates='coerce')
    s = Series(['foo', 'bar', 1, 1.0], dtype='O')
    result = convert(s, convert_dates='coerce', convert_numeric=False)
    expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2)
    assert_series_equal(result, expected)

    # preserver if non-object
    s = Series([1], dtype='float32')
    result = convert(s, convert_dates='coerce', convert_numeric=False)
    assert_series_equal(result, s)

    # r = s.copy()
    # r[0] = np.nan
    # result = r.convert_objects(convert_dates=True,convert_numeric=False)
    # self.assertEqual(result.dtype, 'M8[ns]')

    # dateutil parses some single letters into today's value as a date
    for x in 'abcdefghijklmnopqrstuvwxyz':
        s = Series([x])
        result = convert(s, convert_dates='coerce')
        assert_series_equal(result, s)
        s = Series([x.upper()])
        result = convert(s, convert_dates='coerce')
        assert_series_equal(result, s)
def test_shim():
    """Importing ``Styler`` from the old ``pandas.formats`` path must warn."""
    # https://github.com/pandas-dev/pandas/pull/16059
    # Remove in 0.21
    expect_warning = tm.assert_produces_warning(FutureWarning,
                                                check_stacklevel=False)
    with expect_warning:
        from pandas.formats.style import Styler as _styler  # noqa
def test_convert_objects_preserve_bool(self):
    """A bool among ints in an object Series converts to an int64 Series."""
    mixed = Series([1, True, 3, 5], dtype=object)
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        converted = mixed.convert_objects(convert_numeric=True)
    expected = Series([1, 1, 3, 5], dtype='i8')
    tm.assert_series_equal(converted, expected)
def test_TimeSeries_deprecation(self):
    """Constructing ``pd.SparseTimeSeries`` must emit a FutureWarning."""
    # deprecation TimeSeries, #10890
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3))
def test_copy_delim_warning(self, df):
    """``to_clipboard(excel=False)`` with a ``sep`` argument must warn."""
    expect_warning = tm.assert_produces_warning()
    with expect_warning:
        df.to_clipboard(excel=False, sep='\t')
def test_convert_objects_preserve_all_bool(self):
    """An all-bool object Series converts to a bool Series, not numeric."""
    flags = [False, True, False, False]
    as_object = Series(flags, dtype=object)
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        converted = as_object.convert_objects(convert_numeric=True)
    expected = Series(flags, dtype=bool)
    tm.assert_series_equal(converted, expected)
def test_excel_sep_warning(self, df):
    """``to_clipboard(excel=True)`` with the two-character sep must warn."""
    # The raw string is a literal backslash followed by 't', not a tab.
    expect_warning = tm.assert_produces_warning()
    with expect_warning:
        df.to_clipboard(excel=True, sep=r'\t')
def test_nth():
    """Exercise ``GroupBy.nth`` for frames and series.

    Covers positive and negative positions, out-of-range positions, the
    ``dropna`` argument, a MultiIndex frame grouped by level, agreement
    with ``first()``, lists of positions, and grouping by year/month on a
    business-day index.
    """
    df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')

    # single positions; out-of-range positions give an empty frame
    assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A'))
    assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A'))
    assert_frame_equal(g.nth(2), df.loc[[]].set_index('A'))
    assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A'))
    assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A'))
    assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A'))
    assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]])
    assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]])
    assert_frame_equal(g[['B']].nth(0),
                       df.loc[[0, 2], ['A', 'B']].set_index('A'))

    # dropna='any': rows containing NaN are skipped before picking
    exp = df.set_index('A')
    assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]])
    assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]])

    # exp is mutated here: positions past the end are expected to give
    # NaN rows when dropna is set
    exp['B'] = np.nan
    assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]])
    assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]])

    # out of bounds, regression from 0.13.1
    # GH 6621
    df = DataFrame({
        'color': {
            0: 'green',
            1: 'green',
            2: 'red',
            3: 'red',
            4: 'red'
        },
        'food': {
            0: 'ham',
            1: 'eggs',
            2: 'eggs',
            3: 'ham',
            4: 'pork'
        },
        'two': {
            0: 1.5456590000000001,
            1: -0.070345000000000005,
            2: -2.4004539999999999,
            3: 0.46206000000000003,
            4: 0.52350799999999997
        },
        'one': {
            0: 0.56573799999999996,
            1: -0.9742360000000001,
            2: 1.033801,
            3: -0.78543499999999999,
            4: 0.70422799999999997
        }
    }).set_index(['color', 'food'])

    result = df.groupby(level=0, as_index=False).nth(2)
    expected = df.iloc[[-1]]
    assert_frame_equal(result, expected)

    result = df.groupby(level=0, as_index=False).nth(3)
    expected = df.loc[[]]
    assert_frame_equal(result, expected)

    # GH 7559
    # from the vbench
    df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64')
    s = df[1]
    g = df[0]
    expected = s.groupby(g).first()
    expected2 = s.groupby(g).apply(lambda x: x.iloc[0])
    assert_series_equal(expected2, expected, check_names=False)
    assert expected.name == 1
    assert expected2.name == 1

    # validate first
    v = s[g == 1].iloc[0]
    assert expected.iloc[0] == v
    assert expected2.iloc[0] == v

    # this is NOT the same as .first (as sorted is default!)
    # as it keeps the order in the series (and not the group order)
    # related GH 7287
    expected = s.groupby(g, sort=False).first()
    result = s.groupby(g, sort=False).nth(0, dropna='all')
    assert_series_equal(result, expected)

    # doc example
    df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')
    # PR 17493, related to issue 11038
    # test Series.nth with True for dropna produces FutureWarning
    with assert_produces_warning(FutureWarning):
        result = g.B.nth(0, dropna=True)
    expected = g.B.first()
    assert_series_equal(result, expected)

    # test multiple nth values
    df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]],
                   columns=['A', 'B'])
    g = df.groupby('A')

    assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A'))
    assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A'))
    assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A'))
    assert_frame_equal(
        g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A'))
    assert_frame_equal(
        g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
    assert_frame_equal(
        g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
    assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A'))
    assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A'))

    business_dates = pd.date_range(start='4/1/2014', end='6/30/2014',
                                   freq='B')
    df = DataFrame(1, index=business_dates, columns=['a', 'b'])
    # get the first, fourth and last two business days for each month
    key = [df.index.year, df.index.month]
    result = df.groupby(key, as_index=False).nth([0, 3, -2, -1])
    expected_dates = pd.to_datetime([
        '2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1',
        '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5',
        '2014/6/27', '2014/6/30'
    ])
    expected = DataFrame(1, columns=['a', 'b'], index=expected_dates)
    assert_frame_equal(result, expected)