def test_datetime_tests(self):
  # Runs doctests.testmod (use_beam=False) over the pandas datetimelike and
  # datetimes array modules and requires that neither reports a failure.
  # TODO(BEAM-10721)
  datetimelike_skips = {
      f'pandas.core.arrays.datetimelike.{name}': ['*']
      for name in (
          'AttributesMixin._unbox_scalar',
          'TimelikeOps.ceil',
          'TimelikeOps.floor',
          'TimelikeOps.round',
      )
  }
  datetimelike_result = doctests.testmod(
      pd.core.arrays.datetimelike, use_beam=False, skip=datetimelike_skips)

  datetimes_skips = {
      f'pandas.core.arrays.datetimes.DatetimeArray.{name}': ['*']
      for name in (
          'is_leap_year',
          'is_month_end',
          'is_month_start',
          'is_quarter_end',
          'is_quarter_start',
          'is_year_end',
          'is_year_start',
          'to_period',
          'tz_localize',
      )
  }
  datetime_result = doctests.testmod(
      pd.core.arrays.datetimes, use_beam=False, skip=datetimes_skips)

  self.assertEqual(datetimelike_result.failed, 0)
  self.assertEqual(datetime_result.failed, 0)
def test_datetime_tests(self):
  # Runs doctests.testmod (use_beam=False) over the pandas datetime accessor
  # and datetime array modules; each run must report zero failures.
  # TODO(BEAM-10721)
  accessor_skips = {
      'pandas.core.indexes.accessors.TimedeltaProperties': [
          # Looks like an upstream bug: the property is 'second'.
          'seconds_series.dt.seconds'
      ],

      # TODO(BEAM-12530): Test data creation fails for these
      #   s = pd.Series(pd.to_timedelta(np.arange(5), unit="d"))
      # pylint: disable=line-too-long
      'pandas.core.indexes.accessors.DatetimeProperties.to_pydatetime': [
          '*'
      ],
      'pandas.core.indexes.accessors.TimedeltaProperties.components': [
          '*'
      ],
      'pandas.core.indexes.accessors.TimedeltaProperties.to_pytimedelta': [
          '*'
      ],
      # pylint: enable=line-too-long
  }
  indexes_accessors_result = doctests.testmod(
      pd.core.indexes.accessors, use_beam=False, skip=accessor_skips)

  datetimelike_result = doctests.testmod(
      pd.core.arrays.datetimelike, use_beam=False)

  wont_implement = {
      'pandas.core.arrays.datetimes.DatetimeArray.to_period': ['*'],
      # Every tz_localize example passes an unsupported value for
      # ambiguous=. Verified separately in
      # frames_test.py::DeferredFrameTest::test_dt_tz_localize_*
      'pandas.core.arrays.datetimes.DatetimeArray.tz_localize': ['*'],
  }
  not_implemented = {
      # Verifies index version of this method
      'pandas.core.arrays.datetimes.DatetimeArray.to_period': [
          'df.index.to_period("M")'
      ],
  }
  datetime_result = doctests.testmod(
      pd.core.arrays.datetimes,
      use_beam=False,
      wont_implement_ok=wont_implement,
      not_implemented_ok=not_implemented)

  self.assertEqual(indexes_accessors_result.failed, 0)
  self.assertEqual(datetimelike_result.failed, 0)
  self.assertEqual(datetime_result.failed, 0)
def test_string_tests(self):
  # Runs doctests.testmod (use_beam=False) over pd.core.strings and requires
  # zero failures.
  wont_implement = {
      # These methods can accept deferred series objects, but not lists
      'pandas.core.strings.StringMethods.cat': [
          "s.str.cat(['A', 'B', 'C', 'D'], sep=',')",
          "s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-')",
          "s.str.cat(['A', 'B', 'C', 'D'], na_rep='-')"
      ],
      'pandas.core.strings.StringMethods.repeat': [
          's.str.repeat(repeats=[1, 2, 3])'
      ],
      'pandas.core.strings.str_repeat': [
          's.str.repeat(repeats=[1, 2, 3])'
      ],
  }
  skipped = {
      # Bad test strings
      'pandas.core.strings.str_replace': [
          "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
      ],
      'pandas.core.strings.StringMethods.replace': [
          "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
      ],
  }

  result = doctests.testmod(
      pd.core.strings,
      use_beam=False,
      wont_implement_ok=wont_implement,
      skip=skipped)
  self.assertEqual(result.failed, 0)
def test_string_tests(self):
  # Runs doctests.testmod (use_beam=False) over pd.core.strings with the skip
  # table below and requires zero failures.
  skipped = {
      'pandas.core.strings.StringMethods.cat': ['*'],
      'pandas.core.strings.StringMethods.repeat': ['*'],
      'pandas.core.strings.str_repeat': ['*'],

      # The rest of the skipped tests represent bad test strings,
      # fixed upstream in
      # https://github.com/pandas-dev/pandas/commit/d095ac899da953d759992824592a72a1e6ff5e09
      'pandas.core.strings.StringMethods': [
          "s.str.split('_')",
          "s.str.replace('_', '')",
      ],
      'pandas.core.strings.str_split': ["s.str.split(expand=True)"],
      'pandas.core.strings.str_replace': [
          "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
      ],
      'pandas.core.strings.StringMethods.replace': [
          "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
      ],
      'pandas.core.strings.StringMethods.partition': [
          'idx.str.partition()'
      ],
      'pandas.core.strings.StringMethods.rpartition': [
          'idx.str.partition()'
      ],

      # rsplit/split are particularly troublesome: the first example, which
      # defines the test series, is bad and must be skipped, but skipping it
      # breaks every other example. Running the rest would require executing
      # the first example while ignoring its output.
      'pandas.core.strings.StringMethods.rsplit': ["*"],
      'pandas.core.strings.StringMethods.split': ["*"],
  }

  result = doctests.testmod(pd.core.strings, use_beam=False, skip=skipped)
  self.assertEqual(result.failed, 0)
def test_dataframe_tests(self):
  # Runs doctests.testmod (use_beam=False) over pd.core.frame with the skip
  # table below and requires zero failures.
  skipped = {
      'pandas.core.frame.DataFrame.T': ['*'],
      'pandas.core.frame.DataFrame.agg': ['*'],
      'pandas.core.frame.DataFrame.aggregate': ['*'],
      'pandas.core.frame.DataFrame.append': ['*'],
      'pandas.core.frame.DataFrame.apply': ['*'],
      'pandas.core.frame.DataFrame.applymap': ['df ** 2'],
      'pandas.core.frame.DataFrame.assign': ['*'],
      'pandas.core.frame.DataFrame.axes': ['*'],
      'pandas.core.frame.DataFrame.combine': ['*'],
      'pandas.core.frame.DataFrame.combine_first': ['*'],
      'pandas.core.frame.DataFrame.corr': ['*'],
      'pandas.core.frame.DataFrame.count': ['*'],
      'pandas.core.frame.DataFrame.cov': ['*'],
      'pandas.core.frame.DataFrame.dot': ['*'],
      'pandas.core.frame.DataFrame.drop': ['*'],
      'pandas.core.frame.DataFrame.eval': ['*'],
      'pandas.core.frame.DataFrame.explode': ['*'],
      'pandas.core.frame.DataFrame.fillna': ['*'],
      'pandas.core.frame.DataFrame.info': ['*'],
      'pandas.core.frame.DataFrame.isin': ['*'],
      'pandas.core.frame.DataFrame.iterrows': ["print(df['int'].dtype)"],
      'pandas.core.frame.DataFrame.join': ['*'],
      'pandas.core.frame.DataFrame.melt': ['*'],
      'pandas.core.frame.DataFrame.memory_usage': ['*'],
      'pandas.core.frame.DataFrame.merge': ['*'],
      # Not equal to df.agg('mode', axis='columns', numeric_only=True)
      'pandas.core.frame.DataFrame.mode': [
          "df.mode(axis='columns', numeric_only=True)"
      ],
      'pandas.core.frame.DataFrame.nlargest': ['*'],
      'pandas.core.frame.DataFrame.nsmallest': ['*'],
      'pandas.core.frame.DataFrame.nunique': ['*'],
      'pandas.core.frame.DataFrame.pivot': ['*'],
      'pandas.core.frame.DataFrame.pivot_table': ['*'],
      'pandas.core.frame.DataFrame.query': ['*'],
      'pandas.core.frame.DataFrame.reindex': ['*'],
      'pandas.core.frame.DataFrame.reindex_axis': ['*'],
      'pandas.core.frame.DataFrame.rename': ['*'],
      # The right exception is raised, but the testing framework has
      # trouble matching it.
      'pandas.core.frame.DataFrame.replace': [
          "df.replace({'a string': 'new value', True: False})  # raises"
      ],
      # Uses unseeded np.random.
      'pandas.core.frame.DataFrame.round': ['*'],
      'pandas.core.frame.DataFrame.set_index': ['*'],
      'pandas.core.frame.DataFrame.transpose': [
          'df1_transposed.dtypes',
          'df2_transposed.dtypes',
      ],
      'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],
      # Uses df.index
      'pandas.core.frame.DataFrame.to_records': ['*'],
  }

  result = doctests.testmod(pd.core.frame, use_beam=False, skip=skipped)
  self.assertEqual(result.failed, 0)
def test_string_tests(self):
  # Runs doctests.testmod (use_beam=False) over the module holding the pandas
  # string-method docstrings and requires zero failures.
  #
  # Only the major and minor version components are parsed. Later components
  # can carry pre-release or dev suffixes (e.g. '1.3.0rc1'), and calling
  # int() on those raises ValueError.
  pd_major_minor = tuple(int(v) for v in pd.__version__.split('.')[:2])
  if pd_major_minor < (1, 2):
    module = pd.core.strings
  else:
    # Definitions were moved to accessor in pandas 1.2.0
    module = pd.core.strings.accessor

  # Keys are built from the selected module's own name so that they follow
  # whichever module was chosen above.
  module_name = module.__name__

  result = doctests.testmod(
      module,
      use_beam=False,
      wont_implement_ok={
          # These methods can accept deferred series objects, but not lists
          f'{module_name}.StringMethods.cat': [
              "s.str.cat(['A', 'B', 'C', 'D'], sep=',')",
              "s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-')",
              "s.str.cat(['A', 'B', 'C', 'D'], na_rep='-')"
          ],
          f'{module_name}.StringMethods.repeat': [
              's.str.repeat(repeats=[1, 2, 3])'
          ],
          f'{module_name}.str_repeat': ['s.str.repeat(repeats=[1, 2, 3])'],
          f'{module_name}.StringMethods.get_dummies': ['*'],
          f'{module_name}.str_get_dummies': ['*'],
      },
      skip={
          # count() on Series with a NaN produces mismatched type if we
          # have a NaN-only partition.
          f'{module_name}.StringMethods.count': ["s.str.count('a')"],
          f'{module_name}.str_count': ["s.str.count('a')"],

          # Produce None instead of NaN, see
          # frames_test.py::DeferredFrameTest::test_str_split
          f'{module_name}.StringMethods.rsplit': [
              's.str.split(expand=True)',
              's.str.rsplit("/", n=1, expand=True)',
          ],
          f'{module_name}.StringMethods.split': [
              's.str.split(expand=True)',
              's.str.rsplit("/", n=1, expand=True)',
          ],

          # Bad test strings in pandas 1.1.x
          f'{module_name}.str_replace': [
              "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
          ],
          f'{module_name}.StringMethods.replace': [
              "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
          ],

          # output has incorrect formatting in 1.2.x
          f'{module_name}.StringMethods.extractall': ['*']
      })
  self.assertEqual(result.failed, 0)
def test_series_tests(self):
  # Runs doctests.testmod (use_beam=False) over pd.core.series, passing a
  # ['*'] skip entry for each Series member named below, and requires zero
  # failures.
  skipped_members = (
      'append',
      'argmax',
      'argmin',
      'autocorr',
      'between',
      'combine',
      'combine_first',
      'corr',
      'count',
      'cov',
      'dot',
      'drop',
      'drop_duplicates',
      'dropna',
      'duplicated',
      'explode',
      'fillna',
      'idxmax',
      'idxmin',
      'isin',
      'items',
      'iteritems',
      'memory_usage',
      'nlargest',
      'nonzero',
      'nsmallest',
      'quantile',
      'reindex',
      'rename',
      'repeat',
      'replace',
      'reset_index',
      'round',
      'searchsorted',
      'shift',
      'sort_index',
      'sort_values',
      'take',
      'to_csv',
      'to_dict',
      'to_frame',
      'unique',
      'update',
      'values',
      'view',
  )
  skipped = {
      f'pandas.core.series.Series.{member}': ['*']
      for member in skipped_members
  }

  result = doctests.testmod(pd.core.series, use_beam=False, skip=skipped)
  self.assertEqual(result.failed, 0)
def test_string_tests(self):
  # Runs doctests.testmod (use_beam=False) over the module holding the pandas
  # string-method docstrings and requires zero failures.
  #
  # Definitions were moved to accessor in pandas 1.2.0
  module = (
      pd.core.strings if PD_VERSION < (1, 2) else pd.core.strings.accessor)
  prefix = module.__name__

  wont_implement = {
      # These methods can accept deferred series objects, but not lists
      f'{prefix}.StringMethods.cat': [
          "s.str.cat(['A', 'B', 'C', 'D'], sep=',')",
          "s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-')",
          "s.str.cat(['A', 'B', 'C', 'D'], na_rep='-')"
      ],
      f'{prefix}.StringMethods.repeat': [
          's.str.repeat(repeats=[1, 2, 3])'
      ],
      f'{prefix}.str_repeat': ['s.str.repeat(repeats=[1, 2, 3])'],
      # The pandas get_dummies examples are not cast to CategoricalDtype,
      # which is required for them to work in Beam.
      f'{prefix}.StringMethods.get_dummies': ['*'],
      f'{prefix}.str_get_dummies': ['*'],
      f'{prefix}.StringMethods': ['s.str.split("_")'],
      f'{prefix}.StringMethods.rsplit': ['*'],
      f'{prefix}.StringMethods.split': ['*'],
  }
  skipped = {
      # count() on Series with a NaN produces mismatched type if we
      # have a NaN-only partition.
      f'{prefix}.StringMethods.count': ["s.str.count('a')"],
      f'{prefix}.str_count': ["s.str.count('a')"],

      # Bad test strings in pandas 1.1.x
      f'{prefix}.str_replace': [
          "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
      ],
      f'{prefix}.StringMethods.replace': [
          "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
      ],

      # output has incorrect formatting in 1.2.x
      f'{prefix}.StringMethods.extractall': ['*']
  }

  result = doctests.testmod(
      module,
      use_beam=False,
      wont_implement_ok=wont_implement,
      skip=skipped)
  self.assertEqual(result.failed, 0)
def test_indexing_tests(self):
  # Runs doctests.testmod (use_beam=False) over pd.core.indexing, passing a
  # ['*'] skip entry for each indexer class named below, and requires zero
  # failures.
  indexer_classes = (
      '_AtIndexer',
      '_IndexSlice',
      '_LocIndexer',
      '_iAtIndexer',
      '_iLocIndexer',
  )
  skipped = {
      f'pandas.core.indexing.{cls_name}': ['*']
      for cls_name in indexer_classes
  }

  result = doctests.testmod(pd.core.indexing, use_beam=False, skip=skipped)
  self.assertEqual(result.failed, 0)
def test_series_tests(self):
  # Runs doctests.testmod (use_beam=False, report=True) over pd.core.series
  # with the three tables below and requires zero failures.
  wont_implement = {
      'pandas.core.series.Series.__array__': ['*'],
      'pandas.core.series.Series.array': ['*'],
      'pandas.core.series.Series.cummax': ['*'],
      'pandas.core.series.Series.cummin': ['*'],
      'pandas.core.series.Series.cumsum': ['*'],
      'pandas.core.series.Series.cumprod': ['*'],
      'pandas.core.series.Series.diff': ['*'],
      'pandas.core.series.Series.dot': [
          's.dot(arr)',  # non-deferred result
      ],
      'pandas.core.series.Series.fillna': [
          "df.fillna(method='ffill')",
          'df.fillna(value=values, limit=1)',
      ],
      'pandas.core.series.Series.items': ['*'],
      'pandas.core.series.Series.iteritems': ['*'],
      # default keep is 'first'
      'pandas.core.series.Series.nlargest': [
          "s.nlargest()",
          "s.nlargest(3)",
          "s.nlargest(3, keep='last')",
      ],
      'pandas.core.series.Series.memory_usage': ['*'],
      'pandas.core.series.Series.nsmallest': [
          "s.nsmallest()",
          "s.nsmallest(3)",
          "s.nsmallest(3, keep='last')",
      ],
      'pandas.core.series.Series.pop': ['*'],
      'pandas.core.series.Series.searchsorted': ['*'],
      'pandas.core.series.Series.shift': ['*'],
      'pandas.core.series.Series.take': ['*'],
      'pandas.core.series.Series.to_dict': ['*'],
      'pandas.core.series.Series.unique': ['*'],
      'pandas.core.series.Series.unstack': ['*'],
      'pandas.core.series.Series.values': ['*'],
      'pandas.core.series.Series.view': ['*'],
      'pandas.core.series.Series.append': [
          's1.append(s2, ignore_index=True)',
      ],
  }

  not_implemented = {
      'pandas.core.series.Series.transform': ['*'],
      'pandas.core.series.Series.groupby': [
          'ser.groupby(["a", "b", "a", "b"]).mean()',
          'ser.groupby(["a", "b", "a", np.nan]).mean()',
          'ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()',
          # Grouping by a series is not supported
          'ser.groupby(ser > 100).mean()',
      ],
      'pandas.core.series.Series.reindex': ['*'],
  }

  skipped = {
      # error formatting
      'pandas.core.series.Series.append': [
          's1.append(s2, verify_integrity=True)',
      ],
      # Throws NotImplementedError when modifying df
      'pandas.core.series.Series.transform': ['df'],
      'pandas.core.series.Series.argmax': ['*'],
      'pandas.core.series.Series.argmin': ['*'],
      'pandas.core.series.Series.autocorr': ['*'],
      'pandas.core.series.Series.combine': ['*'],
      'pandas.core.series.Series.combine_first': ['*'],
      'pandas.core.series.Series.compare': ['*'],
      'pandas.core.series.Series.cov': [
          # Differs in LSB on jenkins.
          "s1.cov(s2)",
      ],
      'pandas.core.series.Series.drop_duplicates': ['*'],
      'pandas.core.series.Series.duplicated': ['*'],
      'pandas.core.series.Series.explode': ['*'],
      'pandas.core.series.Series.idxmax': ['*'],
      'pandas.core.series.Series.idxmin': ['*'],
      'pandas.core.series.Series.name': ['*'],
      'pandas.core.series.Series.nonzero': ['*'],
      'pandas.core.series.Series.quantile': ['*'],
      'pandas.core.series.Series.pop': ['ser'],  # testing side effect
      'pandas.core.series.Series.repeat': ['*'],
      'pandas.core.series.Series.replace': ['*'],
      'pandas.core.series.Series.reset_index': ['*'],
      'pandas.core.series.Series.searchsorted': [
          # This doctest seems to be incorrectly parsed.
          "x = pd.Categorical(['apple', 'bread', 'bread',"
      ],
      'pandas.core.series.Series.set_axis': ['*'],
      'pandas.core.series.Series.sort_index': ['*'],
      'pandas.core.series.Series.sort_values': ['*'],
      'pandas.core.series.Series.to_csv': ['*'],
      'pandas.core.series.Series.to_markdown': ['*'],
      'pandas.core.series.Series.update': ['*'],
      'pandas.core.series.Series.view': [
          # Inspection after modification.
          's'
      ],
  }

  result = doctests.testmod(
      pd.core.series,
      use_beam=False,
      report=True,
      wont_implement_ok=wont_implement,
      not_implemented_ok=not_implemented,
      skip=skipped)
  self.assertEqual(result.failed, 0)
def test_groupby_tests(self):
  # Runs doctests.testmod (use_beam=False) first over
  # pd.core.groupby.groupby and then over pd.core.groupby.generic. Each run
  # is checked for zero failures before moving on.

  # --- pd.core.groupby.groupby ---
  groupby_wont_implement = {
      'pandas.core.groupby.groupby.GroupBy.head': ['*'],
      'pandas.core.groupby.groupby.GroupBy.tail': ['*'],
      'pandas.core.groupby.groupby.GroupBy.nth': ['*'],
      'pandas.core.groupby.groupby.GroupBy.cumcount': ['*'],
  }
  groupby_not_implemented = {
      'pandas.core.groupby.groupby.GroupBy.describe': ['*'],
      'pandas.core.groupby.groupby.GroupBy.ngroup': ['*'],
      'pandas.core.groupby.groupby.GroupBy.resample': ['*'],
      'pandas.core.groupby.groupby.GroupBy.sample': ['*'],
      'pandas.core.groupby.groupby.GroupBy.quantile': ['*'],
      'pandas.core.groupby.groupby.BaseGroupBy.pipe': ['*'],
      # pipe tests are in a different location in pandas 1.1.x
      'pandas.core.groupby.groupby._GroupBy.pipe': ['*'],
      'pandas.core.groupby.groupby.GroupBy.nth': [
          "df.groupby('A', as_index=False).nth(1)",
      ],
  }
  groupby_skipped = {
      # Uses iloc to mutate a DataFrame
      'pandas.core.groupby.groupby.GroupBy.resample': [
          'df.iloc[2, 0] = 5',
          'df',
      ],
      # TODO: Raise wont implement for list passed as a grouping column
      # Currently raises unhashable type: list
      'pandas.core.groupby.groupby.GroupBy.ngroup': [
          'df.groupby(["A", [1,1,2,3,2,1]]).ngroup()'
      ],
  }
  result = doctests.testmod(
      pd.core.groupby.groupby,
      use_beam=False,
      wont_implement_ok=groupby_wont_implement,
      not_implemented_ok=groupby_not_implemented,
      skip=groupby_skipped)
  self.assertEqual(result.failed, 0)

  # --- pd.core.groupby.generic ---
  generic_wont_implement = {
      # Returns an array by default, not a Series. WontImplement
      # (non-deferred)
      'pandas.core.groupby.generic.SeriesGroupBy.unique': ['*'],
      # TODO: Is take actually deprecated?
      'pandas.core.groupby.generic.DataFrameGroupBy.take': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.take': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.nsmallest': [
          "s.nsmallest(3, keep='last')",
          "s.nsmallest(3)",
          "s.nsmallest()",
      ],
      'pandas.core.groupby.generic.SeriesGroupBy.nlargest': [
          "s.nlargest(3, keep='last')",
          "s.nlargest(3)",
          "s.nlargest()",
      ],
      'pandas.core.groupby.generic.DataFrameGroupBy.diff': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.diff': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.hist': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.fillna': [
          "df.fillna(method='ffill')",
          'df.fillna(value=values, limit=1)',
      ],
      'pandas.core.groupby.generic.SeriesGroupBy.fillna': [
          "df.fillna(method='ffill')",
          'df.fillna(value=values, limit=1)',
      ],
  }
  generic_not_implemented = {
      'pandas.core.groupby.generic.DataFrameGroupBy.transform': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.idxmax': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.idxmin': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.filter': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.nunique': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.transform': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.idxmax': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.idxmin': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.filter': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.describe': ['*'],
  }
  generic_skipped = {
      'pandas.core.groupby.generic.SeriesGroupBy.cov': [
          # Floating point comparison fails
          's1.cov(s2)',
      ],
      'pandas.core.groupby.generic.DataFrameGroupBy.cov': [
          # Mutates input DataFrame with loc
          # TODO: Replicate in frames_test.py
          "df.loc[df.index[:5], 'a'] = np.nan",
          "df.loc[df.index[5:10], 'b'] = np.nan",
          "df.cov(min_periods=12)",
      ],
      # These examples rely on grouping by a list
      'pandas.core.groupby.generic.SeriesGroupBy.aggregate': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.aggregate': ['*'],
  }
  result = doctests.testmod(
      pd.core.groupby.generic,
      use_beam=False,
      wont_implement_ok=generic_wont_implement,
      not_implemented_ok=generic_not_implemented,
      skip=generic_skipped)
  self.assertEqual(result.failed, 0)
def test_dataframe_tests(self):
  # Runs doctests.testmod (use_beam=False, report=True) over pd.core.frame
  # with the three tables below and requires zero failures.
  #
  # NOTE(review): the multi-line example strings below are presumably
  # compared against the pandas doctest sources, so the whitespace after
  # each "\n" should be confirmed against the upstream docstrings.
  wont_implement = {
      'pandas.core.frame.DataFrame.T': ['*'],
      'pandas.core.frame.DataFrame.cummax': ['*'],
      'pandas.core.frame.DataFrame.cummin': ['*'],
      'pandas.core.frame.DataFrame.cumsum': ['*'],
      'pandas.core.frame.DataFrame.cumprod': ['*'],
      'pandas.core.frame.DataFrame.diff': ['*'],
      'pandas.core.frame.DataFrame.fillna': [
          "df.fillna(method='ffill')",
          'df.fillna(value=values, limit=1)',
      ],
      'pandas.core.frame.DataFrame.items': ['*'],
      'pandas.core.frame.DataFrame.itertuples': ['*'],
      'pandas.core.frame.DataFrame.iterrows': ['*'],
      'pandas.core.frame.DataFrame.iteritems': ['*'],
      # default keep is 'first'
      'pandas.core.frame.DataFrame.nlargest': [
          "df.nlargest(3, 'population')",
          "df.nlargest(3, ['population', 'GDP'])",
          "df.nlargest(3, 'population', keep='last')"
      ],
      'pandas.core.frame.DataFrame.nsmallest': [
          "df.nsmallest(3, 'population')",
          "df.nsmallest(3, ['population', 'GDP'])",
          "df.nsmallest(3, 'population', keep='last')",
      ],
      'pandas.core.frame.DataFrame.replace': [
          "s.replace([1, 2], method='bfill')",
          # Relies on method='pad'
          "s.replace('a', None)",
      ],
      'pandas.core.frame.DataFrame.to_records': ['*'],
      'pandas.core.frame.DataFrame.to_dict': ['*'],
      'pandas.core.frame.DataFrame.to_numpy': ['*'],
      'pandas.core.frame.DataFrame.to_string': ['*'],
      'pandas.core.frame.DataFrame.transpose': ['*'],
      'pandas.core.frame.DataFrame.shape': ['*'],
      'pandas.core.frame.DataFrame.shift': [
          'df.shift(periods=3, freq="D")',
          'df.shift(periods=3, freq="infer")'
      ],
      'pandas.core.frame.DataFrame.unstack': ['*'],
      'pandas.core.frame.DataFrame.memory_usage': ['*'],
      'pandas.core.frame.DataFrame.info': ['*'],
      # Not equal to df.agg('mode', axis='columns', numeric_only=True)
      # because there can be multiple columns if a row has more than one
      # mode
      'pandas.core.frame.DataFrame.mode': [
          "df.mode(axis='columns', numeric_only=True)"
      ],
      'pandas.core.frame.DataFrame.append': [
          'df.append(df2, ignore_index=True)',
          "for i in range(5):\n" +
          " df = df.append({'A': i}, ignore_index=True)",
      ],
  }

  not_implemented = {
      'pandas.core.frame.DataFrame.transform': ['*'],
      'pandas.core.frame.DataFrame.isin': ['*'],
      'pandas.core.frame.DataFrame.melt': ['*'],
      'pandas.core.frame.DataFrame.reindex': ['*'],
      'pandas.core.frame.DataFrame.reindex_axis': ['*'],
      'pandas.core.frame.DataFrame.round': [
          'df.round(decimals)',
      ],

      # We should be able to support pivot and pivot_table for categorical
      # columns
      'pandas.core.frame.DataFrame.pivot': ['*'],

      # We can implement this as a zipping operator, but it won't have the
      # same capability. The doctest includes an example that branches on
      # a deferred result.
      'pandas.core.frame.DataFrame.combine': ['*'],

      # Can be implemented as a zipping operator
      'pandas.core.frame.DataFrame.combine_first': ['*'],

      # Difficult to parallelize but should be possible?
      'pandas.core.frame.DataFrame.dot': [
          # reindex not supported
          's2 = s.reindex([1, 0, 2, 3])',
          'df.dot(s2)',
      ],

      # Trivially elementwise for axis=columns. Relies on global indexing
      # for axis=rows.
      # Difficult to determine proxy, need to inspect function
      'pandas.core.frame.DataFrame.apply': ['*'],

      # Cross-join not implemented
      'pandas.core.frame.DataFrame.merge': [
          "df1.merge(df2, how='cross')"
      ],

      # TODO(BEAM-11711)
      'pandas.core.frame.DataFrame.set_index': [
          "df.set_index([s, s**2])",
      ],
  }

  skipped = {
      # Throws NotImplementedError when modifying df
      'pandas.core.frame.DataFrame.transform': ['df'],
      'pandas.core.frame.DataFrame.axes': [
          # Returns deferred index.
          'df.axes',
      ],
      'pandas.core.frame.DataFrame.compare': ['*'],
      'pandas.core.frame.DataFrame.cov': [
          # Relies on setting entries ahead of time.
          "df.loc[df.index[:5], 'a'] = np.nan",
          "df.loc[df.index[5:10], 'b'] = np.nan",
          'df.cov(min_periods=12)',
      ],
      'pandas.core.frame.DataFrame.drop_duplicates': ['*'],
      'pandas.core.frame.DataFrame.duplicated': ['*'],
      'pandas.core.frame.DataFrame.idxmax': ['*'],
      'pandas.core.frame.DataFrame.idxmin': ['*'],
      'pandas.core.frame.DataFrame.rename': [
          # Returns deferred index.
          'df.index',
          'df.rename(index=str).index',
      ],
      'pandas.core.frame.DataFrame.set_index': [
          # TODO(BEAM-11711): This could pass in the index as
          # a DeferredIndex, and we should fail it as order-sensitive.
          "df.set_index([pd.Index([1, 2, 3, 4]), 'year'])",
      ],
      'pandas.core.frame.DataFrame.set_axis': ['*'],
      'pandas.core.frame.DataFrame.sort_index': ['*'],
      'pandas.core.frame.DataFrame.to_markdown': ['*'],
      'pandas.core.frame.DataFrame.to_parquet': ['*'],
      'pandas.core.frame.DataFrame.value_counts': ['*'],

      'pandas.core.frame.DataFrame.to_records': [
          'df.index = df.index.rename("I")',
          'index_dtypes = f"<S{df.index.str.len().max()}"',  # 1.x
          'index_dtypes = "<S{}".format(df.index.str.len().max())',  # 0.x
          'df.to_records(index_dtypes=index_dtypes)',
      ],

      # These tests use the static method pd.pivot_table, which doesn't
      # actually raise NotImplementedError
      'pandas.core.frame.DataFrame.pivot_table': ['*'],
      # Expected to raise a ValueError, but we raise NotImplementedError
      'pandas.core.frame.DataFrame.pivot': [
          "df.pivot(index='foo', columns='bar', values='baz')"
      ],
      'pandas.core.frame.DataFrame.append': [
          'df',
          # pylint: disable=line-too-long
          "pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)],\n"
          " ignore_index=True)"
      ],
      'pandas.core.frame.DataFrame.eval': ['df'],
      'pandas.core.frame.DataFrame.melt': [
          "df.columns = [list('ABC'), list('DEF')]",
          "df",
      ],
      'pandas.core.frame.DataFrame.merge': [
          # Order-sensitive index, checked in frames_test.py.
          "df1.merge(df2, left_on='lkey', right_on='rkey')",
          "df1.merge(df2, left_on='lkey', right_on='rkey',\n"
          " suffixes=('_left', '_right'))",
          "df1.merge(df2, how='left', on='a')",
      ],
      # The right exception is raised, but the testing framework has
      # trouble matching it.
      'pandas.core.frame.DataFrame.replace': [
          "df.replace({'a string': 'new value', True: False})  # raises"
      ],
      'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],

      # Skipped because "seen_wont_implement" is reset before getting to
      # these calls, so the NameError they raise is not ignored.
      'pandas.core.frame.DataFrame.T': [
          'df1_transposed.dtypes',
          'df2_transposed.dtypes',
      ],
      'pandas.core.frame.DataFrame.transpose': [
          'df1_transposed.dtypes',
          'df2_transposed.dtypes',
      ],

      # Skipped because the example relies on iloc to set a cell to NA.
      # Test is replicated in
      # frames_test::DeferredFrameTest::test_applymap.
      'pandas.core.frame.DataFrame.applymap': [
          'df_copy.iloc[0, 0] = pd.NA',
          "df_copy.applymap(lambda x: len(str(x)), na_action='ignore')",
      ],

      # Skipped so we don't need to install natsort
      'pandas.core.frame.DataFrame.sort_values': [
          'from natsort import index_natsorted',
          'df.sort_values(\n'
          ' by="time",\n'
          ' key=lambda x: np.argsort(index_natsorted(df["time"]))\n'
          ')'
      ],

      # Mode that we don't yet support, documentation added in pandas
      # 1.2.0 (https://github.com/pandas-dev/pandas/issues/35912)
      'pandas.core.frame.DataFrame.aggregate': [
          "df.agg(x=('A', max), y=('B', 'min'), z=('C', np.mean))"
      ],
  }

  result = doctests.testmod(
      pd.core.frame,
      use_beam=False,
      report=True,
      wont_implement_ok=wont_implement,
      not_implemented_ok=not_implemented,
      skip=skipped)
  self.assertEqual(result.failed, 0)
def test_ndframe_tests(self):
  # Runs doctests.testmod (use_beam=False, report=True) over pd.core.generic
  # with the three tables below and requires zero failures.

  # IO methods are tested in io_test.py, so every NDFrame attribute whose
  # name starts with 'to_' gets a ['*'] skip entry.
  write_method_names = [
      name for name in dir(pd.core.generic.NDFrame)
      if name.startswith('to_')
  ]
  skip_writes = {
      f'pandas.core.generic.NDFrame.{name}': ['*']
      for name in write_method_names
  }

  wont_implement = {
      'pandas.core.generic.NDFrame.first': ['*'],
      'pandas.core.generic.NDFrame.head': ['*'],
      'pandas.core.generic.NDFrame.last': ['*'],
      'pandas.core.generic.NDFrame.shift': ['*'],
      'pandas.core.generic.NDFrame.tail': ['*'],
      'pandas.core.generic.NDFrame.take': ['*'],
      'pandas.core.generic.NDFrame.values': ['*'],
      'pandas.core.generic.NDFrame.tz_localize': [
          "s.tz_localize('CET', ambiguous='infer')",
          # np.array is not a deferred object. This use-case is possible
          # with a deferred Series though, which is tested in
          # frames_test.py
          "s.tz_localize('CET', ambiguous=np.array([True, True, False]))",
      ],
      'pandas.core.generic.NDFrame.truncate': [
          # These inputs rely on tail (wont implement, order
          # sensitive) for verification
          "df.tail()",
          "df.loc['2016-01-05':'2016-01-10', :].tail()",
      ],
      'pandas.core.generic.NDFrame.replace': [
          "s.replace([1, 2], method='bfill')",
          # Relies on method='pad'
          "s.replace('a', None)",
      ],
      'pandas.core.generic.NDFrame.fillna': [
          "df.fillna(method='ffill')",
          'df.fillna(value=values, limit=1)',
      ],
  }

  not_implemented = {
      'pandas.core.generic.NDFrame.add_prefix': ['*'],
      'pandas.core.generic.NDFrame.add_suffix': ['*'],
      'pandas.core.generic.NDFrame.asof': ['*'],
      'pandas.core.generic.NDFrame.at_time': ['*'],
      'pandas.core.generic.NDFrame.between_time': ['*'],
      'pandas.core.generic.NDFrame.describe': ['*'],
      'pandas.core.generic.NDFrame.ewm': ['*'],
      'pandas.core.generic.NDFrame.expanding': ['*'],
      'pandas.core.generic.NDFrame.flags': ['*'],
      'pandas.core.generic.NDFrame.interpolate': ['*'],
      'pandas.core.generic.NDFrame.mask': ['*'],
      'pandas.core.generic.NDFrame.pct_change': ['*'],
      'pandas.core.generic.NDFrame.rank': ['*'],
      'pandas.core.generic.NDFrame.reindex': ['*'],
      'pandas.core.generic.NDFrame.reindex_like': ['*'],
      'pandas.core.generic.NDFrame.replace': ['*'],
      'pandas.core.generic.NDFrame.resample': ['*'],
      'pandas.core.generic.NDFrame.rolling': ['*'],
      'pandas.core.generic.NDFrame.sample': ['*'],
      'pandas.core.generic.NDFrame.set_flags': ['*'],
      'pandas.core.generic.NDFrame.squeeze': ['*'],
      'pandas.core.generic.NDFrame.transform': ['*'],
      'pandas.core.generic.NDFrame.truncate': ['*'],
      'pandas.core.generic.NDFrame.where': ['*'],
      'pandas.core.generic.NDFrame.xs': ['*'],
      # argsort unimplemented
      'pandas.core.generic.NDFrame.abs': [
          'df.loc[(df.c - 43).abs().argsort()]',
      ],
  }

  skipped = {
      # Internal test
      'pandas.core.generic.NDFrame._set_axis_name': ['*'],
      # Fails to construct test series. asfreq is not implemented anyway.
      'pandas.core.generic.NDFrame.asfreq': ['*'],
      'pandas.core.generic.NDFrame.astype': ['*'],
      'pandas.core.generic.NDFrame.convert_dtypes': ['*'],
      'pandas.core.generic.NDFrame.copy': ['*'],
      'pandas.core.generic.NDFrame.droplevel': ['*'],
      'pandas.core.generic.NDFrame.infer_objects': ['*'],
      'pandas.core.generic.NDFrame.rank': [
          # Modified dataframe
          'df'
      ],
      'pandas.core.generic.NDFrame.rename': [
          # Seems to be an upstream bug. The actual error has a different
          # message:
          #   TypeError: Index(...) must be called with a collection of
          #   some kind, 2 was passed
          # pandas doctests only verify the type of exception
          'df.rename(2)'
      ],
      # Tests rely on setting index
      'pandas.core.generic.NDFrame.rename_axis': ['*'],
      # The right exception is raised, but the testing framework has
      # trouble matching it.
      'pandas.core.generic.NDFrame.replace': [
          "df.replace({'a string': 'new value', True: False})  # raises"
      ],
      'pandas.core.generic.NDFrame.squeeze': ['*'],

      # NameError
      'pandas.core.generic.NDFrame.resample': ['df'],

      # Skipped so we don't need to install natsort
      'pandas.core.generic.NDFrame.sort_values': [
          'from natsort import index_natsorted',
          'df.sort_values(\n'
          ' by="time",\n'
          ' key=lambda x: np.argsort(index_natsorted(df["time"]))\n'
          ')'
      ],
  }
  # Merged last so the write-method entries take precedence over any
  # explicit entry with the same key.
  skipped.update(skip_writes)

  result = doctests.testmod(
      pd.core.generic,
      use_beam=False,
      report=True,
      wont_implement_ok=wont_implement,
      not_implemented_ok=not_implemented,
      skip=skipped)
  self.assertEqual(result.failed, 0)
def test_dataframe_tests(self):
  # Runs the doctests found in pd.core.frame through doctests.testmod
  # (use_beam=False, report=True) and requires that none of them fail.
  # The three dicts below map a fully qualified doctest name to the example
  # sources it covers.
  # NOTE(review): '*' presumably selects every example of that doctest -
  # confirm against doctests.testmod.
  wont_implement = {
      'pandas.core.frame.DataFrame.T': ['*'],
      'pandas.core.frame.DataFrame.cummax': ['*'],
      'pandas.core.frame.DataFrame.cummin': ['*'],
      'pandas.core.frame.DataFrame.cumsum': ['*'],
      'pandas.core.frame.DataFrame.cumprod': ['*'],
      'pandas.core.frame.DataFrame.diff': ['*'],
      'pandas.core.frame.DataFrame.items': ['*'],
      'pandas.core.frame.DataFrame.itertuples': ['*'],
      'pandas.core.frame.DataFrame.iterrows': ['*'],
      'pandas.core.frame.DataFrame.iteritems': ['*'],
      # keep defaults to 'first'
      'pandas.core.frame.DataFrame.nlargest': [
          "df.nlargest(3, 'population')",
          "df.nlargest(3, ['population', 'GDP'])",
          "df.nlargest(3, 'population', keep='last')"
      ],
      'pandas.core.frame.DataFrame.nsmallest': [
          "df.nsmallest(3, 'population')",
          "df.nsmallest(3, ['population', 'GDP'])",
          "df.nsmallest(3, 'population', keep='last')",
      ],
      'pandas.core.frame.DataFrame.nunique': ['*'],
      'pandas.core.frame.DataFrame.to_records': ['*'],
      'pandas.core.frame.DataFrame.to_dict': ['*'],
      'pandas.core.frame.DataFrame.to_numpy': ['*'],
      'pandas.core.frame.DataFrame.to_string': ['*'],
      'pandas.core.frame.DataFrame.transpose': ['*'],
      'pandas.core.frame.DataFrame.shape': ['*'],
      'pandas.core.frame.DataFrame.shift': [
          'df.shift(periods=3, freq="D")',
          'df.shift(periods=3, freq="infer")'
      ],
      'pandas.core.frame.DataFrame.unstack': ['*'],
      'pandas.core.frame.DataFrame.memory_usage': ['*'],
      'pandas.core.frame.DataFrame.info': ['*'],
      # Differs from df.agg('mode', axis='columns', numeric_only=True):
      # a row with more than one mode yields multiple columns.
      'pandas.core.frame.DataFrame.mode': [
          "df.mode(axis='columns', numeric_only=True)"
      ],
  }

  not_implemented = {
      'pandas.core.frame.DataFrame.isin': ['*'],
      'pandas.core.frame.DataFrame.melt': ['*'],
      'pandas.core.frame.DataFrame.axes': ['*'],
      'pandas.core.frame.DataFrame.count': ['*'],
      'pandas.core.frame.DataFrame.reindex': ['*'],
      'pandas.core.frame.DataFrame.reindex_axis': ['*'],
      # pivot and pivot_table should be supportable for categorical
      # columns.
      'pandas.core.frame.DataFrame.pivot': ['*'],
      # DataFrame.__getitem__ cannot be used as loc
      'pandas.core.frame.DataFrame.query': [
          'df[df.A > df.B]',
          "df[df.B == df['C C']]"
      ],
      # Could be implemented as a zipping operator, though without the
      # same capability: the doctest has an example that branches on a
      # deferred result.
      'pandas.core.frame.DataFrame.combine': ['*'],
      # Could be implemented as a zipping operator
      'pandas.core.frame.DataFrame.combine_first': ['*'],
      # Hard to parallelize, but should be possible?
      'pandas.core.frame.DataFrame.corr': ['*'],
      'pandas.core.frame.DataFrame.cov': ['*'],
      'pandas.core.frame.DataFrame.dot': ['*'],
      # element-wise
      'pandas.core.frame.DataFrame.eval': ['*'],
      'pandas.core.frame.DataFrame.explode': ['*'],
      # Trivially elementwise for axis=columns; axis=rows relies on
      # global indexing.
      'pandas.core.frame.DataFrame.drop': ['*'],
      'pandas.core.frame.DataFrame.rename': ['*'],
      'pandas.core.frame.DataFrame.apply': ['*'],
      # A zipping operation when the input is a DeferredSeries
      'pandas.core.frame.DataFrame.assign': ['*'],
      # Possible in theory for bounded inputs?
      'pandas.core.frame.DataFrame.append': ['*'],
  }

  skipped = {
      'pandas.core.frame.DataFrame.compare': ['*'],
      'pandas.core.frame.DataFrame.drop_duplicates': ['*'],
      'pandas.core.frame.DataFrame.duplicated': ['*'],
      'pandas.core.frame.DataFrame.groupby': [
          'df.groupby(level=0).mean()',
          'df.groupby(level="Type").mean()',
          'df.groupby(by=["b"], dropna=False).sum()',
          'df.groupby(by="a", dropna=False).sum()'
      ],
      'pandas.core.frame.DataFrame.idxmax': ['*'],
      'pandas.core.frame.DataFrame.idxmin': ['*'],
      'pandas.core.frame.DataFrame.pop': ['*'],
      'pandas.core.frame.DataFrame.set_axis': ['*'],
      'pandas.core.frame.DataFrame.sort_index': ['*'],
      'pandas.core.frame.DataFrame.to_markdown': ['*'],
      'pandas.core.frame.DataFrame.to_parquet': ['*'],
      'pandas.core.frame.DataFrame.value_counts': ['*'],
      'pandas.core.frame.DataFrame.to_records': [
          'df.index = df.index.rename("I")',
          'index_dtypes = f"<S{df.index.str.len().max()}"',  # 1.x
          'index_dtypes = "<S{}".format(df.index.str.len().max())',  #0.x
          'df.to_records(index_dtypes=index_dtypes)',
      ],
      # These examples call the static method pd.pivot_table, which does
      # not actually raise NotImplementedError.
      'pandas.core.frame.DataFrame.pivot_table': ['*'],
      # The doctest expects a ValueError; we raise NotImplementedError.
      'pandas.core.frame.DataFrame.pivot': [
          "df.pivot(index='foo', columns='bar', values='baz')"
      ],
      'pandas.core.frame.DataFrame.append': [
          'df',
          # pylint: disable=line-too-long
          "pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)],\n"
          " ignore_index=True)"
      ],
      'pandas.core.frame.DataFrame.eval': ['df'],
      # No override for __matmul__ and friends
      'pandas.core.frame.DataFrame.dot': ['df @ other'],
      'pandas.core.frame.DataFrame.melt': [
          "df.columns = [list('ABC'), list('DEF')]",
          "df"
      ],
      'pandas.core.frame.DataFrame.merge': [
          # Order-sensitive index; covered in frames_test.py.
          "df1.merge(df2, left_on='lkey', right_on='rkey')",
          "df1.merge(df2, left_on='lkey', right_on='rkey',\n"
          " suffixes=('_left', '_right'))",
      ],
      # The right exception is raised, but the testing framework has
      # trouble matching it.
      'pandas.core.frame.DataFrame.replace': [
          "df.replace({'a string': 'new value', True: False}) # raises"
      ],
      # Should raise WontImplement (order-sensitive)
      'pandas.core.frame.DataFrame.set_index': [
          "df.set_index([pd.Index([1, 2, 3, 4]), 'year'])",
          "df.set_index([s, s**2])",
      ],
      'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],
      # DeferredSeries has no attribute dtype. Should this be allowed and
      # deferred to the proxy?
      'pandas.core.frame.DataFrame.iterrows': ["print(df['int'].dtype)"],
      # "seen_wont_implement" is reset before these calls are reached, so
      # the NameError they raise is not ignored.
      'pandas.core.frame.DataFrame.T': [
          'df1_transposed.dtypes',
          'df2_transposed.dtypes'
      ],
      'pandas.core.frame.DataFrame.transpose': [
          'df1_transposed.dtypes',
          'df2_transposed.dtypes'
      ],
  }

  result = doctests.testmod(
      pd.core.frame,
      use_beam=False,
      report=True,
      wont_implement_ok=wont_implement,
      not_implemented_ok=not_implemented,
      skip=skipped)
  self.assertEqual(result.failed, 0)
def test_dataframe_tests(self):
  # Runs the doctests found in pd.core.frame through doctests.testmod
  # (use_beam=False, report=True) and requires that none of them fail.
  # Both dicts map a fully qualified doctest name to the example sources
  # it covers.
  # NOTE(review): '*' presumably selects every example of that doctest -
  # confirm against doctests.testmod.
  wont_implement = {
      'pandas.core.frame.DataFrame.T': ['*'],
      'pandas.core.frame.DataFrame.cummax': ['*'],
      'pandas.core.frame.DataFrame.cummin': ['*'],
      'pandas.core.frame.DataFrame.cumsum': ['*'],
      'pandas.core.frame.DataFrame.cumprod': ['*'],
      'pandas.core.frame.DataFrame.diff': ['*'],
      'pandas.core.frame.DataFrame.items': ['*'],
      'pandas.core.frame.DataFrame.itertuples': ['*'],
      'pandas.core.frame.DataFrame.iterrows': ['*'],
      'pandas.core.frame.DataFrame.iteritems': ['*'],
      # keep defaults to 'first'
      'pandas.core.frame.DataFrame.nlargest': [
          "df.nlargest(3, 'population')",
          "df.nlargest(3, ['population', 'GDP'])",
          "df.nlargest(3, 'population', keep='last')"
      ],
      'pandas.core.frame.DataFrame.nsmallest': [
          "df.nsmallest(3, 'population')",
          "df.nsmallest(3, ['population', 'GDP'])",
          "df.nsmallest(3, 'population', keep='last')",
      ],
      'pandas.core.frame.DataFrame.nunique': ['*'],
      'pandas.core.frame.DataFrame.to_records': ['*'],
      'pandas.core.frame.DataFrame.to_dict': ['*'],
      'pandas.core.frame.DataFrame.to_numpy': ['*'],
      'pandas.core.frame.DataFrame.to_string': ['*'],
      'pandas.core.frame.DataFrame.transpose': ['*'],
      'pandas.core.frame.DataFrame.shape': ['*'],
      'pandas.core.frame.DataFrame.shift': [
          'df.shift(periods=3, freq="D")',
          'df.shift(periods=3, freq="infer")'
      ],
      'pandas.core.frame.DataFrame.unstack': ['*'],
      'pandas.core.frame.DataFrame.memory_usage': ['*'],
  }

  skipped = {
      'pandas.core.frame.DataFrame.T': [
          'df1_transposed.dtypes',
          'df2_transposed.dtypes'
      ],
      'pandas.core.frame.DataFrame.agg': ['*'],
      'pandas.core.frame.DataFrame.aggregate': ['*'],
      'pandas.core.frame.DataFrame.append': ['*'],
      'pandas.core.frame.DataFrame.apply': ['*'],
      'pandas.core.frame.DataFrame.applymap': ['df ** 2'],
      'pandas.core.frame.DataFrame.assign': ['*'],
      'pandas.core.frame.DataFrame.axes': ['*'],
      'pandas.core.frame.DataFrame.combine': ['*'],
      'pandas.core.frame.DataFrame.combine_first': ['*'],
      'pandas.core.frame.DataFrame.compare': ['*'],
      'pandas.core.frame.DataFrame.corr': ['*'],
      'pandas.core.frame.DataFrame.count': ['*'],
      'pandas.core.frame.DataFrame.cov': ['*'],
      'pandas.core.frame.DataFrame.dot': ['*'],
      'pandas.core.frame.DataFrame.drop': ['*'],
      'pandas.core.frame.DataFrame.drop_duplicates': ['*'],
      'pandas.core.frame.DataFrame.duplicated': ['*'],
      'pandas.core.frame.DataFrame.eval': ['*'],
      'pandas.core.frame.DataFrame.explode': ['*'],
      'pandas.core.frame.DataFrame.groupby': [
          # More keyword arguments.
          'df.groupby(level=0).mean()',
          'df.groupby(level="Type").mean()',
          'df.groupby(by=["b"], dropna=False).sum()',
          'df.groupby(by="a", dropna=False).sum()'
      ],
      'pandas.core.frame.DataFrame.idxmax': ['*'],
      'pandas.core.frame.DataFrame.idxmin': ['*'],
      'pandas.core.frame.DataFrame.info': ['*'],
      'pandas.core.frame.DataFrame.isin': ['*'],
      'pandas.core.frame.DataFrame.iterrows': ["print(df['int'].dtype)"],
      'pandas.core.frame.DataFrame.melt': ['*'],
      'pandas.core.frame.DataFrame.memory_usage': ['*'],
      'pandas.core.frame.DataFrame.merge': [
          # Order-sensitive index; covered in frames_test.py.
          "df1.merge(df2, left_on='lkey', right_on='rkey')",
          "df1.merge(df2, left_on='lkey', right_on='rkey',\n"
          " suffixes=('_left', '_right'))",
      ],
      # Differs from df.agg('mode', axis='columns', numeric_only=True)
      'pandas.core.frame.DataFrame.mode': [
          "df.mode(axis='columns', numeric_only=True)"
      ],
      'pandas.core.frame.DataFrame.pivot': ['*'],
      'pandas.core.frame.DataFrame.pivot_table': ['*'],
      'pandas.core.frame.DataFrame.pop': ['*'],
      'pandas.core.frame.DataFrame.query': ['*'],
      'pandas.core.frame.DataFrame.reindex': ['*'],
      # Sets df.index
      'pandas.core.frame.DataFrame.reindex_axis': ['*'],
      'pandas.core.frame.DataFrame.rename': ['*'],
      # The right exception is raised, but the testing framework has
      # trouble matching it.
      'pandas.core.frame.DataFrame.replace': [
          "df.replace({'a string': 'new value', True: False}) # raises"
      ],
      # Uses unseeded np.random.
      'pandas.core.frame.DataFrame.round': ['*'],
      'pandas.core.frame.DataFrame.set_axis': ['*'],
      'pandas.core.frame.DataFrame.set_index': ['*'],
      'pandas.core.frame.DataFrame.sort_index': ['*'],
      'pandas.core.frame.DataFrame.transpose': [
          'df1_transposed.dtypes',
          'df2_transposed.dtypes'
      ],
      'pandas.core.frame.DataFrame.to_markdown': ['*'],
      'pandas.core.frame.DataFrame.to_parquet': ['*'],
      # Uses df.index
      'pandas.core.frame.DataFrame.to_records': ['*'],
      'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],
      'pandas.core.frame.DataFrame.value_counts': ['*'],
  }

  result = doctests.testmod(
      pd.core.frame,
      use_beam=False,
      report=True,
      wont_implement_ok=wont_implement,
      skip=skipped)
  self.assertEqual(result.failed, 0)
def test_series_tests(self):
  # Runs the doctests found in pd.core.series through doctests.testmod
  # (use_beam=False, report=True) and requires that none of them fail.
  # The three dicts below map a fully qualified doctest name to the example
  # sources it covers.
  # NOTE(review): '*' presumably selects every example of that doctest -
  # confirm against doctests.testmod.
  wont_implement = {
      'pandas.core.series.Series.__array__': ['*'],
      'pandas.core.series.Series.cummax': ['*'],
      'pandas.core.series.Series.cummin': ['*'],
      'pandas.core.series.Series.cumsum': ['*'],
      'pandas.core.series.Series.cumprod': ['*'],
      'pandas.core.series.Series.diff': ['*'],
      'pandas.core.series.Series.items': ['*'],
      'pandas.core.series.Series.iteritems': ['*'],
      # keep defaults to 'first'
      'pandas.core.series.Series.nlargest': [
          "s.nlargest()",
          "s.nlargest(3)",
          "s.nlargest(3, keep='last')",
      ],
      'pandas.core.series.Series.memory_usage': ['*'],
      'pandas.core.series.Series.nsmallest': [
          "s.nsmallest()",
          "s.nsmallest(3)",
          "s.nsmallest(3, keep='last')",
      ],
      'pandas.core.series.Series.searchsorted': ['*'],
      'pandas.core.series.Series.shift': ['*'],
      'pandas.core.series.Series.take': ['*'],
      'pandas.core.series.Series.to_dict': ['*'],
      'pandas.core.series.Series.unique': ['*'],
      'pandas.core.series.Series.unstack': ['*'],
      'pandas.core.series.Series.values': ['*'],
      'pandas.core.series.Series.view': ['*'],
  }

  not_implemented = {
      'pandas.core.series.Series.reindex': ['*'],
  }

  skipped = {
      'pandas.core.series.Series.array': ['*'],
      'pandas.core.series.Series.append': ['*'],
      'pandas.core.series.Series.argmax': ['*'],
      'pandas.core.series.Series.argmin': ['*'],
      'pandas.core.series.Series.autocorr': ['*'],
      'pandas.core.series.Series.combine': ['*'],
      'pandas.core.series.Series.combine_first': ['*'],
      'pandas.core.series.Series.compare': ['*'],
      'pandas.core.series.Series.corr': ['*'],
      'pandas.core.series.Series.count': ['*'],
      'pandas.core.series.Series.cov': ['*'],
      'pandas.core.series.Series.dot': ['*'],
      'pandas.core.series.Series.drop': ['*'],
      'pandas.core.series.Series.drop_duplicates': ['*'],
      'pandas.core.series.Series.duplicated': ['*'],
      'pandas.core.series.Series.explode': ['*'],
      'pandas.core.series.Series.groupby': ['*'],
      'pandas.core.series.Series.idxmax': ['*'],
      'pandas.core.series.Series.idxmin': ['*'],
      'pandas.core.series.Series.name': ['*'],
      'pandas.core.series.Series.nonzero': ['*'],
      'pandas.core.series.Series.pop': ['*'],
      'pandas.core.series.Series.quantile': ['*'],
      'pandas.core.series.Series.rename': ['*'],
      'pandas.core.series.Series.repeat': ['*'],
      'pandas.core.series.Series.replace': ['*'],
      'pandas.core.series.Series.reset_index': ['*'],
      'pandas.core.series.Series.searchsorted': [
          # This doctest appears to be parsed incorrectly.
          "x = pd.Categorical(['apple', 'bread', 'bread',"
      ],
      'pandas.core.series.Series.set_axis': ['*'],
      'pandas.core.series.Series.sort_index': ['*'],
      'pandas.core.series.Series.sort_values': ['*'],
      'pandas.core.series.Series.to_csv': ['*'],
      'pandas.core.series.Series.to_markdown': ['*'],
      'pandas.core.series.Series.update': ['*'],
      'pandas.core.series.Series.view': [
          # Inspects the series after it was modified.
          's'
      ],
  }

  result = doctests.testmod(
      pd.core.series,
      use_beam=False,
      report=True,
      wont_implement_ok=wont_implement,
      not_implemented_ok=not_implemented,
      skip=skipped)
  self.assertEqual(result.failed, 0)
def test_groupby_tests(self):
  # Runs doctests.testmod (use_beam=False) over two pandas modules in turn,
  # pd.core.groupby.groupby and then pd.core.groupby.generic, requiring
  # zero failures after each run. Each dict maps a fully qualified doctest
  # name to the example sources it covers.
  # NOTE(review): '*' presumably selects every example of that doctest -
  # confirm against doctests.testmod.

  # --- pd.core.groupby.groupby ---
  base_wont_implement = {
      'pandas.core.groupby.groupby.GroupBy.head': ['*'],
      'pandas.core.groupby.groupby.GroupBy.tail': ['*'],
      'pandas.core.groupby.groupby.GroupBy.nth': ['*'],
      'pandas.core.groupby.groupby.GroupBy.cumcount': ['*'],
      'pandas.core.groupby.groupby.GroupBy.resample': ['*'],
  }
  base_not_implemented = {
      'pandas.core.groupby.groupby.GroupBy.ngroup': ['*'],
      'pandas.core.groupby.groupby.GroupBy.sample': ['*'],
      'pandas.core.groupby.groupby.GroupBy.rank': ['*'],
      'pandas.core.groupby.groupby.GroupBy.nth': [
          "df.groupby('A', as_index=False).nth(1)",
      ],
  }
  base_skipped = {
      # Mutates a DataFrame through iloc
      'pandas.core.groupby.groupby.GroupBy.resample': [
          'df.iloc[2, 0] = 5',
          'df',
      ],
      # df is reassigned
      'pandas.core.groupby.groupby.GroupBy.rank': ['df'],
      # TODO: Raise wont implement for list passed as a grouping column
      # Currently raises unhashable type: list
      'pandas.core.groupby.groupby.GroupBy.ngroup': [
          'df.groupby(["A", [1,1,2,3,2,1]]).ngroup()'
      ],
  }
  groupby_result = doctests.testmod(
      pd.core.groupby.groupby,
      use_beam=False,
      wont_implement_ok=base_wont_implement,
      not_implemented_ok=base_not_implemented,
      skip=base_skipped)
  self.assertEqual(groupby_result.failed, 0)

  # --- pd.core.groupby.generic ---
  generic_wont_implement = {
      # Returns an array by default rather than a Series. WontImplement
      # (non-deferred)
      'pandas.core.groupby.generic.SeriesGroupBy.unique': ['*'],
      # TODO: Is take actually deprecated?
      'pandas.core.groupby.generic.DataFrameGroupBy.take': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.take': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.nsmallest': [
          "s.nsmallest(3, keep='last')",
          "s.nsmallest(3)",
          "s.nsmallest()",
      ],
      'pandas.core.groupby.generic.SeriesGroupBy.nlargest': [
          "s.nlargest(3, keep='last')",
          "s.nlargest(3)",
          "s.nlargest()",
      ],
      'pandas.core.groupby.generic.DataFrameGroupBy.diff': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.diff': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.hist': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.fillna': [
          'df.fillna(method=\'ffill\')',
          'df.fillna(method="ffill")',
          'df.fillna(value=values, limit=1)',
      ],
      'pandas.core.groupby.generic.SeriesGroupBy.fillna': [
          'df.fillna(method=\'ffill\')',
          'df.fillna(method="ffill")',
          'df.fillna(value=values, limit=1)',
      ],
  }
  generic_not_implemented = {
      'pandas.core.groupby.generic.DataFrameGroupBy.idxmax': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.idxmin': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.transform': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.idxmax': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.idxmin': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.apply': ['*'],
  }
  generic_skipped = {
      'pandas.core.groupby.generic.SeriesGroupBy.cov': [
          # Floating point comparison fails
          's1.cov(s2)',
      ],
      'pandas.core.groupby.generic.DataFrameGroupBy.cov': [
          # Mutates the input DataFrame through loc
          # TODO: Replicate in frames_test.py
          "df.loc[df.index[:5], 'a'] = np.nan",
          "df.loc[df.index[5:10], 'b'] = np.nan",
          "df.cov(min_periods=12)",
      ],
      # These examples depend on grouping by a list
      'pandas.core.groupby.generic.SeriesGroupBy.aggregate': ['*'],
      'pandas.core.groupby.generic.DataFrameGroupBy.aggregate': ['*'],
      'pandas.core.groupby.generic.SeriesGroupBy.transform': [
          # Dropping invalid columns during a transform is unsupported.
          'grouped.transform(lambda x: (x - x.mean()) / x.std())'
      ],
      'pandas.core.groupby.generic.DataFrameGroupBy.transform': [
          # Dropping invalid columns during a transform is unsupported.
          'grouped.transform(lambda x: (x - x.mean()) / x.std())'
      ],
      # idxmax/idxmin are skipped due to an issue with the test framework
      'pandas.core.groupby.generic.SeriesGroupBy.idxmin': ['s.idxmin()'],
      'pandas.core.groupby.generic.SeriesGroupBy.idxmax': ['s.idxmax()'],
      # Uses as_index, which is currently not_implemented
      'pandas.core.groupby.generic.DataFrameGroupBy.value_counts': [
          "df.groupby('gender', as_index=False).value_counts()",
          # pylint: disable=line-too-long
          "df.groupby('gender', as_index=False).value_counts(normalize=True)",
      ],
  }
  generic_result = doctests.testmod(
      pd.core.groupby.generic,
      use_beam=False,
      wont_implement_ok=generic_wont_implement,
      not_implemented_ok=generic_not_implemented,
      skip=generic_skipped)
  self.assertEqual(generic_result.failed, 0)
def test_dataframe_tests(self):
  # Runs the doctests found in pd.core.frame through doctests.testmod
  # (use_beam=False, report=True) and requires that none of them fail.
  # The three dicts below map a fully qualified doctest name to the example
  # sources it covers.
  # NOTE(review): '*' presumably selects every example of that doctest -
  # confirm against doctests.testmod.
  wont_implement = {
      'pandas.core.frame.DataFrame.T': ['*'],
      'pandas.core.frame.DataFrame.cummax': ['*'],
      'pandas.core.frame.DataFrame.cummin': ['*'],
      'pandas.core.frame.DataFrame.cumsum': ['*'],
      'pandas.core.frame.DataFrame.cumprod': ['*'],
      'pandas.core.frame.DataFrame.diff': ['*'],
      'pandas.core.frame.DataFrame.fillna': [
          'df.fillna(method=\'ffill\')',
          'df.fillna(method="ffill")',
          'df.fillna(value=values, limit=1)',
      ],
      'pandas.core.frame.DataFrame.items': ['*'],
      'pandas.core.frame.DataFrame.itertuples': ['*'],
      'pandas.core.frame.DataFrame.iterrows': ['*'],
      'pandas.core.frame.DataFrame.iteritems': ['*'],
      # keep defaults to 'first'
      'pandas.core.frame.DataFrame.nlargest': [
          "df.nlargest(3, 'population')",
          "df.nlargest(3, ['population', 'GDP'])",
          "df.nlargest(3, 'population', keep='last')"
      ],
      'pandas.core.frame.DataFrame.nsmallest': [
          "df.nsmallest(3, 'population')",
          "df.nsmallest(3, ['population', 'GDP'])",
          "df.nsmallest(3, 'population', keep='last')",
      ],
      'pandas.core.frame.DataFrame.replace': [
          "s.replace([1, 2], method='bfill')",
          # Relies on method='pad'
          "s.replace('a')",
          # Relies on method='pad'
          # value=None is not valid for pandas < 1.4
          "s.replace('a', None)",
          # Implicitly uses method='pad', but the output does not depend
          # on that behavior. Verified independently in
          # frames_test.py::DeferredFrameTest::test_replace
          "df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})"
      ],
      'pandas.core.frame.DataFrame.to_records': ['*'],
      'pandas.core.frame.DataFrame.to_dict': ['*'],
      'pandas.core.frame.DataFrame.to_numpy': ['*'],
      'pandas.core.frame.DataFrame.to_string': ['*'],
      'pandas.core.frame.DataFrame.transpose': ['*'],
      'pandas.core.frame.DataFrame.shape': ['*'],
      'pandas.core.frame.DataFrame.shift': [
          'df.shift(periods=3)',
          'df.shift(periods=3, fill_value=0)',
      ],
      'pandas.core.frame.DataFrame.unstack': ['*'],
      'pandas.core.frame.DataFrame.memory_usage': ['*'],
      'pandas.core.frame.DataFrame.info': ['*'],
      # Differs from df.agg('mode', axis='columns', numeric_only=True):
      # a row with more than one mode yields multiple columns.
      'pandas.core.frame.DataFrame.mode': [
          "df.mode(axis='columns', numeric_only=True)"
      ],
      'pandas.core.frame.DataFrame.append': [
          'df.append(df2, ignore_index=True)',
          "for i in range(5):\n"
          " df = df.append({'A': i}, ignore_index=True)",
      ],
      'pandas.core.frame.DataFrame.sort_index': ['*'],
      'pandas.core.frame.DataFrame.sort_values': ['*'],
      'pandas.core.frame.DataFrame.melt': [
          "df.melt(id_vars=['A'], value_vars=['B'])",
          "df.melt(id_vars=['A'], value_vars=['B', 'C'])",
          "df.melt(col_level=0, id_vars=['A'], value_vars=['B'])",
          "df.melt(id_vars=[('A', 'D')], value_vars=[('B', 'E')])",
          "df.melt(id_vars=['A'], value_vars=['B'],\n"
          " var_name='myVarname', value_name='myValname')"
      ],
      # Most keep= options are order-sensitive
      'pandas.core.frame.DataFrame.drop_duplicates': ['*'],
      'pandas.core.frame.DataFrame.duplicated': [
          'df.duplicated()',
          "df.duplicated(keep='last')",
          "df.duplicated(subset=['brand'])",
      ],
      'pandas.core.frame.DataFrame.reindex': ['*'],
      'pandas.core.frame.DataFrame.dot': [
          # reindex not supported
          's2 = s.reindex([1, 0, 2, 3])',
      ],
      'pandas.core.frame.DataFrame.resample': ['*'],
      'pandas.core.frame.DataFrame.values': ['*'],
  }

  not_implemented = {
      'pandas.core.frame.DataFrame.transform': [
          # str arg not supported. Tested with np.sum in
          # frames_test.py::DeferredFrameTest::test_groupby_transform_sum
          "df.groupby('Date')['Data'].transform('sum')",
      ],
      'pandas.core.frame.DataFrame.melt': ['*'],
      'pandas.core.frame.DataFrame.reindex_axis': ['*'],
      'pandas.core.frame.DataFrame.round': [
          'df.round(decimals)',
      ],
      # pivot and pivot_table should be supportable for categorical
      # columns.
      'pandas.core.frame.DataFrame.pivot': ['*'],
      # Trivially elementwise for axis=columns; axis=rows relies on
      # global indexing.
      # The proxy is hard to determine; the function must be inspected.
      'pandas.core.frame.DataFrame.apply': ['*'],
      # Cross-join not implemented
      'pandas.core.frame.DataFrame.merge': [
          "df1.merge(df2, how='cross')"
      ],
      # TODO(BEAM-11711)
      'pandas.core.frame.DataFrame.set_index': [
          "df.set_index([s, s**2])",
      ],
      'pandas.core.frame.DataFrame.set_axis': [
          "df.set_axis(range(0,2), axis='index')",
      ],
      # TODO(BEAM-12495)
      'pandas.core.frame.DataFrame.value_counts': [
          'df.value_counts(dropna=False)'
      ],
  }

  skipped = {
      # Building a DataFrame from a dictionary and Series needs len(),
      # a non-deferred operation that we do not allow.
      'pandas.core.frame.DataFrame': [
          'pd.DataFrame(data=d, index=[0, 1, 2, 3])',
      ],
      # s2 created with reindex
      'pandas.core.frame.DataFrame.dot': [
          'df.dot(s2)',
      ],
      'pandas.core.frame.DataFrame.resample': ['df'],
      'pandas.core.frame.DataFrame.asfreq': ['*'],
      # Throws NotImplementedError when modifying df
      'pandas.core.frame.DataFrame.axes': [
          # Returns deferred index.
          'df.axes',
      ],
      # Skipped because the example relies on loc to set cells in df2
      'pandas.core.frame.DataFrame.compare': ['*'],
      'pandas.core.frame.DataFrame.cov': [
          # Relies on setting entries ahead of time.
          "df.loc[df.index[:5], 'a'] = np.nan",
          "df.loc[df.index[5:10], 'b'] = np.nan",
          'df.cov(min_periods=12)',
      ],
      'pandas.core.frame.DataFrame.rename': [
          # Returns deferred index.
          'df.index',
          'df.rename(index=str).index',
      ],
      'pandas.core.frame.DataFrame.set_index': [
          # TODO(BEAM-11711): This could pass in the index as
          # a DeferredIndex, and we should fail it as order-sensitive.
          "df.set_index([pd.Index([1, 2, 3, 4]), 'year'])",
      ],
      'pandas.core.frame.DataFrame.set_axis': [
          # This should pass as set_axis(axis='columns')
          # and fail with set_axis(axis='index')
          "df.set_axis(['a', 'b', 'c'], axis='index')"
      ],
      'pandas.core.frame.DataFrame.to_markdown': ['*'],
      'pandas.core.frame.DataFrame.to_parquet': ['*'],
      # The right exception is raised, but the testing framework has
      # trouble matching it. Tested in `frames_test.py`.
      'pandas.core.frame.DataFrame.insert': [
          'df',
          'df.insert(1, "newcol", [99, 99])',
          'df.insert(0, "col1", [100, 100], allow_duplicates=True)'
      ],
      'pandas.core.frame.DataFrame.to_records': [
          'df.index = df.index.rename("I")',
          'index_dtypes = f"<S{df.index.str.len().max()}"',  # 1.x
          'index_dtypes = "<S{}".format(df.index.str.len().max())',  #0.x
          'df.to_records(index_dtypes=index_dtypes)',
      ],
      # These examples call the static method pd.pivot_table, which does
      # not actually raise NotImplementedError.
      'pandas.core.frame.DataFrame.pivot_table': ['*'],
      # The doctest expects a ValueError; we raise NotImplementedError.
      'pandas.core.frame.DataFrame.pivot': [
          "df.pivot(index='foo', columns='bar', values='baz')"
      ],
      'pandas.core.frame.DataFrame.append': [
          'df',
          # pylint: disable=line-too-long
          "pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)],\n"
          " ignore_index=True)"
      ],
      'pandas.core.frame.DataFrame.eval': ['df'],
      'pandas.core.frame.DataFrame.melt': [
          "df.columns = [list('ABC'), list('DEF')]",
          "df"
      ],
      'pandas.core.frame.DataFrame.merge': [
          # Order-sensitive index; covered in frames_test.py.
          "df1.merge(df2, left_on='lkey', right_on='rkey')",
          "df1.merge(df2, left_on='lkey', right_on='rkey',\n"
          " suffixes=('_left', '_right'))",
          "df1.merge(df2, how='left', on='a')",
      ],
      # The right exception is raised, but the testing framework has
      # trouble matching it.
      'pandas.core.frame.DataFrame.replace': [
          "df.replace({'a string': 'new value', True: False}) # raises"
      ],
      'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],
      # "seen_wont_implement" is reset before these calls are reached, so
      # the NameError they raise is not ignored.
      'pandas.core.frame.DataFrame.T': [
          'df1_transposed.dtypes',
          'df2_transposed.dtypes'
      ],
      'pandas.core.frame.DataFrame.transpose': [
          'df1_transposed.dtypes',
          'df2_transposed.dtypes'
      ],
      # Skipped because the example relies on iloc to set a cell to NA.
      # Replicated in frames_test::DeferredFrameTest::test_applymap.
      'pandas.core.frame.DataFrame.applymap': [
          'df_copy.iloc[0, 0] = pd.NA',
          "df_copy.applymap(lambda x: len(str(x)), na_action='ignore')",
      ],
      # Skipped so we don't need to install natsort
      'pandas.core.frame.DataFrame.sort_values': [
          'from natsort import index_natsorted',
          'df.sort_values(\n'
          ' by="time",\n'
          ' key=lambda x: np.argsort(index_natsorted(df["time"]))\n'
          ')'
      ],
      # A mode we do not support yet; its documentation was added in
      # pandas 1.2.0 (https://github.com/pandas-dev/pandas/issues/35912)
      'pandas.core.frame.DataFrame.aggregate': [
          "df.agg(x=('A', max), y=('B', 'min'), z=('C', np.mean))"
      ],
  }

  result = doctests.testmod(
      pd.core.frame,
      use_beam=False,
      report=True,
      wont_implement_ok=wont_implement,
      not_implemented_ok=not_implemented,
      skip=skipped)
  self.assertEqual(result.failed, 0)
def test_string_tests(self):
  # TODO(BEAM-10720)
  # Runs the doctests found in pd.core.strings through doctests.testmod
  # (use_beam=False) and requires that none of them fail. Every listed
  # doctest is passed in the skip dict with the '*' pattern.
  # NOTE(review): '*' presumably selects every example of that doctest -
  # confirm against doctests.testmod.
  skipped = {
      'pandas.core.strings.StringMethods': ['*'],
      'pandas.core.strings.StringMethods.capitalize': ['*'],
      'pandas.core.strings.StringMethods.casefold': ['*'],
      'pandas.core.strings.StringMethods.cat': ['*'],
      'pandas.core.strings.StringMethods.contains': ['*'],
      'pandas.core.strings.StringMethods.count': ['*'],
      'pandas.core.strings.StringMethods.endswith': ['*'],
      'pandas.core.strings.StringMethods.extract': ['*'],
      'pandas.core.strings.StringMethods.extractall': ['*'],
      'pandas.core.strings.StringMethods.findall': ['*'],
      'pandas.core.strings.StringMethods.get': ['*'],
      'pandas.core.strings.StringMethods.get_dummies': ['*'],
      'pandas.core.strings.StringMethods.isalnum': ['*'],
      'pandas.core.strings.StringMethods.isalpha': ['*'],
      'pandas.core.strings.StringMethods.isdecimal': ['*'],
      'pandas.core.strings.StringMethods.isdigit': ['*'],
      'pandas.core.strings.StringMethods.islower': ['*'],
      'pandas.core.strings.StringMethods.isnumeric': ['*'],
      'pandas.core.strings.StringMethods.isspace': ['*'],
      'pandas.core.strings.StringMethods.istitle': ['*'],
      'pandas.core.strings.StringMethods.isupper': ['*'],
      'pandas.core.strings.StringMethods.join': ['*'],
      'pandas.core.strings.StringMethods.len': ['*'],
      'pandas.core.strings.StringMethods.lower': ['*'],
      'pandas.core.strings.StringMethods.lstrip': ['*'],
      'pandas.core.strings.StringMethods.pad': ['*'],
      'pandas.core.strings.StringMethods.partition': ['*'],
      'pandas.core.strings.StringMethods.repeat': ['*'],
      'pandas.core.strings.StringMethods.replace': ['*'],
      'pandas.core.strings.StringMethods.rpartition': ['*'],
      'pandas.core.strings.StringMethods.rsplit': ['*'],
      'pandas.core.strings.StringMethods.rstrip': ['*'],
      'pandas.core.strings.StringMethods.slice': ['*'],
      'pandas.core.strings.StringMethods.slice_replace': ['*'],
      'pandas.core.strings.StringMethods.split': ['*'],
      'pandas.core.strings.StringMethods.startswith': ['*'],
      'pandas.core.strings.StringMethods.strip': ['*'],
      'pandas.core.strings.StringMethods.swapcase': ['*'],
      'pandas.core.strings.StringMethods.title': ['*'],
      'pandas.core.strings.StringMethods.upper': ['*'],
      'pandas.core.strings.StringMethods.wrap': ['*'],
      'pandas.core.strings.StringMethods.zfill': ['*'],
      'pandas.core.strings.str_contains': ['*'],
      'pandas.core.strings.str_count': ['*'],
      'pandas.core.strings.str_endswith': ['*'],
      'pandas.core.strings.str_extract': ['*'],
      'pandas.core.strings.str_extractall': ['*'],
      'pandas.core.strings.str_findall': ['*'],
      'pandas.core.strings.str_get': ['*'],
      'pandas.core.strings.str_get_dummies': ['*'],
      'pandas.core.strings.str_join': ['*'],
      'pandas.core.strings.str_pad': ['*'],
      'pandas.core.strings.str_repeat': ['*'],
      'pandas.core.strings.str_replace': ['*'],
      'pandas.core.strings.str_slice': ['*'],
      'pandas.core.strings.str_slice_replace': ['*'],
      'pandas.core.strings.str_startswith': ['*'],
      'pandas.core.strings.str_wrap': ['*'],
  }

  result = doctests.testmod(pd.core.strings, use_beam=False, skip=skipped)
  self.assertEqual(result.failed, 0)
def test_series_tests(self):
    # Runs doctests.testmod over pd.core.series with use_beam=False and
    # report=True, then asserts that the returned result reports zero
    # failures.  The three dicts below map a docstring's qualified name to
    # the example sources (or '*') that are passed as wont_implement_ok,
    # not_implemented_ok and skip respectively.

    # Passed as wont_implement_ok.
    expected_wont_implement = {
        'pandas.core.series.Series.__array__': ['*'],
        'pandas.core.series.Series.array': ['*'],
        'pandas.core.series.Series.cummax': ['*'],
        'pandas.core.series.Series.cummin': ['*'],
        'pandas.core.series.Series.cumsum': ['*'],
        'pandas.core.series.Series.cumprod': ['*'],
        'pandas.core.series.Series.diff': ['*'],
        'pandas.core.series.Series.dot': [
            's.dot(arr)',  # non-deferred result
        ],
        'pandas.core.series.Series.fillna': [
            'df.fillna(method=\'ffill\')',
            'df.fillna(method="ffill")',
            'df.fillna(value=values, limit=1)',
        ],
        'pandas.core.series.Series.info': ['*'],
        'pandas.core.series.Series.items': ['*'],
        'pandas.core.series.Series.iteritems': ['*'],
        # default keep is 'first'
        'pandas.core.series.Series.nlargest': [
            "s.nlargest()",
            "s.nlargest(3)",
            "s.nlargest(3, keep='last')",
        ],
        'pandas.core.series.Series.memory_usage': ['*'],
        'pandas.core.series.Series.nsmallest': [
            "s.nsmallest()",
            "s.nsmallest(3)",
            "s.nsmallest(3, keep='last')",
        ],
        'pandas.core.series.Series.pop': ['*'],
        'pandas.core.series.Series.searchsorted': ['*'],
        'pandas.core.series.Series.shift': [
            'df.shift(periods=3)',
            'df.shift(periods=3, fill_value=0)',
        ],
        'pandas.core.series.Series.take': ['*'],
        'pandas.core.series.Series.to_dict': ['*'],
        'pandas.core.series.Series.unique': ['*'],
        'pandas.core.series.Series.unstack': ['*'],
        'pandas.core.series.Series.values': ['*'],
        'pandas.core.series.Series.view': ['*'],
        'pandas.core.series.Series.append': [
            's1.append(s2, ignore_index=True)',
        ],
        'pandas.core.series.Series.replace': [
            "s.replace([1, 2], method='bfill')",
            # Relies on method='pad'
            "s.replace('a')",
            # Relies on method='pad'
            # value=None is not valid for pandas < 1.4
            "s.replace('a', None)",
            # Implicitly uses method='pad', but the output doesn't rely on
            # that behavior. Verified independently in
            # frames_test.py::DeferredFrameTest::test_replace
            "df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})"
        ],
        'pandas.core.series.Series.sort_index': ['*'],
        'pandas.core.series.Series.sort_values': ['*'],
        'pandas.core.series.Series.argmax': ['*'],
        'pandas.core.series.Series.argmin': ['*'],
        'pandas.core.series.Series.drop_duplicates': [
            's.drop_duplicates()',
            "s.drop_duplicates(keep='last')",
        ],
        'pandas.core.series.Series.reindex': ['*'],
        'pandas.core.series.Series.autocorr': ['*'],
        'pandas.core.series.Series.repeat': ['s.repeat([1, 2, 3])'],
        'pandas.core.series.Series.resample': ['*'],
        'pandas.core.series.Series': ['ser.iloc[0] = 999'],
    }

    # Passed as not_implemented_ok.
    expected_not_implemented = {
        'pandas.core.series.Series.transform': [
            # str arg not supported. Tested with np.sum in
            # frames_test.py::DeferredFrameTest::test_groupby_transform_sum
            "df.groupby('Date')['Data'].transform('sum')",
        ],
        'pandas.core.series.Series.groupby': [
            'ser.groupby(["a", "b", "a", "b"]).mean()',
            'ser.groupby(["a", "b", "a", np.nan]).mean()',
            'ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()',
        ],
    }

    # Passed as skip.
    skipped_examples = {
        # Relies on setting values with iloc
        'pandas.core.series.Series': ['ser', 'r'],
        'pandas.core.series.Series.groupby': [
            # TODO(BEAM-11393): This example requires aligning two series
            # with non-unique indexes. It only works in pandas because
            # pandas can recognize the indexes are identical and elide the
            # alignment.
            'ser.groupby(ser > 100).mean()',
        ],
        'pandas.core.series.Series.asfreq': ['*'],
        # error formatting
        'pandas.core.series.Series.append': [
            's1.append(s2, verify_integrity=True)',
        ],
        'pandas.core.series.Series.cov': [
            # Differs in LSB on jenkins.
            "s1.cov(s2)",
        ],
        # idxmax/idxmin are skipped due to an issue with the test framework
        'pandas.core.series.Series.idxmin': ['s.idxmin()'],
        'pandas.core.series.Series.idxmax': ['s.idxmax()'],
        'pandas.core.series.Series.duplicated': ['*'],
        'pandas.core.series.Series.set_axis': ['*'],
        'pandas.core.series.Series.nonzero': ['*'],
        'pandas.core.series.Series.pop': ['ser'],  # testing side effect
        # Raises the right exception, but the testing framework has
        # matching issues.
        'pandas.core.series.Series.replace': [
            "df.replace({'a string': 'new value', True: False}) # raises"
        ],
        'pandas.core.series.Series.searchsorted': [
            # This doctest seems to be incorrectly parsed.
            "x = pd.Categorical(['apple', 'bread', 'bread',"
        ],
        'pandas.core.series.Series.to_csv': ['*'],
        'pandas.core.series.Series.to_markdown': ['*'],
        'pandas.core.series.Series.update': ['*'],
        'pandas.core.series.Series.view': [
            # Inspection after modification.
            's'
        ],
        'pandas.core.series.Series.resample': ['df'],
    }

    result = doctests.testmod(
        pd.core.series,
        use_beam=False,
        report=True,
        wont_implement_ok=expected_wont_implement,
        not_implemented_ok=expected_not_implemented,
        skip=skipped_examples)
    self.assertEqual(result.failed, 0)