Esempio n. 1
0
  def test_datetime_tests(self):
    """Run the pandas datetime array doctests and require zero failures.

    ``doctests.testmod`` is called with ``use_beam=False`` on
    ``pd.core.arrays.datetimelike`` and then on ``pd.core.arrays.datetimes``,
    each with its own ``skip`` mapping (docstring name -> examples).
    """
    # TODO(BEAM-10721)
    # NOTE(review): '*' presumably selects every example of the named
    # docstring -- confirm against doctests.testmod.
    datetimelike_skips = {
        'pandas.core.arrays.datetimelike.AttributesMixin._unbox_scalar': ['*'],
        'pandas.core.arrays.datetimelike.TimelikeOps.ceil': ['*'],
        'pandas.core.arrays.datetimelike.TimelikeOps.floor': ['*'],
        'pandas.core.arrays.datetimelike.TimelikeOps.round': ['*'],
    }
    datetimelike_outcome = doctests.testmod(
        pd.core.arrays.datetimelike, use_beam=False, skip=datetimelike_skips)

    datetimes_skips = {
        'pandas.core.arrays.datetimes.DatetimeArray.is_leap_year': ['*'],
        'pandas.core.arrays.datetimes.DatetimeArray.is_month_end': ['*'],
        'pandas.core.arrays.datetimes.DatetimeArray.is_month_start': ['*'],
        'pandas.core.arrays.datetimes.DatetimeArray.is_quarter_end': ['*'],
        'pandas.core.arrays.datetimes.DatetimeArray.is_quarter_start': ['*'],
        'pandas.core.arrays.datetimes.DatetimeArray.is_year_end': ['*'],
        'pandas.core.arrays.datetimes.DatetimeArray.is_year_start': ['*'],
        'pandas.core.arrays.datetimes.DatetimeArray.to_period': ['*'],
        'pandas.core.arrays.datetimes.DatetimeArray.tz_localize': ['*'],
    }
    datetimes_outcome = doctests.testmod(
        pd.core.arrays.datetimes, use_beam=False, skip=datetimes_skips)

    # Checked in the same order as before: datetimelike first, then datetimes.
    for outcome in (datetimelike_outcome, datetimes_outcome):
      self.assertEqual(outcome.failed, 0)
Esempio n. 2
0
  def test_datetime_tests(self):
    """Run the pandas datetime accessor/array doctests; expect no failures.

    Three modules are passed to ``doctests.testmod`` with ``use_beam=False``:
    ``pd.core.indexes.accessors``, ``pd.core.arrays.datetimelike`` and
    ``pd.core.arrays.datetimes``. Each run must report ``failed == 0``.
    """
    # TODO(BEAM-10721)
    accessor_skips = {
        'pandas.core.indexes.accessors.TimedeltaProperties': [
            # Looks like an upstream bug: the property is 'second'.
            'seconds_series.dt.seconds'
        ],

        # TODO(BEAM-12530): Test data creation fails for these
        #   s = pd.Series(pd.to_timedelta(np.arange(5), unit="d"))
        # pylint: disable=line-too-long
        'pandas.core.indexes.accessors.DatetimeProperties.to_pydatetime': ['*'],
        'pandas.core.indexes.accessors.TimedeltaProperties.components': ['*'],
        'pandas.core.indexes.accessors.TimedeltaProperties.to_pytimedelta': [
            '*'
        ],
        # pylint: enable=line-too-long
    }
    accessors_outcome = doctests.testmod(
        pd.core.indexes.accessors, use_beam=False, skip=accessor_skips)

    # The datetimelike module is run without any exemptions.
    datetimelike_outcome = doctests.testmod(
        pd.core.arrays.datetimelike, use_beam=False)

    datetimes_wont_implement = {
        'pandas.core.arrays.datetimes.DatetimeArray.to_period': ['*'],
        # Every tz_localize example uses an unsupported value for ambiguous=.
        # Verified separately in
        # frames_test.py::DeferredFrameTest::test_dt_tz_localize_*
        'pandas.core.arrays.datetimes.DatetimeArray.tz_localize': ['*'],
    }
    datetimes_not_implemented = {
        # Verifies index version of this method
        'pandas.core.arrays.datetimes.DatetimeArray.to_period': [
            'df.index.to_period("M")'
        ],
    }
    datetimes_outcome = doctests.testmod(
        pd.core.arrays.datetimes,
        use_beam=False,
        wont_implement_ok=datetimes_wont_implement,
        not_implemented_ok=datetimes_not_implemented)

    outcomes = (accessors_outcome, datetimelike_outcome, datetimes_outcome)
    for outcome in outcomes:
      self.assertEqual(outcome.failed, 0)
Esempio n. 3
0
 def test_string_tests(self):
   """Run the ``pd.core.strings`` doctests and require zero failures.

   ``doctests.testmod`` is called with ``use_beam=False`` plus a
   ``wont_implement_ok`` mapping and a ``skip`` mapping, both keyed by
   docstring name.
   """
   # These methods can accept deferred series objects, but not lists.
   list_argument_examples = {
       'pandas.core.strings.StringMethods.cat': [
           "s.str.cat(['A', 'B', 'C', 'D'], sep=',')",
           "s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-')",
           "s.str.cat(['A', 'B', 'C', 'D'], na_rep='-')"
       ],
       'pandas.core.strings.StringMethods.repeat': [
           's.str.repeat(repeats=[1, 2, 3])'
       ],
       'pandas.core.strings.str_repeat': ['s.str.repeat(repeats=[1, 2, 3])'],
   }

   # Bad test strings.
   bad_test_strings = {
       'pandas.core.strings.str_replace': [
           "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
       ],
       'pandas.core.strings.StringMethods.replace': [
           "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
       ],
   }

   outcome = doctests.testmod(
       pd.core.strings,
       use_beam=False,
       wont_implement_ok=list_argument_examples,
       skip=bad_test_strings)
   self.assertEqual(outcome.failed, 0)
Esempio n. 4
0
    def test_string_tests(self):
        """Run the ``pd.core.strings`` doctests and require zero failures.

        Every exemption is expressed through the ``skip`` mapping handed to
        ``doctests.testmod`` (called with ``use_beam=False``).
        """
        skipped_examples = {
            'pandas.core.strings.StringMethods.cat': ['*'],
            'pandas.core.strings.StringMethods.repeat': ['*'],
            'pandas.core.strings.str_repeat': ['*'],

            # Everything below is skipped because of bad test strings, fixed
            # upstream in
            # https://github.com/pandas-dev/pandas/commit/d095ac899da953d759992824592a72a1e6ff5e09
            'pandas.core.strings.StringMethods': [
                "s.str.split('_')",
                "s.str.replace('_', '')",
            ],
            'pandas.core.strings.str_split': ["s.str.split(expand=True)"],
            'pandas.core.strings.str_replace': [
                "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)",
            ],
            'pandas.core.strings.StringMethods.replace': [
                "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)",
            ],
            'pandas.core.strings.StringMethods.partition': [
                'idx.str.partition()',
            ],
            'pandas.core.strings.StringMethods.rpartition': [
                'idx.str.partition()',
            ],
            # rsplit/split are especially awkward: their first example, which
            # defines the test series, is bad and has to be skipped, but
            # skipping it breaks every later example. Running the rest would
            # require executing the first example while ignoring its output.
            'pandas.core.strings.StringMethods.rsplit': ["*"],
            'pandas.core.strings.StringMethods.split': ["*"],
        }
        outcome = doctests.testmod(
            pd.core.strings, use_beam=False, skip=skipped_examples)
        self.assertEqual(outcome.failed, 0)
Esempio n. 5
0
 def test_dataframe_tests(self):
     """Run the ``pd.core.frame`` doctests and require zero failures.

     ``doctests.testmod`` is called with ``use_beam=False`` and a ``skip``
     mapping from docstring name to the examples left out.
     """
     skipped_examples = {
         'pandas.core.frame.DataFrame.T': ['*'],
         'pandas.core.frame.DataFrame.agg': ['*'],
         'pandas.core.frame.DataFrame.aggregate': ['*'],
         'pandas.core.frame.DataFrame.append': ['*'],
         'pandas.core.frame.DataFrame.apply': ['*'],
         'pandas.core.frame.DataFrame.applymap': ['df ** 2'],
         'pandas.core.frame.DataFrame.assign': ['*'],
         'pandas.core.frame.DataFrame.axes': ['*'],
         'pandas.core.frame.DataFrame.combine': ['*'],
         'pandas.core.frame.DataFrame.combine_first': ['*'],
         'pandas.core.frame.DataFrame.corr': ['*'],
         'pandas.core.frame.DataFrame.count': ['*'],
         'pandas.core.frame.DataFrame.cov': ['*'],
         'pandas.core.frame.DataFrame.dot': ['*'],
         'pandas.core.frame.DataFrame.drop': ['*'],
         'pandas.core.frame.DataFrame.eval': ['*'],
         'pandas.core.frame.DataFrame.explode': ['*'],
         'pandas.core.frame.DataFrame.fillna': ['*'],
         'pandas.core.frame.DataFrame.info': ['*'],
         'pandas.core.frame.DataFrame.isin': ['*'],
         'pandas.core.frame.DataFrame.iterrows': ["print(df['int'].dtype)"],
         'pandas.core.frame.DataFrame.join': ['*'],
         'pandas.core.frame.DataFrame.melt': ['*'],
         'pandas.core.frame.DataFrame.memory_usage': ['*'],
         'pandas.core.frame.DataFrame.merge': ['*'],
         # Not equal to df.agg('mode', axis='columns', numeric_only=True)
         'pandas.core.frame.DataFrame.mode': [
             "df.mode(axis='columns', numeric_only=True)",
         ],
         'pandas.core.frame.DataFrame.nlargest': ['*'],
         'pandas.core.frame.DataFrame.nsmallest': ['*'],
         'pandas.core.frame.DataFrame.nunique': ['*'],
         'pandas.core.frame.DataFrame.pivot': ['*'],
         'pandas.core.frame.DataFrame.pivot_table': ['*'],
         'pandas.core.frame.DataFrame.query': ['*'],
         'pandas.core.frame.DataFrame.reindex': ['*'],
         'pandas.core.frame.DataFrame.reindex_axis': ['*'],
         'pandas.core.frame.DataFrame.rename': ['*'],
         # The right exception is raised, but the testing framework has
         # trouble matching it.
         'pandas.core.frame.DataFrame.replace': [
             "df.replace({'a string': 'new value', True: False})  # raises"
         ],
         # Uses unseeded np.random.
         'pandas.core.frame.DataFrame.round': ['*'],
         'pandas.core.frame.DataFrame.set_index': ['*'],
         'pandas.core.frame.DataFrame.transpose': [
             'df1_transposed.dtypes',
             'df2_transposed.dtypes',
         ],
         'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],
         # Uses df.index
         'pandas.core.frame.DataFrame.to_records': ['*'],
     }
     outcome = doctests.testmod(
         pd.core.frame, use_beam=False, skip=skipped_examples)
     self.assertEqual(outcome.failed, 0)
Esempio n. 6
0
  def test_string_tests(self):
    """Run the pandas string-method doctests and require zero failures.

    The module holding the docstrings depends on the installed pandas:
    ``pd.core.strings`` before 1.2, ``pd.core.strings.accessor`` from 1.2 on.
    The chosen module's ``__name__`` prefixes every key of the
    ``wont_implement_ok`` and ``skip`` mappings passed to
    ``doctests.testmod`` (called with ``use_beam=False``).
    """
    # Only major.minor are parsed. Converting every dotted component with
    # int() raises ValueError for pre-release, development or local versions
    # such as '1.3.0rc1' or '1.2.0.dev0+123'.
    pd_version = tuple(int(part) for part in pd.__version__.split('.')[:2])
    if pd_version < (1, 2):
      module = pd.core.strings
    else:
      # Definitions were moved to accessor in pandas 1.2.0
      module = pd.core.strings.accessor

    module_name = module.__name__

    result = doctests.testmod(
        module,
        use_beam=False,
        wont_implement_ok={
            # These methods can accept deferred series objects, but not lists
            f'{module_name}.StringMethods.cat': [
                "s.str.cat(['A', 'B', 'C', 'D'], sep=',')",
                "s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-')",
                "s.str.cat(['A', 'B', 'C', 'D'], na_rep='-')"
            ],
            f'{module_name}.StringMethods.repeat': [
                's.str.repeat(repeats=[1, 2, 3])'
            ],
            f'{module_name}.str_repeat': ['s.str.repeat(repeats=[1, 2, 3])'],
            f'{module_name}.StringMethods.get_dummies': ['*'],
            f'{module_name}.str_get_dummies': ['*'],
        },
        skip={
            # count() on Series with a NaN produces mismatched type if we
            # have a NaN-only partition.
            f'{module_name}.StringMethods.count': ["s.str.count('a')"],
            f'{module_name}.str_count': ["s.str.count('a')"],

            # Produce None instead of NaN, see
            # frames_test.py::DeferredFrameTest::test_str_split
            f'{module_name}.StringMethods.rsplit': [
                's.str.split(expand=True)',
                's.str.rsplit("/", n=1, expand=True)',
            ],
            f'{module_name}.StringMethods.split': [
                's.str.split(expand=True)',
                's.str.rsplit("/", n=1, expand=True)',
            ],

            # Bad test strings in pandas 1.1.x
            f'{module_name}.str_replace': [
                "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
            ],
            f'{module_name}.StringMethods.replace': [
                "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
            ],

            # output has incorrect formatting in 1.2.x
            f'{module_name}.StringMethods.extractall': ['*']
        })
    self.assertEqual(result.failed, 0)
Esempio n. 7
0
 def test_series_tests(self):
     """Run the ``pd.core.series`` doctests and require zero failures.

     ``doctests.testmod`` is called with ``use_beam=False``; every entry of
     the ``skip`` mapping uses the ``'*'`` pattern.
     """
     # NOTE(review): '*' presumably selects every example of the named
     # docstring -- confirm against doctests.testmod.
     skipped_examples = {
         'pandas.core.series.Series.append': ['*'],
         'pandas.core.series.Series.argmax': ['*'],
         'pandas.core.series.Series.argmin': ['*'],
         'pandas.core.series.Series.autocorr': ['*'],
         'pandas.core.series.Series.between': ['*'],
         'pandas.core.series.Series.combine': ['*'],
         'pandas.core.series.Series.combine_first': ['*'],
         'pandas.core.series.Series.corr': ['*'],
         'pandas.core.series.Series.count': ['*'],
         'pandas.core.series.Series.cov': ['*'],
         'pandas.core.series.Series.dot': ['*'],
         'pandas.core.series.Series.drop': ['*'],
         'pandas.core.series.Series.drop_duplicates': ['*'],
         'pandas.core.series.Series.dropna': ['*'],
         'pandas.core.series.Series.duplicated': ['*'],
         'pandas.core.series.Series.explode': ['*'],
         'pandas.core.series.Series.fillna': ['*'],
         'pandas.core.series.Series.idxmax': ['*'],
         'pandas.core.series.Series.idxmin': ['*'],
         'pandas.core.series.Series.isin': ['*'],
         'pandas.core.series.Series.items': ['*'],
         'pandas.core.series.Series.iteritems': ['*'],
         'pandas.core.series.Series.memory_usage': ['*'],
         'pandas.core.series.Series.nlargest': ['*'],
         'pandas.core.series.Series.nonzero': ['*'],
         'pandas.core.series.Series.nsmallest': ['*'],
         'pandas.core.series.Series.quantile': ['*'],
         'pandas.core.series.Series.reindex': ['*'],
         'pandas.core.series.Series.rename': ['*'],
         'pandas.core.series.Series.repeat': ['*'],
         'pandas.core.series.Series.replace': ['*'],
         'pandas.core.series.Series.reset_index': ['*'],
         'pandas.core.series.Series.round': ['*'],
         'pandas.core.series.Series.searchsorted': ['*'],
         'pandas.core.series.Series.shift': ['*'],
         'pandas.core.series.Series.sort_index': ['*'],
         'pandas.core.series.Series.sort_values': ['*'],
         'pandas.core.series.Series.take': ['*'],
         'pandas.core.series.Series.to_csv': ['*'],
         'pandas.core.series.Series.to_dict': ['*'],
         'pandas.core.series.Series.to_frame': ['*'],
         'pandas.core.series.Series.unique': ['*'],
         'pandas.core.series.Series.update': ['*'],
         'pandas.core.series.Series.values': ['*'],
         'pandas.core.series.Series.view': ['*'],
     }
     outcome = doctests.testmod(
         pd.core.series, use_beam=False, skip=skipped_examples)
     self.assertEqual(outcome.failed, 0)
Esempio n. 8
0
  def test_string_tests(self):
    """Run the pandas string-method doctests and require zero failures.

    ``PD_VERSION`` (defined outside this method) selects the module under
    test; that module's ``__name__`` prefixes every key of the
    ``wont_implement_ok`` and ``skip`` mappings given to
    ``doctests.testmod`` (called with ``use_beam=False``).
    """
    # Definitions were moved to accessor in pandas 1.2.0
    module = (
        pd.core.strings if PD_VERSION < (1, 2) else pd.core.strings.accessor)
    module_name = module.__name__

    wont_implement = {
        # These methods can accept deferred series objects, but not lists
        f'{module_name}.StringMethods.cat': [
            "s.str.cat(['A', 'B', 'C', 'D'], sep=',')",
            "s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-')",
            "s.str.cat(['A', 'B', 'C', 'D'], na_rep='-')"
        ],
        f'{module_name}.StringMethods.repeat': [
            's.str.repeat(repeats=[1, 2, 3])'
        ],
        f'{module_name}.str_repeat': ['s.str.repeat(repeats=[1, 2, 3])'],
        # The pandas get_dummies examples are not cast to CategoricalDtype,
        # which is required for them to work in Beam.
        f'{module_name}.StringMethods.get_dummies': ['*'],
        f'{module_name}.str_get_dummies': ['*'],
        f'{module_name}.StringMethods': ['s.str.split("_")'],
        f'{module_name}.StringMethods.rsplit': ['*'],
        f'{module_name}.StringMethods.split': ['*'],
    }

    skipped_examples = {
        # count() on Series with a NaN produces mismatched type if we
        # have a NaN-only partition.
        f'{module_name}.StringMethods.count': ["s.str.count('a')"],
        f'{module_name}.str_count': ["s.str.count('a')"],

        # Bad test strings in pandas 1.1.x
        f'{module_name}.str_replace': [
            "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
        ],
        f'{module_name}.StringMethods.replace': [
            "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)"
        ],

        # output has incorrect formatting in 1.2.x
        f'{module_name}.StringMethods.extractall': ['*']
    }

    outcome = doctests.testmod(
        module,
        use_beam=False,
        wont_implement_ok=wont_implement,
        skip=skipped_examples)
    self.assertEqual(outcome.failed, 0)
Esempio n. 9
0
 def test_indexing_tests(self):
     """Run the ``pd.core.indexing`` doctests and require zero failures.

     ``doctests.testmod`` is called with ``use_beam=False``; the ``skip``
     mapping lists the five indexer classes, each with the ``'*'`` pattern.
     """
     skipped_examples = {
         'pandas.core.indexing._AtIndexer': ['*'],
         'pandas.core.indexing._IndexSlice': ['*'],
         'pandas.core.indexing._LocIndexer': ['*'],
         'pandas.core.indexing._iAtIndexer': ['*'],
         'pandas.core.indexing._iLocIndexer': ['*'],
     }
     outcome = doctests.testmod(
         pd.core.indexing, use_beam=False, skip=skipped_examples)
     self.assertEqual(outcome.failed, 0)
Esempio n. 10
0
 def test_series_tests(self):
     """Run the ``pd.core.series`` doctests and require zero failures.

     ``doctests.testmod`` is called with ``use_beam=False`` and
     ``report=True``. Exemptions are split across three mappings keyed by
     docstring name: ``wont_implement_ok``, ``not_implemented_ok`` and
     ``skip``.
     """
     wont_implement = {
         'pandas.core.series.Series.__array__': ['*'],
         'pandas.core.series.Series.array': ['*'],
         'pandas.core.series.Series.cummax': ['*'],
         'pandas.core.series.Series.cummin': ['*'],
         'pandas.core.series.Series.cumsum': ['*'],
         'pandas.core.series.Series.cumprod': ['*'],
         'pandas.core.series.Series.diff': ['*'],
         'pandas.core.series.Series.dot': [
             's.dot(arr)',  # non-deferred result
         ],
         'pandas.core.series.Series.fillna': [
             "df.fillna(method='ffill')",
             'df.fillna(value=values, limit=1)',
         ],
         'pandas.core.series.Series.items': ['*'],
         'pandas.core.series.Series.iteritems': ['*'],
         # default keep is 'first'
         'pandas.core.series.Series.nlargest': [
             "s.nlargest()",
             "s.nlargest(3)",
             "s.nlargest(3, keep='last')",
         ],
         'pandas.core.series.Series.memory_usage': ['*'],
         'pandas.core.series.Series.nsmallest': [
             "s.nsmallest()",
             "s.nsmallest(3)",
             "s.nsmallest(3, keep='last')",
         ],
         'pandas.core.series.Series.pop': ['*'],
         'pandas.core.series.Series.searchsorted': ['*'],
         'pandas.core.series.Series.shift': ['*'],
         'pandas.core.series.Series.take': ['*'],
         'pandas.core.series.Series.to_dict': ['*'],
         'pandas.core.series.Series.unique': ['*'],
         'pandas.core.series.Series.unstack': ['*'],
         'pandas.core.series.Series.values': ['*'],
         'pandas.core.series.Series.view': ['*'],
         'pandas.core.series.Series.append': [
             's1.append(s2, ignore_index=True)',
         ],
     }

     not_implemented = {
         'pandas.core.series.Series.transform': ['*'],
         'pandas.core.series.Series.groupby': [
             'ser.groupby(["a", "b", "a", "b"]).mean()',
             'ser.groupby(["a", "b", "a", np.nan]).mean()',
             'ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()',
             # Grouping by a series is not supported
             'ser.groupby(ser > 100).mean()',
         ],
         'pandas.core.series.Series.reindex': ['*'],
     }

     skipped_examples = {
         # error formatting
         'pandas.core.series.Series.append': [
             's1.append(s2, verify_integrity=True)',
         ],
         # Throws NotImplementedError when modifying df
         'pandas.core.series.Series.transform': ['df'],
         'pandas.core.series.Series.argmax': ['*'],
         'pandas.core.series.Series.argmin': ['*'],
         'pandas.core.series.Series.autocorr': ['*'],
         'pandas.core.series.Series.combine': ['*'],
         'pandas.core.series.Series.combine_first': ['*'],
         'pandas.core.series.Series.compare': ['*'],
         'pandas.core.series.Series.cov': [
             # Differs in LSB on jenkins.
             "s1.cov(s2)",
         ],
         'pandas.core.series.Series.drop_duplicates': ['*'],
         'pandas.core.series.Series.duplicated': ['*'],
         'pandas.core.series.Series.explode': ['*'],
         'pandas.core.series.Series.idxmax': ['*'],
         'pandas.core.series.Series.idxmin': ['*'],
         'pandas.core.series.Series.name': ['*'],
         'pandas.core.series.Series.nonzero': ['*'],
         'pandas.core.series.Series.quantile': ['*'],
         'pandas.core.series.Series.pop': [
             'ser',  # testing side effect
         ],
         'pandas.core.series.Series.repeat': ['*'],
         'pandas.core.series.Series.replace': ['*'],
         'pandas.core.series.Series.reset_index': ['*'],
         'pandas.core.series.Series.searchsorted': [
             # This doctest seems to be incorrectly parsed.
             "x = pd.Categorical(['apple', 'bread', 'bread',"
         ],
         'pandas.core.series.Series.set_axis': ['*'],
         'pandas.core.series.Series.sort_index': ['*'],
         'pandas.core.series.Series.sort_values': ['*'],
         'pandas.core.series.Series.to_csv': ['*'],
         'pandas.core.series.Series.to_markdown': ['*'],
         'pandas.core.series.Series.update': ['*'],
         'pandas.core.series.Series.view': [
             # Inspection after modification.
             's'
         ],
     }

     outcome = doctests.testmod(
         pd.core.series,
         use_beam=False,
         report=True,
         wont_implement_ok=wont_implement,
         not_implemented_ok=not_implemented,
         skip=skipped_examples)
     self.assertEqual(outcome.failed, 0)
Esempio n. 11
0
    def test_groupby_tests(self):
        """Run the pandas groupby doctests and require zero failures.

        ``pd.core.groupby.groupby`` is run and checked first, then
        ``pd.core.groupby.generic``. Both use ``use_beam=False`` together
        with ``wont_implement_ok``, ``not_implemented_ok`` and ``skip``
        mappings keyed by docstring name.
        """
        # ---- pd.core.groupby.groupby ----
        groupby_wont_implement = {
            'pandas.core.groupby.groupby.GroupBy.head': ['*'],
            'pandas.core.groupby.groupby.GroupBy.tail': ['*'],
            'pandas.core.groupby.groupby.GroupBy.nth': ['*'],
            'pandas.core.groupby.groupby.GroupBy.cumcount': ['*'],
        }
        groupby_not_implemented = {
            'pandas.core.groupby.groupby.GroupBy.describe': ['*'],
            'pandas.core.groupby.groupby.GroupBy.ngroup': ['*'],
            'pandas.core.groupby.groupby.GroupBy.resample': ['*'],
            'pandas.core.groupby.groupby.GroupBy.sample': ['*'],
            'pandas.core.groupby.groupby.GroupBy.quantile': ['*'],
            'pandas.core.groupby.groupby.BaseGroupBy.pipe': ['*'],
            # pipe tests are in a different location in pandas 1.1.x
            'pandas.core.groupby.groupby._GroupBy.pipe': ['*'],
            'pandas.core.groupby.groupby.GroupBy.nth': [
                "df.groupby('A', as_index=False).nth(1)",
            ],
        }
        groupby_skips = {
            # Uses iloc to mutate a DataFrame
            'pandas.core.groupby.groupby.GroupBy.resample': [
                'df.iloc[2, 0] = 5',
                'df',
            ],
            # TODO: Raise wont implement for list passed as a grouping column
            # Currently raises unhashable type: list
            'pandas.core.groupby.groupby.GroupBy.ngroup': [
                'df.groupby(["A", [1,1,2,3,2,1]]).ngroup()',
            ],
        }
        groupby_outcome = doctests.testmod(
            pd.core.groupby.groupby,
            use_beam=False,
            wont_implement_ok=groupby_wont_implement,
            not_implemented_ok=groupby_not_implemented,
            skip=groupby_skips)
        # Checked before the second module is run.
        self.assertEqual(groupby_outcome.failed, 0)

        # ---- pd.core.groupby.generic ----
        generic_wont_implement = {
            # Returns an array by default, not a Series. WontImplement
            # (non-deferred)
            'pandas.core.groupby.generic.SeriesGroupBy.unique': ['*'],
            # TODO: Is take actually deprecated?
            'pandas.core.groupby.generic.DataFrameGroupBy.take': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.take': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.nsmallest': [
                "s.nsmallest(3, keep='last')",
                "s.nsmallest(3)",
                "s.nsmallest()",
            ],
            'pandas.core.groupby.generic.SeriesGroupBy.nlargest': [
                "s.nlargest(3, keep='last')",
                "s.nlargest(3)",
                "s.nlargest()",
            ],
            'pandas.core.groupby.generic.DataFrameGroupBy.diff': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.diff': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.hist': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.fillna': [
                "df.fillna(method='ffill')",
                'df.fillna(value=values, limit=1)',
            ],
            'pandas.core.groupby.generic.SeriesGroupBy.fillna': [
                "df.fillna(method='ffill')",
                'df.fillna(value=values, limit=1)',
            ],
        }
        generic_not_implemented = {
            'pandas.core.groupby.generic.DataFrameGroupBy.transform': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.idxmax': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.idxmin': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.filter': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.nunique': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.transform': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.idxmax': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.idxmin': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.filter': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.describe': ['*'],
        }
        generic_skips = {
            'pandas.core.groupby.generic.SeriesGroupBy.cov': [
                # Floating point comparison fails
                's1.cov(s2)',
            ],
            'pandas.core.groupby.generic.DataFrameGroupBy.cov': [
                # Mutates input DataFrame with loc
                # TODO: Replicate in frames_test.py
                "df.loc[df.index[:5], 'a'] = np.nan",
                "df.loc[df.index[5:10], 'b'] = np.nan",
                "df.cov(min_periods=12)",
            ],
            # These examples rely on grouping by a list
            'pandas.core.groupby.generic.SeriesGroupBy.aggregate': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.aggregate': ['*'],
        }
        generic_outcome = doctests.testmod(
            pd.core.groupby.generic,
            use_beam=False,
            wont_implement_ok=generic_wont_implement,
            not_implemented_ok=generic_not_implemented,
            skip=generic_skips)
        self.assertEqual(generic_outcome.failed, 0)
Esempio n. 12
0
    def test_dataframe_tests(self):
        result = doctests.testmod(
            pd.core.frame,
            use_beam=False,
            report=True,
            wont_implement_ok={
                'pandas.core.frame.DataFrame.T': ['*'],
                'pandas.core.frame.DataFrame.cummax': ['*'],
                'pandas.core.frame.DataFrame.cummin': ['*'],
                'pandas.core.frame.DataFrame.cumsum': ['*'],
                'pandas.core.frame.DataFrame.cumprod': ['*'],
                'pandas.core.frame.DataFrame.diff': ['*'],
                'pandas.core.frame.DataFrame.fillna': [
                    "df.fillna(method='ffill')",
                    'df.fillna(value=values, limit=1)',
                ],
                'pandas.core.frame.DataFrame.items': ['*'],
                'pandas.core.frame.DataFrame.itertuples': ['*'],
                'pandas.core.frame.DataFrame.iterrows': ['*'],
                'pandas.core.frame.DataFrame.iteritems': ['*'],
                # default keep is 'first'
                'pandas.core.frame.DataFrame.nlargest': [
                    "df.nlargest(3, 'population')",
                    "df.nlargest(3, ['population', 'GDP'])",
                    "df.nlargest(3, 'population', keep='last')"
                ],
                'pandas.core.frame.DataFrame.nsmallest': [
                    "df.nsmallest(3, 'population')",
                    "df.nsmallest(3, ['population', 'GDP'])",
                    "df.nsmallest(3, 'population', keep='last')",
                ],
                'pandas.core.frame.DataFrame.replace': [
                    "s.replace([1, 2], method='bfill')",
                    # Relies on method='pad'
                    "s.replace('a', None)",
                ],
                'pandas.core.frame.DataFrame.to_records': ['*'],
                'pandas.core.frame.DataFrame.to_dict': ['*'],
                'pandas.core.frame.DataFrame.to_numpy': ['*'],
                'pandas.core.frame.DataFrame.to_string': ['*'],
                'pandas.core.frame.DataFrame.transpose': ['*'],
                'pandas.core.frame.DataFrame.shape': ['*'],
                'pandas.core.frame.DataFrame.shift': [
                    'df.shift(periods=3, freq="D")',
                    'df.shift(periods=3, freq="infer")'
                ],
                'pandas.core.frame.DataFrame.unstack': ['*'],
                'pandas.core.frame.DataFrame.memory_usage': ['*'],
                'pandas.core.frame.DataFrame.info': ['*'],
                # Not equal to df.agg('mode', axis='columns', numeric_only=True)
                # because there can be multiple columns if a row has more than one
                # mode
                'pandas.core.frame.DataFrame.mode':
                ["df.mode(axis='columns', numeric_only=True)"],
                'pandas.core.frame.DataFrame.append': [
                    'df.append(df2, ignore_index=True)',
                    "for i in range(5):\n" +
                    "    df = df.append({'A': i}, ignore_index=True)",
                ],
            },
            not_implemented_ok={
                'pandas.core.frame.DataFrame.transform': ['*'],
                'pandas.core.frame.DataFrame.isin': ['*'],
                'pandas.core.frame.DataFrame.melt': ['*'],
                'pandas.core.frame.DataFrame.reindex': ['*'],
                'pandas.core.frame.DataFrame.reindex_axis': ['*'],
                'pandas.core.frame.DataFrame.round': [
                    'df.round(decimals)',
                ],

                # We should be able to support pivot and pivot_table for categorical
                # columns
                'pandas.core.frame.DataFrame.pivot': ['*'],

                # We can implement this as a zipping operator, but it won't have the
                # same capability. The doctest includes an example that branches on
                # a deferred result.
                'pandas.core.frame.DataFrame.combine': ['*'],

                # Can be implemented as a zipping operator
                'pandas.core.frame.DataFrame.combine_first': ['*'],

                # Difficult to parallelize but should be possible?
                'pandas.core.frame.DataFrame.dot': [
                    # reindex not supported
                    's2 = s.reindex([1, 0, 2, 3])',
                    'df.dot(s2)',
                ],

                # Trivially elementwise for axis=columns. Relies on global indexing
                # for axis=rows.
                # Difficult to determine proxy, need to inspect function
                'pandas.core.frame.DataFrame.apply': ['*'],

                # Cross-join not implemented
                'pandas.core.frame.DataFrame.merge':
                ["df1.merge(df2, how='cross')"],

                # TODO(BEAM-11711)
                'pandas.core.frame.DataFrame.set_index': [
                    "df.set_index([s, s**2])",
                ],
            },
            skip={
                # Throws NotImplementedError when modifying df
                'pandas.core.frame.DataFrame.transform': ['df'],
                'pandas.core.frame.DataFrame.axes': [
                    # Returns deferred index.
                    'df.axes',
                ],
                'pandas.core.frame.DataFrame.compare': ['*'],
                'pandas.core.frame.DataFrame.cov': [
                    # Relies on setting entries ahead of time.
                    "df.loc[df.index[:5], 'a'] = np.nan",
                    "df.loc[df.index[5:10], 'b'] = np.nan",
                    'df.cov(min_periods=12)',
                ],
                'pandas.core.frame.DataFrame.drop_duplicates': ['*'],
                'pandas.core.frame.DataFrame.duplicated': ['*'],
                'pandas.core.frame.DataFrame.idxmax': ['*'],
                'pandas.core.frame.DataFrame.idxmin': ['*'],
                'pandas.core.frame.DataFrame.rename': [
                    # Returns deferred index.
                    'df.index',
                    'df.rename(index=str).index',
                ],
                'pandas.core.frame.DataFrame.set_index': [
                    # TODO(BEAM-11711): This could pass in the index as
                    # a DeferredIndex, and we should fail it as order-sensitive.
                    "df.set_index([pd.Index([1, 2, 3, 4]), 'year'])",
                ],
                'pandas.core.frame.DataFrame.set_axis': ['*'],
                'pandas.core.frame.DataFrame.sort_index': ['*'],
                'pandas.core.frame.DataFrame.to_markdown': ['*'],
                'pandas.core.frame.DataFrame.to_parquet': ['*'],
                'pandas.core.frame.DataFrame.value_counts': ['*'],
                'pandas.core.frame.DataFrame.to_records': [
                    'df.index = df.index.rename("I")',
                    'index_dtypes = f"<S{df.index.str.len().max()}"',  # 1.x
                    'index_dtypes = "<S{}".format(df.index.str.len().max())',  #0.x
                    'df.to_records(index_dtypes=index_dtypes)',
                ],
                # These tests use the static method pd.pivot_table, which doesn't
                # actually raise NotImplementedError
                'pandas.core.frame.DataFrame.pivot_table': ['*'],
                # Expected to raise a ValueError, but we raise NotImplementedError
                'pandas.core.frame.DataFrame.pivot':
                ["df.pivot(index='foo', columns='bar', values='baz')"],
                'pandas.core.frame.DataFrame.append': [
                    'df',
                    # pylint: disable=line-too-long
                    "pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)],\n"
                    "          ignore_index=True)"
                ],
                'pandas.core.frame.DataFrame.eval': ['df'],
                'pandas.core.frame.DataFrame.melt':
                ["df.columns = [list('ABC'), list('DEF')]", "df"],
                'pandas.core.frame.DataFrame.merge': [
                    # Order-sensitive index, checked in frames_test.py.
                    "df1.merge(df2, left_on='lkey', right_on='rkey')",
                    "df1.merge(df2, left_on='lkey', right_on='rkey',\n"
                    "          suffixes=('_left', '_right'))",
                    "df1.merge(df2, how='left', on='a')",
                ],
                # Raises right exception, but testing framework has matching issues.
                'pandas.core.frame.DataFrame.replace': [
                    "df.replace({'a string': 'new value', True: False})  # raises"
                ],
                'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],

                # Skipped because "seen_wont_implement" is reset before getting to
                # these calls, so the NameError they raise is not ignored.
                'pandas.core.frame.DataFrame.T':
                ['df1_transposed.dtypes', 'df2_transposed.dtypes'],
                'pandas.core.frame.DataFrame.transpose':
                ['df1_transposed.dtypes', 'df2_transposed.dtypes'],
                # Skipped because the relies on iloc to set a cell to NA. Test is
                # replicated in frames_test::DeferredFrameTest::test_applymap.
                'pandas.core.frame.DataFrame.applymap': [
                    'df_copy.iloc[0, 0] = pd.NA',
                    "df_copy.applymap(lambda x: len(str(x)), na_action='ignore')",
                ],
                # Skipped so we don't need to install natsort
                'pandas.core.frame.DataFrame.sort_values': [
                    'from natsort import index_natsorted', 'df.sort_values(\n'
                    '   by="time",\n'
                    '   key=lambda x: np.argsort(index_natsorted(df["time"]))\n'
                    ')'
                ],
                # Mode that we don't yet support, documentation added in pandas
                # 1.2.0 (https://github.com/pandas-dev/pandas/issues/35912)
                'pandas.core.frame.DataFrame.aggregate':
                ["df.agg(x=('A', max), y=('B', 'min'), z=('C', np.mean))"],
            })
        self.assertEqual(result.failed, 0)
Esempio n. 13
0
    def test_ndframe_tests(self):
        """Run the doctests found in ``pd.core.generic``; none may fail.

        Three dicts are built up front and handed to ``doctests.testmod``.
        Each maps a fully qualified doctest name to a list of example source
        strings. NOTE(review): ``'*'`` looks like a wildcard for every
        example of that doctest -- confirm in ``doctests.testmod``.
        """
        # IO methods are tested in io_test.py, so every NDFrame attribute
        # whose name starts with 'to_' gets a wholesale skip entry.
        writer_skips = {}
        for attr in dir(pd.core.generic.NDFrame):
            if attr.startswith('to_'):
                writer_skips[f'pandas.core.generic.NDFrame.{attr}'] = ['*']

        # Passed as wont_implement_ok.
        wont_implement = {
            'pandas.core.generic.NDFrame.first': ['*'],
            'pandas.core.generic.NDFrame.head': ['*'],
            'pandas.core.generic.NDFrame.last': ['*'],
            'pandas.core.generic.NDFrame.shift': ['*'],
            'pandas.core.generic.NDFrame.tail': ['*'],
            'pandas.core.generic.NDFrame.take': ['*'],
            'pandas.core.generic.NDFrame.values': ['*'],
            'pandas.core.generic.NDFrame.tz_localize': [
                "s.tz_localize('CET', ambiguous='infer')",
                # np.array is not a deferred object. This use-case is
                # possible with a deferred Series though, which is tested in
                # frames_test.py
                "s.tz_localize('CET', ambiguous=np.array([True, True, False]))",
            ],
            # These inputs rely on tail (wont implement, order sensitive)
            # for verification
            'pandas.core.generic.NDFrame.truncate': [
                "df.tail()",
                "df.loc['2016-01-05':'2016-01-10', :].tail()",
            ],
            'pandas.core.generic.NDFrame.replace': [
                "s.replace([1, 2], method='bfill')",
                # Relies on method='pad'
                "s.replace('a', None)",
            ],
            'pandas.core.generic.NDFrame.fillna': [
                "df.fillna(method='ffill')",
                'df.fillna(value=values, limit=1)',
            ],
        }

        # Passed as not_implemented_ok.
        not_implemented = {
            'pandas.core.generic.NDFrame.add_prefix': ['*'],
            'pandas.core.generic.NDFrame.add_suffix': ['*'],
            'pandas.core.generic.NDFrame.asof': ['*'],
            'pandas.core.generic.NDFrame.at_time': ['*'],
            'pandas.core.generic.NDFrame.between_time': ['*'],
            'pandas.core.generic.NDFrame.describe': ['*'],
            'pandas.core.generic.NDFrame.ewm': ['*'],
            'pandas.core.generic.NDFrame.expanding': ['*'],
            'pandas.core.generic.NDFrame.flags': ['*'],
            'pandas.core.generic.NDFrame.interpolate': ['*'],
            'pandas.core.generic.NDFrame.mask': ['*'],
            'pandas.core.generic.NDFrame.pct_change': ['*'],
            'pandas.core.generic.NDFrame.rank': ['*'],
            'pandas.core.generic.NDFrame.reindex': ['*'],
            'pandas.core.generic.NDFrame.reindex_like': ['*'],
            'pandas.core.generic.NDFrame.replace': ['*'],
            'pandas.core.generic.NDFrame.resample': ['*'],
            'pandas.core.generic.NDFrame.rolling': ['*'],
            'pandas.core.generic.NDFrame.sample': ['*'],
            'pandas.core.generic.NDFrame.set_flags': ['*'],
            'pandas.core.generic.NDFrame.squeeze': ['*'],
            'pandas.core.generic.NDFrame.transform': ['*'],
            'pandas.core.generic.NDFrame.truncate': ['*'],
            'pandas.core.generic.NDFrame.where': ['*'],
            'pandas.core.generic.NDFrame.xs': ['*'],
            # argsort unimplemented
            'pandas.core.generic.NDFrame.abs': [
                'df.loc[(df.c - 43).abs().argsort()]',
            ],
        }

        # Passed as skip.
        skipped = {
            # Internal test
            'pandas.core.generic.NDFrame._set_axis_name': ['*'],
            # Fails to construct test series. asfreq is not implemented
            # anyway.
            'pandas.core.generic.NDFrame.asfreq': ['*'],
            'pandas.core.generic.NDFrame.astype': ['*'],
            'pandas.core.generic.NDFrame.convert_dtypes': ['*'],
            'pandas.core.generic.NDFrame.copy': ['*'],
            'pandas.core.generic.NDFrame.droplevel': ['*'],
            'pandas.core.generic.NDFrame.infer_objects': ['*'],
            # Modified dataframe
            'pandas.core.generic.NDFrame.rank': ['df'],
            # Seems to be an upstream bug. The actual error has a different
            # message:
            #   TypeError: Index(...) must be called with a collection of
            #   some kind, 2 was passed
            # pandas doctests only verify the type of exception
            'pandas.core.generic.NDFrame.rename': ['df.rename(2)'],
            # Tests rely on setting index
            'pandas.core.generic.NDFrame.rename_axis': ['*'],
            # Raises right exception, but testing framework has matching
            # issues.
            'pandas.core.generic.NDFrame.replace': [
                "df.replace({'a string': 'new value', True: False})  # raises",
            ],
            'pandas.core.generic.NDFrame.squeeze': ['*'],

            # NameError
            'pandas.core.generic.NDFrame.resample': ['df'],

            # Skipped so we don't need to install natsort
            'pandas.core.generic.NDFrame.sort_values': [
                'from natsort import index_natsorted',
                'df.sort_values(\n'
                '   by="time",\n'
                '   key=lambda x: np.argsort(index_natsorted(df["time"]))\n'
                ')',
            ],
        }
        # The writer entries go last, exactly where the unpacked mapping sat.
        skipped.update(writer_skips)

        outcome = doctests.testmod(
            pd.core.generic,
            use_beam=False,
            report=True,
            wont_implement_ok=wont_implement,
            not_implemented_ok=not_implemented,
            skip=skipped)
        self.assertEqual(outcome.failed, 0)
  def test_dataframe_tests(self):
    """Run the doctests found in ``pd.core.frame``; none may fail.

    Three dicts are built up front and handed to ``doctests.testmod``. Each
    maps a fully qualified doctest name to a list of example source strings.
    NOTE(review): ``'*'`` looks like a wildcard for every example of that
    doctest -- confirm in ``doctests.testmod``.
    """
    # Passed as wont_implement_ok.
    wont_implement = {
        'pandas.core.frame.DataFrame.T': ['*'],
        'pandas.core.frame.DataFrame.cummax': ['*'],
        'pandas.core.frame.DataFrame.cummin': ['*'],
        'pandas.core.frame.DataFrame.cumsum': ['*'],
        'pandas.core.frame.DataFrame.cumprod': ['*'],
        'pandas.core.frame.DataFrame.diff': ['*'],
        'pandas.core.frame.DataFrame.items': ['*'],
        'pandas.core.frame.DataFrame.itertuples': ['*'],
        'pandas.core.frame.DataFrame.iterrows': ['*'],
        'pandas.core.frame.DataFrame.iteritems': ['*'],
        # default keep is 'first'
        'pandas.core.frame.DataFrame.nlargest': [
            "df.nlargest(3, 'population')",
            "df.nlargest(3, ['population', 'GDP'])",
            "df.nlargest(3, 'population', keep='last')",
        ],
        'pandas.core.frame.DataFrame.nsmallest': [
            "df.nsmallest(3, 'population')",
            "df.nsmallest(3, ['population', 'GDP'])",
            "df.nsmallest(3, 'population', keep='last')",
        ],
        'pandas.core.frame.DataFrame.nunique': ['*'],
        'pandas.core.frame.DataFrame.to_records': ['*'],
        'pandas.core.frame.DataFrame.to_dict': ['*'],
        'pandas.core.frame.DataFrame.to_numpy': ['*'],
        'pandas.core.frame.DataFrame.to_string': ['*'],
        'pandas.core.frame.DataFrame.transpose': ['*'],
        'pandas.core.frame.DataFrame.shape': ['*'],
        'pandas.core.frame.DataFrame.shift': [
            'df.shift(periods=3, freq="D")',
            'df.shift(periods=3, freq="infer")',
        ],
        'pandas.core.frame.DataFrame.unstack': ['*'],
        'pandas.core.frame.DataFrame.memory_usage': ['*'],
        'pandas.core.frame.DataFrame.info': ['*'],
        # Not equal to df.agg('mode', axis='columns', numeric_only=True)
        # because there can be multiple columns if a row has more than one
        # mode
        'pandas.core.frame.DataFrame.mode': [
            "df.mode(axis='columns', numeric_only=True)",
        ],
    }

    # Passed as not_implemented_ok.
    not_implemented = {
        'pandas.core.frame.DataFrame.isin': ['*'],
        'pandas.core.frame.DataFrame.melt': ['*'],
        'pandas.core.frame.DataFrame.axes': ['*'],
        'pandas.core.frame.DataFrame.count': ['*'],
        'pandas.core.frame.DataFrame.reindex': ['*'],
        'pandas.core.frame.DataFrame.reindex_axis': ['*'],

        # We should be able to support pivot and pivot_table for categorical
        # columns
        'pandas.core.frame.DataFrame.pivot': ['*'],

        # DataFrame.__getitem__ cannot be used as loc
        'pandas.core.frame.DataFrame.query': [
            'df[df.A > df.B]',
            "df[df.B == df['C C']]",
        ],

        # We can implement this as a zipping operator, but it won't have the
        # same capability. The doctest includes an example that branches on
        # a deferred result.
        'pandas.core.frame.DataFrame.combine': ['*'],

        # Can be implemented as a zipping operator
        'pandas.core.frame.DataFrame.combine_first': ['*'],

        # Difficult to parallelize but should be possible?
        'pandas.core.frame.DataFrame.corr': ['*'],
        'pandas.core.frame.DataFrame.cov': ['*'],
        'pandas.core.frame.DataFrame.dot': ['*'],

        # element-wise
        'pandas.core.frame.DataFrame.eval': ['*'],
        'pandas.core.frame.DataFrame.explode': ['*'],

        # Trivially elementwise for axis=columns. Relies on global indexing
        # for axis=rows.
        'pandas.core.frame.DataFrame.drop': ['*'],
        'pandas.core.frame.DataFrame.rename': ['*'],
        'pandas.core.frame.DataFrame.apply': ['*'],

        # Zipping operation if input is a DeferredSeries
        'pandas.core.frame.DataFrame.assign': ['*'],

        # In theory this is possible for bounded inputs?
        'pandas.core.frame.DataFrame.append': ['*'],
    }

    # Passed as skip.
    skipped = {
        'pandas.core.frame.DataFrame.compare': ['*'],
        'pandas.core.frame.DataFrame.drop_duplicates': ['*'],
        'pandas.core.frame.DataFrame.duplicated': ['*'],
        'pandas.core.frame.DataFrame.groupby': [
            'df.groupby(level=0).mean()',
            'df.groupby(level="Type").mean()',
            'df.groupby(by=["b"], dropna=False).sum()',
            'df.groupby(by="a", dropna=False).sum()',
        ],
        'pandas.core.frame.DataFrame.idxmax': ['*'],
        'pandas.core.frame.DataFrame.idxmin': ['*'],
        'pandas.core.frame.DataFrame.pop': ['*'],
        'pandas.core.frame.DataFrame.set_axis': ['*'],
        'pandas.core.frame.DataFrame.sort_index': ['*'],
        'pandas.core.frame.DataFrame.to_markdown': ['*'],
        'pandas.core.frame.DataFrame.to_parquet': ['*'],
        'pandas.core.frame.DataFrame.value_counts': ['*'],

        'pandas.core.frame.DataFrame.to_records': [
            'df.index = df.index.rename("I")',
            # pandas 1.x spelling of the example
            'index_dtypes = f"<S{df.index.str.len().max()}"',
            # pandas 0.x spelling of the example
            'index_dtypes = "<S{}".format(df.index.str.len().max())',
            'df.to_records(index_dtypes=index_dtypes)',
        ],
        # These tests use the static method pd.pivot_table, which doesn't
        # actually raise NotImplementedError
        'pandas.core.frame.DataFrame.pivot_table': ['*'],
        # Expected to raise a ValueError, but we raise NotImplementedError
        'pandas.core.frame.DataFrame.pivot': [
            "df.pivot(index='foo', columns='bar', values='baz')",
        ],
        'pandas.core.frame.DataFrame.append': [
            'df',
            # pylint: disable=line-too-long
            "pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)],\n"
            "          ignore_index=True)",
        ],
        'pandas.core.frame.DataFrame.eval': ['df'],
        # No override for __matmul__ and friends
        'pandas.core.frame.DataFrame.dot': ['df @ other'],
        'pandas.core.frame.DataFrame.melt': [
            "df.columns = [list('ABC'), list('DEF')]",
            "df",
        ],
        'pandas.core.frame.DataFrame.merge': [
            # Order-sensitive index, checked in frames_test.py.
            "df1.merge(df2, left_on='lkey', right_on='rkey')",
            "df1.merge(df2, left_on='lkey', right_on='rkey',\n"
            "          suffixes=('_left', '_right'))",
        ],
        # Raises right exception, but testing framework has matching issues.
        'pandas.core.frame.DataFrame.replace': [
            "df.replace({'a string': 'new value', True: False})  # raises",
        ],
        # Should raise WontImplement order-sensitive
        'pandas.core.frame.DataFrame.set_index': [
            "df.set_index([pd.Index([1, 2, 3, 4]), 'year'])",
            "df.set_index([s, s**2])",
        ],
        'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],

        # DeferredSeries has no attribute dtype. Should we allow this and
        # defer to proxy?
        'pandas.core.frame.DataFrame.iterrows': ["print(df['int'].dtype)"],

        # Skipped because "seen_wont_implement" is reset before getting to
        # these calls, so the NameError they raise is not ignored.
        'pandas.core.frame.DataFrame.T': [
            'df1_transposed.dtypes',
            'df2_transposed.dtypes',
        ],
        'pandas.core.frame.DataFrame.transpose': [
            'df1_transposed.dtypes',
            'df2_transposed.dtypes',
        ],
    }

    outcome = doctests.testmod(
        pd.core.frame,
        use_beam=False,
        report=True,
        wont_implement_ok=wont_implement,
        not_implemented_ok=not_implemented,
        skip=skipped)
    self.assertEqual(outcome.failed, 0)
Esempio n. 15
0
 def test_dataframe_tests(self):
     """Run the doctests found in ``pd.core.frame``; none may fail.

     Two dicts are built up front and handed to ``doctests.testmod``. Each
     maps a fully qualified doctest name to a list of example source
     strings. NOTE(review): ``'*'`` looks like a wildcard for every example
     of that doctest -- confirm in ``doctests.testmod``.
     """
     # Passed as wont_implement_ok.
     wont_implement = {
         'pandas.core.frame.DataFrame.T': ['*'],
         'pandas.core.frame.DataFrame.cummax': ['*'],
         'pandas.core.frame.DataFrame.cummin': ['*'],
         'pandas.core.frame.DataFrame.cumsum': ['*'],
         'pandas.core.frame.DataFrame.cumprod': ['*'],
         'pandas.core.frame.DataFrame.diff': ['*'],
         'pandas.core.frame.DataFrame.items': ['*'],
         'pandas.core.frame.DataFrame.itertuples': ['*'],
         'pandas.core.frame.DataFrame.iterrows': ['*'],
         'pandas.core.frame.DataFrame.iteritems': ['*'],
         # default keep is 'first'
         'pandas.core.frame.DataFrame.nlargest': [
             "df.nlargest(3, 'population')",
             "df.nlargest(3, ['population', 'GDP'])",
             "df.nlargest(3, 'population', keep='last')",
         ],
         'pandas.core.frame.DataFrame.nsmallest': [
             "df.nsmallest(3, 'population')",
             "df.nsmallest(3, ['population', 'GDP'])",
             "df.nsmallest(3, 'population', keep='last')",
         ],
         'pandas.core.frame.DataFrame.nunique': ['*'],
         'pandas.core.frame.DataFrame.to_records': ['*'],
         'pandas.core.frame.DataFrame.to_dict': ['*'],
         'pandas.core.frame.DataFrame.to_numpy': ['*'],
         'pandas.core.frame.DataFrame.to_string': ['*'],
         'pandas.core.frame.DataFrame.transpose': ['*'],
         'pandas.core.frame.DataFrame.shape': ['*'],
         'pandas.core.frame.DataFrame.shift': [
             'df.shift(periods=3, freq="D")',
             'df.shift(periods=3, freq="infer")',
         ],
         'pandas.core.frame.DataFrame.unstack': ['*'],
         'pandas.core.frame.DataFrame.memory_usage': ['*'],
     }

     # Passed as skip.
     skipped = {
         'pandas.core.frame.DataFrame.T': [
             'df1_transposed.dtypes',
             'df2_transposed.dtypes',
         ],
         'pandas.core.frame.DataFrame.agg': ['*'],
         'pandas.core.frame.DataFrame.aggregate': ['*'],
         'pandas.core.frame.DataFrame.append': ['*'],
         'pandas.core.frame.DataFrame.apply': ['*'],
         'pandas.core.frame.DataFrame.applymap': ['df ** 2'],
         'pandas.core.frame.DataFrame.assign': ['*'],
         'pandas.core.frame.DataFrame.axes': ['*'],
         'pandas.core.frame.DataFrame.combine': ['*'],
         'pandas.core.frame.DataFrame.combine_first': ['*'],
         'pandas.core.frame.DataFrame.compare': ['*'],
         'pandas.core.frame.DataFrame.corr': ['*'],
         'pandas.core.frame.DataFrame.count': ['*'],
         'pandas.core.frame.DataFrame.cov': ['*'],
         'pandas.core.frame.DataFrame.dot': ['*'],
         'pandas.core.frame.DataFrame.drop': ['*'],
         'pandas.core.frame.DataFrame.drop_duplicates': ['*'],
         'pandas.core.frame.DataFrame.duplicated': ['*'],
         'pandas.core.frame.DataFrame.eval': ['*'],
         'pandas.core.frame.DataFrame.explode': ['*'],
         'pandas.core.frame.DataFrame.groupby': [
             # More keyword arguments.
             'df.groupby(level=0).mean()',
             'df.groupby(level="Type").mean()',
             'df.groupby(by=["b"], dropna=False).sum()',
             'df.groupby(by="a", dropna=False).sum()',
         ],
         'pandas.core.frame.DataFrame.idxmax': ['*'],
         'pandas.core.frame.DataFrame.idxmin': ['*'],
         'pandas.core.frame.DataFrame.info': ['*'],
         'pandas.core.frame.DataFrame.isin': ['*'],
         'pandas.core.frame.DataFrame.iterrows': [
             "print(df['int'].dtype)",
         ],
         'pandas.core.frame.DataFrame.melt': ['*'],
         'pandas.core.frame.DataFrame.memory_usage': ['*'],
         'pandas.core.frame.DataFrame.merge': [
             # Order-sensitive index, checked in frames_test.py.
             "df1.merge(df2, left_on='lkey', right_on='rkey')",
             "df1.merge(df2, left_on='lkey', right_on='rkey',\n"
             "          suffixes=('_left', '_right'))",
         ],
         # Not equal to df.agg('mode', axis='columns', numeric_only=True)
         'pandas.core.frame.DataFrame.mode': [
             "df.mode(axis='columns', numeric_only=True)",
         ],
         'pandas.core.frame.DataFrame.pivot': ['*'],
         'pandas.core.frame.DataFrame.pivot_table': ['*'],
         'pandas.core.frame.DataFrame.pop': ['*'],
         'pandas.core.frame.DataFrame.query': ['*'],
         'pandas.core.frame.DataFrame.reindex': ['*'],
         # Sets df.index
         'pandas.core.frame.DataFrame.reindex_axis': ['*'],
         'pandas.core.frame.DataFrame.rename': ['*'],
         # Raises right exception, but testing framework has matching
         # issues.
         'pandas.core.frame.DataFrame.replace': [
             "df.replace({'a string': 'new value', True: False})  # raises",
         ],
         # Uses unseeded np.random.
         'pandas.core.frame.DataFrame.round': ['*'],
         'pandas.core.frame.DataFrame.set_axis': ['*'],
         'pandas.core.frame.DataFrame.set_index': ['*'],
         'pandas.core.frame.DataFrame.sort_index': ['*'],
         'pandas.core.frame.DataFrame.transpose': [
             'df1_transposed.dtypes',
             'df2_transposed.dtypes',
         ],
         'pandas.core.frame.DataFrame.to_markdown': ['*'],
         'pandas.core.frame.DataFrame.to_parquet': ['*'],
         # Uses df.index
         'pandas.core.frame.DataFrame.to_records': ['*'],
         'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],
         'pandas.core.frame.DataFrame.value_counts': ['*'],
     }

     outcome = doctests.testmod(
         pd.core.frame,
         use_beam=False,
         report=True,
         wont_implement_ok=wont_implement,
         skip=skipped)
     self.assertEqual(outcome.failed, 0)
 def test_series_tests(self):
   """Run the doctests found in ``pd.core.series``; none may fail.

   Three dicts are built up front and handed to ``doctests.testmod``. Each
   maps a fully qualified doctest name to a list of example source strings.
   NOTE(review): ``'*'`` looks like a wildcard for every example of that
   doctest -- confirm in ``doctests.testmod``.
   """
   # Passed as wont_implement_ok.
   wont_implement = {
       'pandas.core.series.Series.__array__': ['*'],
       'pandas.core.series.Series.cummax': ['*'],
       'pandas.core.series.Series.cummin': ['*'],
       'pandas.core.series.Series.cumsum': ['*'],
       'pandas.core.series.Series.cumprod': ['*'],
       'pandas.core.series.Series.diff': ['*'],
       'pandas.core.series.Series.items': ['*'],
       'pandas.core.series.Series.iteritems': ['*'],
       # default keep is 'first'
       'pandas.core.series.Series.nlargest': [
           "s.nlargest()",
           "s.nlargest(3)",
           "s.nlargest(3, keep='last')",
       ],
       'pandas.core.series.Series.memory_usage': ['*'],
       'pandas.core.series.Series.nsmallest': [
           "s.nsmallest()",
           "s.nsmallest(3)",
           "s.nsmallest(3, keep='last')",
       ],
       'pandas.core.series.Series.searchsorted': ['*'],
       'pandas.core.series.Series.shift': ['*'],
       'pandas.core.series.Series.take': ['*'],
       'pandas.core.series.Series.to_dict': ['*'],
       'pandas.core.series.Series.unique': ['*'],
       'pandas.core.series.Series.unstack': ['*'],
       'pandas.core.series.Series.values': ['*'],
       'pandas.core.series.Series.view': ['*'],
   }

   # Passed as not_implemented_ok.
   not_implemented = {
       'pandas.core.series.Series.reindex': ['*'],
   }

   # Passed as skip.
   skipped = {
       'pandas.core.series.Series.array': ['*'],
       'pandas.core.series.Series.append': ['*'],
       'pandas.core.series.Series.argmax': ['*'],
       'pandas.core.series.Series.argmin': ['*'],
       'pandas.core.series.Series.autocorr': ['*'],
       'pandas.core.series.Series.combine': ['*'],
       'pandas.core.series.Series.combine_first': ['*'],
       'pandas.core.series.Series.compare': ['*'],
       'pandas.core.series.Series.corr': ['*'],
       'pandas.core.series.Series.count': ['*'],
       'pandas.core.series.Series.cov': ['*'],
       'pandas.core.series.Series.dot': ['*'],
       'pandas.core.series.Series.drop': ['*'],
       'pandas.core.series.Series.drop_duplicates': ['*'],
       'pandas.core.series.Series.duplicated': ['*'],
       'pandas.core.series.Series.explode': ['*'],
       'pandas.core.series.Series.groupby': ['*'],
       'pandas.core.series.Series.idxmax': ['*'],
       'pandas.core.series.Series.idxmin': ['*'],
       'pandas.core.series.Series.name': ['*'],
       'pandas.core.series.Series.nonzero': ['*'],
       'pandas.core.series.Series.pop': ['*'],
       'pandas.core.series.Series.quantile': ['*'],
       'pandas.core.series.Series.rename': ['*'],
       'pandas.core.series.Series.repeat': ['*'],
       'pandas.core.series.Series.replace': ['*'],
       'pandas.core.series.Series.reset_index': ['*'],
       # This doctest seems to be incorrectly parsed.
       'pandas.core.series.Series.searchsorted': [
           "x = pd.Categorical(['apple', 'bread', 'bread',",
       ],
       'pandas.core.series.Series.set_axis': ['*'],
       'pandas.core.series.Series.sort_index': ['*'],
       'pandas.core.series.Series.sort_values': ['*'],
       'pandas.core.series.Series.to_csv': ['*'],
       'pandas.core.series.Series.to_markdown': ['*'],
       'pandas.core.series.Series.update': ['*'],
       # Inspection after modification.
       'pandas.core.series.Series.view': ['s'],
   }

   outcome = doctests.testmod(
       pd.core.series,
       use_beam=False,
       report=True,
       wont_implement_ok=wont_implement,
       not_implemented_ok=not_implemented,
       skip=skipped)
   self.assertEqual(outcome.failed, 0)
Esempio n. 17
0
  def test_groupby_tests(self):
    result = doctests.testmod(
        pd.core.groupby.groupby,
        use_beam=False,
        wont_implement_ok={
            'pandas.core.groupby.groupby.GroupBy.head': ['*'],
            'pandas.core.groupby.groupby.GroupBy.tail': ['*'],
            'pandas.core.groupby.groupby.GroupBy.nth': ['*'],
            'pandas.core.groupby.groupby.GroupBy.cumcount': ['*'],
            'pandas.core.groupby.groupby.GroupBy.resample': ['*'],
        },
        not_implemented_ok={
            'pandas.core.groupby.groupby.GroupBy.ngroup': ['*'],
            'pandas.core.groupby.groupby.GroupBy.sample': ['*'],
            'pandas.core.groupby.groupby.GroupBy.rank': ['*'],
            'pandas.core.groupby.groupby.GroupBy.nth': [
                "df.groupby('A', as_index=False).nth(1)",
            ],
        },
        skip={
            # Uses iloc to mutate a DataFrame
            'pandas.core.groupby.groupby.GroupBy.resample': [
                'df.iloc[2, 0] = 5',
                'df',
            ],
            # df is reassigned
            'pandas.core.groupby.groupby.GroupBy.rank': ['df'],
            # TODO: Raise wont implement for list passed as a grouping column
            # Currently raises unhashable type: list
            'pandas.core.groupby.groupby.GroupBy.ngroup': [
                'df.groupby(["A", [1,1,2,3,2,1]]).ngroup()'
            ],
        })
    self.assertEqual(result.failed, 0)

    result = doctests.testmod(
        pd.core.groupby.generic,
        use_beam=False,
        wont_implement_ok={
            # Returns an array by default, not a Series. WontImplement
            # (non-deferred)
            'pandas.core.groupby.generic.SeriesGroupBy.unique': ['*'],
            # TODO: Is take actually deprecated?
            'pandas.core.groupby.generic.DataFrameGroupBy.take': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.take': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.nsmallest': [
                "s.nsmallest(3, keep='last')",
                "s.nsmallest(3)",
                "s.nsmallest()",
            ],
            'pandas.core.groupby.generic.SeriesGroupBy.nlargest': [
                "s.nlargest(3, keep='last')",
                "s.nlargest(3)",
                "s.nlargest()",
            ],
            'pandas.core.groupby.generic.DataFrameGroupBy.diff': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.diff': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.hist': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.fillna': [
                'df.fillna(method=\'ffill\')',
                'df.fillna(method="ffill")',
                'df.fillna(value=values, limit=1)',
            ],
            'pandas.core.groupby.generic.SeriesGroupBy.fillna': [
                'df.fillna(method=\'ffill\')',
                'df.fillna(method="ffill")',
                'df.fillna(value=values, limit=1)',
            ],
        },
        not_implemented_ok={
            'pandas.core.groupby.generic.DataFrameGroupBy.idxmax': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.idxmin': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.transform': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.idxmax': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.idxmin': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.apply': ['*'],
        },
        skip={
            'pandas.core.groupby.generic.SeriesGroupBy.cov': [
                # Floating point comparison fails
                's1.cov(s2)',
            ],
            'pandas.core.groupby.generic.DataFrameGroupBy.cov': [
                # Mutates input DataFrame with loc
                # TODO: Replicate in frames_test.py
                "df.loc[df.index[:5], 'a'] = np.nan",
                "df.loc[df.index[5:10], 'b'] = np.nan",
                "df.cov(min_periods=12)",
            ],
            # These examples rely on grouping by a list
            'pandas.core.groupby.generic.SeriesGroupBy.aggregate': ['*'],
            'pandas.core.groupby.generic.DataFrameGroupBy.aggregate': ['*'],
            'pandas.core.groupby.generic.SeriesGroupBy.transform': [
                # Dropping invalid columns during a transform is unsupported.
                'grouped.transform(lambda x: (x - x.mean()) / x.std())'
            ],
            'pandas.core.groupby.generic.DataFrameGroupBy.transform': [
                # Dropping invalid columns during a transform is unsupported.
                'grouped.transform(lambda x: (x - x.mean()) / x.std())'
            ],
            # Skipped idxmax/idxmin due an issue with the test framework
            'pandas.core.groupby.generic.SeriesGroupBy.idxmin': ['s.idxmin()'],
            'pandas.core.groupby.generic.SeriesGroupBy.idxmax': ['s.idxmax()'],
            # Uses as_index, which is currently not_implemented
            'pandas.core.groupby.generic.DataFrameGroupBy.value_counts': [
                "df.groupby('gender', as_index=False).value_counts()",
                # pylint: disable=line-too-long
                "df.groupby('gender', as_index=False).value_counts(normalize=True)",
            ],
        })
    self.assertEqual(result.failed, 0)
Esempio n. 18
0
  def test_dataframe_tests(self):
    # Runs the doctests of pandas.core.frame through doctests.testmod (with
    # use_beam=False and report=True) and requires that none of them fail.
    #
    # The three tables below are handed to testmod as its wont_implement_ok,
    # not_implemented_ok and skip arguments. Each one maps the fully
    # qualified name of a docstring owner to the example sources concerned;
    # '*' presumably selects every example of that owner (confirm against
    # the doctests module).

    # Passed as wont_implement_ok.
    wont_implement = {
        'pandas.core.frame.DataFrame.T': ['*'],
        'pandas.core.frame.DataFrame.cummax': ['*'],
        'pandas.core.frame.DataFrame.cummin': ['*'],
        'pandas.core.frame.DataFrame.cumsum': ['*'],
        'pandas.core.frame.DataFrame.cumprod': ['*'],
        'pandas.core.frame.DataFrame.diff': ['*'],
        'pandas.core.frame.DataFrame.fillna': [
            'df.fillna(method=\'ffill\')',
            'df.fillna(method="ffill")',
            'df.fillna(value=values, limit=1)',
        ],
        'pandas.core.frame.DataFrame.items': ['*'],
        'pandas.core.frame.DataFrame.itertuples': ['*'],
        'pandas.core.frame.DataFrame.iterrows': ['*'],
        'pandas.core.frame.DataFrame.iteritems': ['*'],
        # default keep is 'first'
        'pandas.core.frame.DataFrame.nlargest': [
            "df.nlargest(3, 'population')",
            "df.nlargest(3, ['population', 'GDP'])",
            "df.nlargest(3, 'population', keep='last')",
        ],
        'pandas.core.frame.DataFrame.nsmallest': [
            "df.nsmallest(3, 'population')",
            "df.nsmallest(3, ['population', 'GDP'])",
            "df.nsmallest(3, 'population', keep='last')",
        ],
        'pandas.core.frame.DataFrame.replace': [
            "s.replace([1, 2], method='bfill')",
            # Relies on method='pad'
            "s.replace('a')",
            # Relies on method='pad'
            # value=None is not valid for pandas < 1.4
            "s.replace('a', None)",
            # Implicitly uses method='pad', but output doesn't rely on that
            # behavior. Verified independently in
            # frames_test.py::DeferredFrameTest::test_replace
            "df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})",
        ],
        'pandas.core.frame.DataFrame.to_records': ['*'],
        'pandas.core.frame.DataFrame.to_dict': ['*'],
        'pandas.core.frame.DataFrame.to_numpy': ['*'],
        'pandas.core.frame.DataFrame.to_string': ['*'],
        'pandas.core.frame.DataFrame.transpose': ['*'],
        'pandas.core.frame.DataFrame.shape': ['*'],
        'pandas.core.frame.DataFrame.shift': [
            'df.shift(periods=3)',
            'df.shift(periods=3, fill_value=0)',
        ],
        'pandas.core.frame.DataFrame.unstack': ['*'],
        'pandas.core.frame.DataFrame.memory_usage': ['*'],
        'pandas.core.frame.DataFrame.info': ['*'],
        # Not equal to df.agg('mode', axis='columns', numeric_only=True)
        # because there can be multiple columns if a row has more than one
        # mode
        'pandas.core.frame.DataFrame.mode': [
            "df.mode(axis='columns', numeric_only=True)",
        ],
        'pandas.core.frame.DataFrame.append': [
            'df.append(df2, ignore_index=True)',
            # One multi-line example, written as adjacent literals.
            "for i in range(5):\n"
            "    df = df.append({'A': i}, ignore_index=True)",
        ],
        'pandas.core.frame.DataFrame.sort_index': ['*'],
        'pandas.core.frame.DataFrame.sort_values': ['*'],
        'pandas.core.frame.DataFrame.melt': [
            "df.melt(id_vars=['A'], value_vars=['B'])",
            "df.melt(id_vars=['A'], value_vars=['B', 'C'])",
            "df.melt(col_level=0, id_vars=['A'], value_vars=['B'])",
            "df.melt(id_vars=[('A', 'D')], value_vars=[('B', 'E')])",
            "df.melt(id_vars=['A'], value_vars=['B'],\n"
            "        var_name='myVarname', value_name='myValname')",
        ],
        # Most keep= options are order-sensitive
        'pandas.core.frame.DataFrame.drop_duplicates': ['*'],
        'pandas.core.frame.DataFrame.duplicated': [
            'df.duplicated()',
            "df.duplicated(keep='last')",
            "df.duplicated(subset=['brand'])",
        ],
        'pandas.core.frame.DataFrame.reindex': ['*'],
        'pandas.core.frame.DataFrame.dot': [
            # reindex not supported
            's2 = s.reindex([1, 0, 2, 3])',
        ],
        'pandas.core.frame.DataFrame.resample': ['*'],
        'pandas.core.frame.DataFrame.values': ['*'],
    }

    # Passed as not_implemented_ok.
    not_implemented = {
        'pandas.core.frame.DataFrame.transform': [
            # str arg not supported. Tested with np.sum in
            # frames_test.py::DeferredFrameTest::test_groupby_transform_sum
            "df.groupby('Date')['Data'].transform('sum')",
        ],
        'pandas.core.frame.DataFrame.melt': ['*'],
        'pandas.core.frame.DataFrame.reindex_axis': ['*'],
        'pandas.core.frame.DataFrame.round': [
            'df.round(decimals)',
        ],

        # We should be able to support pivot and pivot_table for categorical
        # columns
        'pandas.core.frame.DataFrame.pivot': ['*'],

        # Trivially elementwise for axis=columns. Relies on global indexing
        # for axis=rows.
        # Difficult to determine proxy, need to inspect function
        'pandas.core.frame.DataFrame.apply': ['*'],

        # Cross-join not implemented
        'pandas.core.frame.DataFrame.merge': [
            "df1.merge(df2, how='cross')",
        ],

        # TODO(BEAM-11711)
        'pandas.core.frame.DataFrame.set_index': [
            "df.set_index([s, s**2])",
        ],

        'pandas.core.frame.DataFrame.set_axis': [
            "df.set_axis(range(0,2), axis='index')",
        ],

        # TODO(BEAM-12495)
        'pandas.core.frame.DataFrame.value_counts': [
            'df.value_counts(dropna=False)',
        ],
    }

    # Passed as skip.
    skipped = {
        # DataFrame construction from a dictionary and
        # Series requires using the len() function, which
        # is a non-deferred operation that we do not allow
        'pandas.core.frame.DataFrame': [
            'pd.DataFrame(data=d, index=[0, 1, 2, 3])',
        ],
        # s2 created with reindex
        'pandas.core.frame.DataFrame.dot': [
            'df.dot(s2)',
        ],

        'pandas.core.frame.DataFrame.resample': ['df'],
        'pandas.core.frame.DataFrame.asfreq': ['*'],
        # Throws NotImplementedError when modifying df
        # NOTE(review): the remark above may belong to a different entry;
        # the only example listed for axes is annotated as returning a
        # deferred index.
        'pandas.core.frame.DataFrame.axes': [
            # Returns deferred index.
            'df.axes',
        ],
        # Skipped because the example relies on loc to set cells in df2
        'pandas.core.frame.DataFrame.compare': ['*'],
        'pandas.core.frame.DataFrame.cov': [
            # Relies on setting entries ahead of time.
            "df.loc[df.index[:5], 'a'] = np.nan",
            "df.loc[df.index[5:10], 'b'] = np.nan",
            'df.cov(min_periods=12)',
        ],
        'pandas.core.frame.DataFrame.rename': [
            # Returns deferred index.
            'df.index',
            'df.rename(index=str).index',
        ],
        'pandas.core.frame.DataFrame.set_index': [
            # TODO(BEAM-11711): This could pass in the index as
            # a DeferredIndex, and we should fail it as order-sensitive.
            "df.set_index([pd.Index([1, 2, 3, 4]), 'year'])",
        ],
        'pandas.core.frame.DataFrame.set_axis': [
            # This should pass as set_axis(axis='columns')
            # and fail with set_axis(axis='index')
            "df.set_axis(['a', 'b', 'c'], axis='index')",
        ],
        'pandas.core.frame.DataFrame.to_markdown': ['*'],
        'pandas.core.frame.DataFrame.to_parquet': ['*'],

        # Raises right exception, but testing framework has matching issues.
        # Tested in `frames_test.py`.
        'pandas.core.frame.DataFrame.insert': [
            'df',
            'df.insert(1, "newcol", [99, 99])',
            'df.insert(0, "col1", [100, 100], allow_duplicates=True)',
        ],

        'pandas.core.frame.DataFrame.to_records': [
            'df.index = df.index.rename("I")',
            'index_dtypes = f"<S{df.index.str.len().max()}"',  # 1.x
            'index_dtypes = "<S{}".format(df.index.str.len().max())',  # 0.x
            'df.to_records(index_dtypes=index_dtypes)',
        ],
        # These tests use the static method pd.pivot_table, which doesn't
        # actually raise NotImplementedError
        'pandas.core.frame.DataFrame.pivot_table': ['*'],
        # Expected to raise a ValueError, but we raise NotImplementedError
        'pandas.core.frame.DataFrame.pivot': [
            "df.pivot(index='foo', columns='bar', values='baz')",
        ],
        'pandas.core.frame.DataFrame.append': [
            'df',
            # pylint: disable=line-too-long
            "pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)],\n"
            "          ignore_index=True)",
        ],
        'pandas.core.frame.DataFrame.eval': ['df'],
        'pandas.core.frame.DataFrame.melt': [
            "df.columns = [list('ABC'), list('DEF')]",
            "df",
        ],
        'pandas.core.frame.DataFrame.merge': [
            # Order-sensitive index, checked in frames_test.py.
            "df1.merge(df2, left_on='lkey', right_on='rkey')",
            "df1.merge(df2, left_on='lkey', right_on='rkey',\n"
            "          suffixes=('_left', '_right'))",
            "df1.merge(df2, how='left', on='a')",
        ],
        # Raises right exception, but testing framework has matching issues.
        'pandas.core.frame.DataFrame.replace': [
            "df.replace({'a string': 'new value', True: False})  # raises",
        ],
        'pandas.core.frame.DataFrame.to_sparse': ['type(df)'],

        # Skipped because "seen_wont_implement" is reset before getting to
        # these calls, so the NameError they raise is not ignored.
        'pandas.core.frame.DataFrame.T': [
            'df1_transposed.dtypes',
            'df2_transposed.dtypes',
        ],
        'pandas.core.frame.DataFrame.transpose': [
            'df1_transposed.dtypes',
            'df2_transposed.dtypes',
        ],
        # Skipped because the example relies on iloc to set a cell to NA.
        # Test is replicated in frames_test::DeferredFrameTest::test_applymap.
        'pandas.core.frame.DataFrame.applymap': [
            'df_copy.iloc[0, 0] = pd.NA',
            "df_copy.applymap(lambda x: len(str(x)), na_action='ignore')",
        ],
        # Skipped so we don't need to install natsort
        'pandas.core.frame.DataFrame.sort_values': [
            'from natsort import index_natsorted',
            'df.sort_values(\n'
            '   by="time",\n'
            '   key=lambda x: np.argsort(index_natsorted(df["time"]))\n'
            ')',
        ],
        # Mode that we don't yet support, documentation added in pandas
        # 1.2.0 (https://github.com/pandas-dev/pandas/issues/35912)
        'pandas.core.frame.DataFrame.aggregate': [
            "df.agg(x=('A', max), y=('B', 'min'), z=('C', np.mean))",
        ],
    }

    outcome = doctests.testmod(
        pd.core.frame,
        use_beam=False,
        report=True,
        wont_implement_ok=wont_implement,
        not_implemented_ok=not_implemented,
        skip=skipped)
    self.assertEqual(outcome.failed, 0)
Esempio n. 19
0
 def test_string_tests(self):
   # TODO(BEAM-10720)
   # Runs the doctests of pandas.core.strings through doctests.testmod with
   # use_beam=False and requires that none of them fail. Every owner listed
   # below is passed in the skip table with the '*' pattern, which presumably
   # skips all of its examples (confirm against the doctests module).
   skipped = {
       'pandas.core.strings.StringMethods': ['*'],
       'pandas.core.strings.StringMethods.capitalize': ['*'],
       'pandas.core.strings.StringMethods.casefold': ['*'],
       'pandas.core.strings.StringMethods.cat': ['*'],
       'pandas.core.strings.StringMethods.contains': ['*'],
       'pandas.core.strings.StringMethods.count': ['*'],
       'pandas.core.strings.StringMethods.endswith': ['*'],
       'pandas.core.strings.StringMethods.extract': ['*'],
       'pandas.core.strings.StringMethods.extractall': ['*'],
       'pandas.core.strings.StringMethods.findall': ['*'],
       'pandas.core.strings.StringMethods.get': ['*'],
       'pandas.core.strings.StringMethods.get_dummies': ['*'],
       'pandas.core.strings.StringMethods.isalnum': ['*'],
       'pandas.core.strings.StringMethods.isalpha': ['*'],
       'pandas.core.strings.StringMethods.isdecimal': ['*'],
       'pandas.core.strings.StringMethods.isdigit': ['*'],
       'pandas.core.strings.StringMethods.islower': ['*'],
       'pandas.core.strings.StringMethods.isnumeric': ['*'],
       'pandas.core.strings.StringMethods.isspace': ['*'],
       'pandas.core.strings.StringMethods.istitle': ['*'],
       'pandas.core.strings.StringMethods.isupper': ['*'],
       'pandas.core.strings.StringMethods.join': ['*'],
       'pandas.core.strings.StringMethods.len': ['*'],
       'pandas.core.strings.StringMethods.lower': ['*'],
       'pandas.core.strings.StringMethods.lstrip': ['*'],
       'pandas.core.strings.StringMethods.pad': ['*'],
       'pandas.core.strings.StringMethods.partition': ['*'],
       'pandas.core.strings.StringMethods.repeat': ['*'],
       'pandas.core.strings.StringMethods.replace': ['*'],
       'pandas.core.strings.StringMethods.rpartition': ['*'],
       'pandas.core.strings.StringMethods.rsplit': ['*'],
       'pandas.core.strings.StringMethods.rstrip': ['*'],
       'pandas.core.strings.StringMethods.slice': ['*'],
       'pandas.core.strings.StringMethods.slice_replace': ['*'],
       'pandas.core.strings.StringMethods.split': ['*'],
       'pandas.core.strings.StringMethods.startswith': ['*'],
       'pandas.core.strings.StringMethods.strip': ['*'],
       'pandas.core.strings.StringMethods.swapcase': ['*'],
       'pandas.core.strings.StringMethods.title': ['*'],
       'pandas.core.strings.StringMethods.upper': ['*'],
       'pandas.core.strings.StringMethods.wrap': ['*'],
       'pandas.core.strings.StringMethods.zfill': ['*'],
       'pandas.core.strings.str_contains': ['*'],
       'pandas.core.strings.str_count': ['*'],
       'pandas.core.strings.str_endswith': ['*'],
       'pandas.core.strings.str_extract': ['*'],
       'pandas.core.strings.str_extractall': ['*'],
       'pandas.core.strings.str_findall': ['*'],
       'pandas.core.strings.str_get': ['*'],
       'pandas.core.strings.str_get_dummies': ['*'],
       'pandas.core.strings.str_join': ['*'],
       'pandas.core.strings.str_pad': ['*'],
       'pandas.core.strings.str_repeat': ['*'],
       'pandas.core.strings.str_replace': ['*'],
       'pandas.core.strings.str_slice': ['*'],
       'pandas.core.strings.str_slice_replace': ['*'],
       'pandas.core.strings.str_startswith': ['*'],
       'pandas.core.strings.str_wrap': ['*'],
   }

   outcome = doctests.testmod(
       pd.core.strings, use_beam=False, skip=skipped)
   self.assertEqual(outcome.failed, 0)
Esempio n. 20
0
 def test_series_tests(self):
   # Runs the doctests of pandas.core.series through doctests.testmod (with
   # use_beam=False and report=True) and requires that none of them fail.
   #
   # The three tables below are handed to testmod as its wont_implement_ok,
   # not_implemented_ok and skip arguments. Each one maps the fully
   # qualified name of a docstring owner to the example sources concerned;
   # '*' presumably selects every example of that owner (confirm against
   # the doctests module).

   # Passed as wont_implement_ok.
   wont_implement = {
       'pandas.core.series.Series.__array__': ['*'],
       'pandas.core.series.Series.array': ['*'],
       'pandas.core.series.Series.cummax': ['*'],
       'pandas.core.series.Series.cummin': ['*'],
       'pandas.core.series.Series.cumsum': ['*'],
       'pandas.core.series.Series.cumprod': ['*'],
       'pandas.core.series.Series.diff': ['*'],
       'pandas.core.series.Series.dot': [
           's.dot(arr)',  # non-deferred result
       ],
       'pandas.core.series.Series.fillna': [
           'df.fillna(method=\'ffill\')',
           'df.fillna(method="ffill")',
           'df.fillna(value=values, limit=1)',
       ],
       'pandas.core.series.Series.info': ['*'],
       'pandas.core.series.Series.items': ['*'],
       'pandas.core.series.Series.iteritems': ['*'],
       # default keep is 'first'
       'pandas.core.series.Series.nlargest': [
           "s.nlargest()",
           "s.nlargest(3)",
           "s.nlargest(3, keep='last')",
       ],
       'pandas.core.series.Series.memory_usage': ['*'],
       'pandas.core.series.Series.nsmallest': [
           "s.nsmallest()",
           "s.nsmallest(3)",
           "s.nsmallest(3, keep='last')",
       ],
       'pandas.core.series.Series.pop': ['*'],
       'pandas.core.series.Series.searchsorted': ['*'],
       'pandas.core.series.Series.shift': [
           'df.shift(periods=3)',
           'df.shift(periods=3, fill_value=0)',
       ],
       'pandas.core.series.Series.take': ['*'],
       'pandas.core.series.Series.to_dict': ['*'],
       'pandas.core.series.Series.unique': ['*'],
       'pandas.core.series.Series.unstack': ['*'],
       'pandas.core.series.Series.values': ['*'],
       'pandas.core.series.Series.view': ['*'],
       'pandas.core.series.Series.append': [
           's1.append(s2, ignore_index=True)',
       ],
       'pandas.core.series.Series.replace': [
           "s.replace([1, 2], method='bfill')",
           # Relies on method='pad'
           "s.replace('a')",
           # Relies on method='pad'
           # value=None is not valid for pandas < 1.4
           "s.replace('a', None)",
           # Implicitly uses method='pad', but output doesn't rely on that
           # behavior. Verified independently in
           # frames_test.py::DeferredFrameTest::test_replace
           "df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})",
       ],
       'pandas.core.series.Series.sort_index': ['*'],
       'pandas.core.series.Series.sort_values': ['*'],
       'pandas.core.series.Series.argmax': ['*'],
       'pandas.core.series.Series.argmin': ['*'],
       'pandas.core.series.Series.drop_duplicates': [
           's.drop_duplicates()',
           "s.drop_duplicates(keep='last')",
       ],
       'pandas.core.series.Series.reindex': ['*'],
       'pandas.core.series.Series.autocorr': ['*'],
       'pandas.core.series.Series.repeat': ['s.repeat([1, 2, 3])'],
       'pandas.core.series.Series.resample': ['*'],
       'pandas.core.series.Series': ['ser.iloc[0] = 999'],
   }

   # Passed as not_implemented_ok.
   not_implemented = {
       'pandas.core.series.Series.transform': [
           # str arg not supported. Tested with np.sum in
           # frames_test.py::DeferredFrameTest::test_groupby_transform_sum
           "df.groupby('Date')['Data'].transform('sum')",
       ],
       'pandas.core.series.Series.groupby': [
           'ser.groupby(["a", "b", "a", "b"]).mean()',
           'ser.groupby(["a", "b", "a", np.nan]).mean()',
           'ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()',
       ],
   }

   # Passed as skip.
   skipped = {
       # Relies on setting values with iloc
       'pandas.core.series.Series': ['ser', 'r'],
       'pandas.core.series.Series.groupby': [
           # TODO(BEAM-11393): This example requires aligning two series
           # with non-unique indexes. It only works in pandas because
           # pandas can recognize the indexes are identical and elide the
           # alignment.
           'ser.groupby(ser > 100).mean()',
       ],
       'pandas.core.series.Series.asfreq': ['*'],
       # error formatting
       'pandas.core.series.Series.append': [
           's1.append(s2, verify_integrity=True)',
       ],
       'pandas.core.series.Series.cov': [
           # Differs in LSB on jenkins.
           "s1.cov(s2)",
       ],
       # Skipped idxmax/idxmin due to an issue with the test framework
       'pandas.core.series.Series.idxmin': ['s.idxmin()'],
       'pandas.core.series.Series.idxmax': ['s.idxmax()'],
       'pandas.core.series.Series.duplicated': ['*'],
       'pandas.core.series.Series.set_axis': ['*'],
       'pandas.core.series.Series.nonzero': ['*'],
       'pandas.core.series.Series.pop': ['ser'],  # testing side effect
       # Raises right exception, but testing framework has matching issues.
       'pandas.core.series.Series.replace': [
           "df.replace({'a string': 'new value', True: False})  # raises",
       ],
       'pandas.core.series.Series.searchsorted': [
           # This doctest seems to be incorrectly parsed.
           "x = pd.Categorical(['apple', 'bread', 'bread',",
       ],
       'pandas.core.series.Series.to_csv': ['*'],
       'pandas.core.series.Series.to_markdown': ['*'],
       'pandas.core.series.Series.update': ['*'],
       'pandas.core.series.Series.view': [
           # Inspection after modification.
           's',
       ],
       'pandas.core.series.Series.resample': ['df'],
   }

   outcome = doctests.testmod(
       pd.core.series,
       use_beam=False,
       report=True,
       wont_implement_ok=wont_implement,
       not_implemented_ok=not_implemented,
       skip=skipped)
   self.assertEqual(outcome.failed, 0)