Example #1
0
    def test_apply(self):
        """Exercise ``DataFrame.apply`` with a ufunc, a reducer on both axes,
        an invalid axis and a category-casting lambda (GH9573)."""
        with np.errstate(all='ignore'):
            # element-wise ufunc
            rooted = self.frame.apply(np.sqrt)
            assert_series_equal(np.sqrt(self.frame['A']), rooted['A'])

            # reducer applied with the default axis
            col_means = self.frame.apply(np.mean)
            self.assertEqual(col_means['A'], np.mean(self.frame['A']))

            # reducer applied with axis=1, checked on the first index label
            first_label = self.frame.index[0]
            row_means = self.frame.apply(np.mean, axis=1)
            self.assertEqual(row_means[first_label],
                             np.mean(self.frame.xs(first_label)))
            # the very same index object is wanted back, not a copy
            self.assertIs(row_means.index, self.frame.index)

        # axis=2 is not a valid axis and must be rejected
        dup_indexed = DataFrame(
            [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])
        self.assertRaises(ValueError, dup_indexed.apply, lambda x: x, 2)

        # GH9573: casting every column to category keeps shape and dtype
        strings = DataFrame({'c0': ['A', 'A', 'B', 'B'],
                             'c1': ['C', 'C', 'D', 'D']})
        categorized = strings.apply(lambda ts: ts.astype('category'))
        self.assertEqual(categorized.shape, (4, 2))
        for column in ('c0', 'c1'):
            self.assertTrue(
                isinstance(categorized[column].dtype, CategoricalDtype))
Example #2
0
    def test_query_python(self):
        """``query`` and ``eval`` with ``engine='python'`` reproduce the
        expectations stored on the test instance."""
        frame = self.df

        filtered = frame.query('A>0', engine='python')
        assert_frame_equal(filtered, self.expected1)

        # names are deliberately not compared for the eval result
        evaluated = frame.eval('A+1', engine='python')
        assert_series_equal(evaluated, self.expected2, check_names=False)
Example #3
0
    def test_sort_index_different_sortorder(self):
        """Sort on two keys with opposite directions: by columns, by a
        MultiIndex, and on a Series taken from the MultiIndexed frame."""
        shuffle = np.random.permutation(100)
        col_a = np.arange(20).repeat(5).take(shuffle)
        col_b = np.tile(np.arange(5), 20).take(shuffle)

        df = DataFrame({'A': col_a, 'B': col_b,
                        'C': np.random.randn(100)})

        # sort_index(by=...) is expected to emit a FutureWarning;
        # sort_values is what should be used instead (#9816)
        with tm.assert_produces_warning(FutureWarning):
            df.sort_index(by=['A', 'B'], ascending=[1, 0])
        by_values = df.sort_values(by=['A', 'B'], ascending=[1, 0])

        # reference ordering: A ascending, then B descending (max - B flips B)
        ex_indexer = np.lexsort((df.B.max() - df.B, df.A))
        assert_frame_equal(by_values, df.take(ex_indexer))

        # test with multiindex, too
        indexed = df.set_index(['A', 'B'])
        sorted_indexed = indexed.sort_index(ascending=[1, 0])
        want_indexed = indexed.take(ex_indexer)
        assert_frame_equal(sorted_indexed, want_indexed)

        # also, Series!
        sorted_column = indexed['C'].sort_index(ascending=[1, 0])
        assert_series_equal(sorted_column, want_indexed['C'])
Example #4
0
    def test_secondary_y(self):
        """Check the axes attributes and tick placement produced by plotting
        a Series with and without ``secondary_y=True``."""
        import matplotlib.pyplot as plt

        secondary = Series(np.random.randn(10))
        plain = Series(np.random.randn(10))

        # a lone secondary_y plot: returned axes point back at a left axes
        sec_ax = secondary.plot(secondary_y=True)
        self.assertTrue(hasattr(sec_ax, 'left_ax'))
        self.assertFalse(hasattr(sec_ax, 'right_ax'))
        figure = sec_ax.get_figure()
        figure_axes = figure.get_axes()
        # the drawn line must round-trip to the plotted data
        line = sec_ax.get_lines()[0]
        drawn = Series(line.get_ydata(), line.get_xdata())
        assert_series_equal(secondary, drawn)
        self.assertEqual(sec_ax.get_yaxis().get_ticks_position(), 'right')
        self.assertFalse(figure_axes[0].get_yaxis().get_visible())
        plt.close(figure)

        # an ordinary plot keeps the default tick position
        plain_ax = plain.plot()
        self.assertEqual(plain_ax.get_yaxis().get_ticks_position(),
                         'default')
        plt.close(plain_ax.get_figure())

        # ordinary plot first, secondary_y plot second
        primary_ax = plain.plot()
        overlay_ax = secondary.plot(secondary_y=True)
        self.assertTrue(primary_ax.get_yaxis().get_visible())
        self.assertFalse(hasattr(primary_ax, 'left_ax'))
        self.assertTrue(hasattr(primary_ax, 'right_ax'))
        self.assertTrue(hasattr(overlay_ax, 'left_ax'))
        self.assertFalse(hasattr(overlay_ax, 'right_ax'))
Example #5
0
    def test_astype_with_tz(self):
        """``astype`` on datetime data: tz-aware to naive ``datetime64[ns]``,
        to ``str``, and to a tz-aware dtype (GH 18951)."""
        # tz-aware index cast to naive: expected to equal the UTC wall times
        eastern = date_range('1/1/2000', periods=10, tz='US/Eastern')
        as_naive = eastern.astype('datetime64[ns]')
        want_naive = (date_range('1/1/2000', periods=10,
                                 tz='US/Eastern')
                      .tz_convert('UTC').tz_localize(None))
        tm.assert_index_equal(as_naive, want_naive)

        # BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex
        naive_strings = pd.Series(
            pd.date_range('2012-01-01', periods=3)).astype(str)
        want_naive_strings = pd.Series(
            ['2012-01-01', '2012-01-02', '2012-01-03'], dtype=object)
        tm.assert_series_equal(naive_strings, want_naive_strings)

        aware_strings = Series(pd.date_range('2012-01-01', periods=3,
                                             tz='US/Eastern')).astype(str)
        want_aware_strings = Series(['2012-01-01 00:00:00-05:00',
                                     '2012-01-02 00:00:00-05:00',
                                     '2012-01-03 00:00:00-05:00'],
                                    dtype=object)
        tm.assert_series_equal(aware_strings, want_aware_strings)

        # GH 18951: tz-aware to tz-aware
        pacific = date_range('20170101', periods=4, tz='US/Pacific')
        converted = pacific.astype('datetime64[ns, US/Eastern]')
        want_converted = date_range('20170101 03:00:00', periods=4,
                                    tz='US/Eastern')
        tm.assert_index_equal(converted, want_converted)

        # GH 18951: tz-naive to tz-aware
        naive = date_range('20170101', periods=4)
        localized = naive.astype('datetime64[ns, US/Eastern]')
        want_localized = date_range('20170101', periods=4, tz='US/Eastern')
        tm.assert_index_equal(localized, want_localized)
Example #6
0
    def test_timegrouper_with_reg_groups_freq(self, freq):
        """Grouping by ``pd.Grouper(freq=freq)`` plus ``user_id`` must match a
        per-user resample, with and without a pre-sorted index (GH 6764)."""
        dates = pd.to_datetime([
            '20121002', '20121007', '20130130', '20130202', '20130305',
            '20121002', '20121207', '20130130', '20130202', '20130305',
            '20130202', '20130305'
        ])
        df = DataFrame({
            'date': dates,
            'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5],
            'whole_cost': [1790, 364, 280, 259, 201, 623, 90, 312, 359, 301,
                           359, 801],
            'cost1': [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12]
        }).set_index('date')

        # reference: resample each user separately, then flip the levels so
        # the date comes first
        per_user = df.groupby('user_id')['whole_cost'].resample(freq)
        # NOTE(review): min_count=1 presumably leaves empty bins as NaN so the
        # dropna() below can discard them -- confirm
        totals = per_user.sum(min_count=1)  # XXX
        expected = (totals.dropna()
                    .reorder_levels(['date', 'user_id'])
                    .sort_index()
                    .astype('int64'))
        expected.name = 'whole_cost'

        # with the index sorted first
        keys_sorted = [pd.Grouper(freq=freq), 'user_id']
        from_sorted = df.sort_index().groupby(keys_sorted)['whole_cost'].sum()
        assert_series_equal(from_sorted, expected)

        # and with the frame left in its original order
        keys_unsorted = [pd.Grouper(freq=freq), 'user_id']
        from_unsorted = df.groupby(keys_unsorted)['whole_cost'].sum()
        assert_series_equal(from_unsorted, expected)
Example #7
0
 def test_groupby_max_datetime64(self):
     """Grouped ``max`` on a datetime column must agree with applying
     ``max`` to each group (GH 5869)."""
     # GH 5869: datetimelike dtype conversion from int
     df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
     via_apply = df.groupby('A')['A'].apply(lambda grp: grp.max())
     via_max = df.groupby('A')['A'].max()
     assert_series_equal(via_max, via_apply)
Example #8
0
    def test_setitem_ndarray_1d(self):
        """Setting a 1d ndarray through ``.loc`` or a row slice (GH5508).

        A length mismatch between the indexer and the array must raise
        ``ValueError``; a matching length must store the values unchanged.
        """
        # GH5508

        # len of indexer vs length of the 1d ndarray
        df = DataFrame(index=Index(lrange(1, 11)))
        df['foo'] = np.zeros(10, dtype=np.float64)
        # ``np.complex`` was only an alias of the builtin ``complex`` and has
        # been removed from NumPy; complex128 is the dtype it resolved to
        df['bar'] = np.zeros(10, dtype=np.complex128)

        # invalid: the indexer selects three rows but four values are given
        with pytest.raises(ValueError):
            df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j,
                                                     2.2, 1.0])

        # valid: four rows, four values
        df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j,
                                                 2.2, 1.0])

        result = df.loc[df.index[2:6], 'bar']
        expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6],
                          name='bar')
        tm.assert_series_equal(result, expected)

        # dtype getting changed?
        df = DataFrame(index=Index(lrange(1, 11)))
        df['foo'] = np.zeros(10, dtype=np.float64)
        df['bar'] = np.zeros(10, dtype=np.complex128)

        # a 1d array assigned to a whole-row slice is expected to be rejected
        with pytest.raises(ValueError):
            df[2:5] = np.arange(1, 4) * 1j
Example #9
0
    def test_coercion_with_loc_and_series(self):
        """Assigning ``None`` through a boolean ``.loc`` mask must coerce each
        input as listed in ``EXPECTED_RESULTS``."""
        for raw, coerced in self.EXPECTED_RESULTS:
            actual = Series(raw)
            # overwrite every entry equal to the first one
            matches_first = actual == actual[0]
            actual.loc[matches_first] = None

            tm.assert_series_equal(actual, Series(coerced))
Example #10
0
    def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
        # GH #19686
        # .loc should work with nested indexers which can be
        # any list-like objects (see `pandas.api.types.is_list_like`) or slices

        def convert_nested_indexer(indexer_type, keys):
            if indexer_type == np.ndarray:
                return np.array(keys)
            if indexer_type == slice:
                return slice(*keys)
            return indexer_type(keys)

        a = [10, 20, 30]
        b = [1, 2, 3]
        index = pd.MultiIndex.from_product([a, b])
        df = pd.DataFrame(
            np.arange(len(index), dtype='int64'),
            index=index, columns=['Data'])

        keys = ([10, 20], [2, 3])
        types = (indexer_type_1, indexer_type_2)

        # check indexers with all the combinations of nested objects
        # of all the valid types
        indexer = tuple(
            convert_nested_indexer(indexer_type, k)
            for indexer_type, k in zip(types, keys))

        result = df.loc[indexer, 'Data']
        expected = pd.Series(
            [1, 2, 4, 5], name='Data',
            index=pd.MultiIndex.from_product(keys))

        tm.assert_series_equal(result, expected)
Example #11
0
    def test_crosstab_margins(self):
        """``crosstab(..., margins=True)`` adds an ``All`` row and column
        whose totals match the corresponding group sizes."""
        a = np.random.randint(0, 7, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 5, size=100)

        df = DataFrame({'a': a, 'b': b, 'c': c})

        result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'),
                          margins=True)

        self.assertEqual(result.index.names, ('a',))
        self.assertEqual(result.columns.names, ['b', 'c'])

        # margin column: per-'a' counts plus the grand total under 'All'
        all_cols = result['All', '']
        exp_cols = df.groupby(['a']).size().astype('i8')
        exp_cols = exp_cols.append(Series([len(df)], index=['All']))

        tm.assert_series_equal(all_cols, exp_cols)

        # margin row: 'All' is a label, so the label-based .loc replaces the
        # removed .ix indexer without changing what is selected
        all_rows = result.loc['All']
        exp_rows = df.groupby(['b', 'c']).size().astype('i8')
        exp_rows = exp_rows.append(Series([len(df)], index=[('All', '')]))

        # (b, c) pairs that never occur in the data count as zero
        exp_rows = exp_rows.reindex(all_rows.index)
        exp_rows = exp_rows.fillna(0).astype(np.int64)
        tm.assert_series_equal(all_rows, exp_rows)
Example #12
0
 def test_constructor_numpy_scalar(self):
     # GH 19342
     # construction with a numpy scalar
     # should not raise
     result = Series(np.array(100), index=np.arange(4), dtype='int64')
     expected = Series(100, index=np.arange(4), dtype='int64')
     tm.assert_series_equal(result, expected)
Example #13
0
 def test_constructor_dict_of_tuples(self):
     data = {(1, 2): 3,
             (None, 5): 6}
     result = Series(data).sort_values()
     expected = Series([3, 6],
                       index=MultiIndex.from_tuples([(1, 2), (None, 5)]))
     tm.assert_series_equal(result, expected)
Example #14
0
 def test_constructor_mixed_tz(self):
     s = Series([Timestamp('20130101'),
                 Timestamp('20130101', tz='US/Eastern')])
     expected = Series([Timestamp('20130101'),
                        Timestamp('20130101', tz='US/Eastern')],
                       dtype='object')
     assert_series_equal(s, expected)
Example #15
0
    def test_constructor_series(self):
        index1 = ['d', 'b', 'a', 'c']
        index2 = sorted(index1)
        s1 = Series([4, 7, -5, 3], index=index1)
        s2 = Series(s1, index=index2)

        assert_series_equal(s2, s1.sort_index())
Example #16
0
def test_make_choices_real_probs(random_seed, utilities):
    """Choices drawn from probabilities derived from ``utilities`` must be
    ``[1, 2]`` on index ``[0, 1]``."""
    # NOTE(review): random_seed is unused in the body; presumably a fixture
    # that pins the RNG so the draw is reproducible -- confirm
    choices = mnl.make_choices(mnl.utils_to_probs(utilities))

    wanted = pd.Series([1, 2], index=[0, 1])
    pdt.assert_series_equal(choices, wanted)
Example #17
0
 def test_groupby_to_scalar_to_series_1(self):
     """``groupby_to_scalar_to_series`` with ``max`` must match a plain
     ``groupby(...).apply(max)``."""
     values = pd.Series([1, 2, 3, 4])
     labels = ['a', 'a', 'b', 'b']
     reference = values.groupby(labels).apply(max)
     computed = pandas_easy.groupby_to_scalar_to_series(
         values, max, 1, by=labels)
     assert_series_equal(computed, reference)
Example #18
0
def test_dirint_min_cos_zenith_max_zenith():
    """Map out ``irradiance.dirint`` at zenith 90 / 89.99 under several
    ``min_cos_zenith`` / ``max_zenith`` settings."""
    # times don't have any physical relevance
    times = pd.DatetimeIndex(['2014-06-24T12-0700','2014-06-24T18-0700'])
    ghi = pd.Series([0, 1], index=times)
    solar_zenith = pd.Series([90, 89.99], index=times)

    loose = {'check_less_precise': True}
    # (limiting kwargs for dirint, expected dni values, comparison options)
    scenarios = [
        ({}, [0.0, 0.0], {}),
        ({'min_cos_zenith': 0}, [0.0, 0.0], {}),
        ({'max_zenith': 90}, [0.0, 0.0], loose),
        ({'min_cos_zenith': 0, 'max_zenith': 90}, [0.0, 144.264507], loose),
        ({'min_cos_zenith': 0, 'max_zenith': 100}, [0.0, 144.264507], loose),
    ]

    for limits, dni_values, compare_opts in scenarios:
        out = irradiance.dirint(ghi, solar_zenith, times, **limits)
        expected = pd.Series(dni_values, index=times, name='dni')
        assert_series_equal(out, expected, **compare_opts)
Example #19
0
def test_clearness_index_zenith_independent(airmass_kt):
    """Check ``irradiance.clearness_index_zenith_independent`` on a grid of
    inputs, with a ``max_clearness_index`` cap, on scalars and on Series."""
    # grid: every clearness index value against every airmass value
    clearness_index = np.array([-1, 0, .1, 1])
    clearness_index, airmass_kt = np.meshgrid(clearness_index, airmass_kt)
    out = irradiance.clearness_index_zenith_independent(clearness_index,
                                                        airmass_kt)
    expected = np.array(
        [[0.   , 0.   , 0.1  , 1.   ],
         [0.   , 0.   , 0.138, 1.383],
         [0.   , 0.   , 0.182, 1.822],
         [0.   , 0.   , 0.212, 2.   ]])
    assert_allclose(out, expected, atol=0.001)
    # test max_clearness_index
    out = irradiance.clearness_index_zenith_independent(
        clearness_index, airmass_kt, max_clearness_index=0.82)
    expected = np.array(
        [[ 0.   ,  0.   ,  0.1  ,  0.82 ],
         [ 0.   ,  0.   ,  0.138,  0.82 ],
         [ 0.   ,  0.   ,  0.182,  0.82 ],
         [ 0.   ,  0.   ,  0.212,  0.82 ]])
    assert_allclose(out, expected, atol=0.001)
    # scalars
    out = irradiance.clearness_index_zenith_independent(.4, 2)
    expected = 0.443
    assert_allclose(out, expected, atol=0.001)
    # series
    # pd.date_range replaces the removed DatetimeIndex(start=..., periods=...,
    # freq=...) constructor form and builds the same two timestamps
    times = pd.date_range(start='20180601', periods=2, freq='12H')
    clearness_index = pd.Series([0, .5], index=times)
    airmass = pd.Series([np.nan, 2], index=times)
    out = irradiance.clearness_index_zenith_independent(clearness_index,
                                                        airmass)
    # a NaN airmass is expected to give a NaN result
    expected = pd.Series([np.nan, 0.553744437562], index=times)
    assert_series_equal(out, expected)
    def test_quarterly_resampling(self):
        """Resampling a quarterly period series to annual must match doing
        the same via timestamps and converting back to periods."""
        quarters = period_range('2000Q1', periods=10, freq='Q-DEC')
        ts = Series(np.arange(10), index=quarters)

        # NOTE(review): the result of resample() is compared directly, which
        # looks like the API where resample returned the aggregated Series --
        # confirm the targeted pandas version
        annual = ts.resample('A')
        via_timestamps = ts.to_timestamp().resample('A').to_period()
        assert_series_equal(annual, via_timestamps)
Example #21
0
def test_perez_components(irrad_data, ephem_data, dni_et, relative_airmass):
    """``irradiance.perez`` with ``return_components=True`` and one NaN dni
    value: check the component frame and the sum of its components."""
    dni = irrad_data['dni'].copy()
    dni.iloc[2] = np.nan  # knock out the third observation
    out = irradiance.perez(40, 180, irrad_data['dhi'], dni,
                           dni_et, ephem_data['apparent_zenith'],
                           ephem_data['azimuth'], relative_airmass,
                           return_components=True)

    # one row per component here; transposed below into one column each
    component_rows = np.array(
        [[0., 31.46046871, np.nan, 45.45539877],
         [0., 26.84138589, np.nan, 31.72696071],
         [0., 0., np.nan, 4.47966439],
         [0., 4.62212181, np.nan, 9.25316454]])
    expected = pd.DataFrame(
        component_rows.T,
        columns=['sky_diffuse', 'isotropic', 'circumsolar', 'horizon'],
        index=irrad_data.index
    )

    if not pandas_0_22():
        expected_for_sum = expected['sky_diffuse']
    else:
        # under this pandas version the summed NaN row is expected to be 0
        expected_for_sum = expected['sky_diffuse'].copy()
        expected_for_sum.iloc[2] = 0

    # everything after the first column should add up to sky_diffuse
    sum_components = out.iloc[:, 1:].sum(axis=1)
    sum_components.name = 'sky_diffuse'

    assert_frame_equal(out, expected, check_less_precise=2)
    assert_series_equal(sum_components, expected_for_sum, check_less_precise=2)
    def test_resample_to_quarterly(self):
        """Forward-filled resampling of annual period series to quarterly,
        for every fiscal month and for a differing quarter anchor."""
        for month in MONTHS:
            quarterly_freq = 'Q-%s' % month
            annual = _simple_pts('1990', '1992', freq='A-%s' % month)
            quar_ts = annual.resample(quarterly_freq, fill_method='ffill')

            # reference: forward-fill the daily start stamps onto the
            # quarter-end days, then relabel with the quarterly periods
            stamps = annual.to_timestamp('D', how='start')
            qdates = period_range(annual.index[0].asfreq('D', 'start'),
                                  annual.index[-1].asfreq('D', 'end'),
                                  freq='Q-%s' % month)

            reference = stamps.reindex(qdates.to_timestamp('D', 'e'),
                                       method='ffill')
            reference.index = qdates

            assert_series_equal(quar_ts, reference)

        # conforms, but different month
        june_annual = _simple_pts('1990', '1992', freq='A-JUN')

        for how in ['start', 'end']:
            resampled = june_annual.resample('Q-MAR', convention=how,
                                             fill_method='ffill')
            reference = june_annual.asfreq('Q-MAR', how=how)
            reference = reference.reindex(resampled.index, method='ffill')

            assert_series_equal(resampled, reference)
    def test_numpy_reduction(self):
        """Annual ``how='prod'`` resampling must match a group-by-year
        aggregation with ``np.prod``."""
        resampled = self.ts.resample('A', how='prod', closed='right')

        yearly = self.ts.groupby(lambda stamp: stamp.year).agg(np.prod)
        # the groupby result is keyed by year; borrow the resampled index so
        # only the values are compared
        yearly.index = resampled.index

        assert_series_equal(resampled, yearly)
Example #24
0
    def test_astype_unicode(self):
        """``astype("unicode")`` must equal mapping ``compat.text_type`` over
        the values (GH7758).

        When ``compat.PY3`` is false the interpreter default encoding is
        switched to utf-8 for the duration of the test and restored in a
        ``finally`` block, so a failing assertion cannot leak the changed
        encoding into later tests.
        """
        # GH7758
        digits = string.digits
        test_series = [
            Series([digits * 10,
                    tm.rands(63),
                    tm.rands(64),
                    tm.rands(1000)]),
            Series([u('データーサイエンス、お前はもう死んでいる')]),
        ]

        former_encoding = None
        if not compat.PY3:
            # here the default encoding can be forced for this test;
            # reload(sys) is what makes setdefaultencoding available again
            former_encoding = sys.getdefaultencoding()
            reload(sys)  # noqa
            sys.setdefaultencoding("utf-8")
        try:
            # utf-8 encoded bytes are only added when utf-8 is the default
            if sys.getdefaultencoding() == "utf-8":
                test_series.append(
                    Series([u('野菜食べないとやばい').encode("utf-8")]))
            for s in test_series:
                res = s.astype("unicode")
                expec = s.map(compat.text_type)
                assert_series_equal(res, expec)
        finally:
            # restore the former encoding, even if an assertion failed
            if former_encoding is not None and former_encoding != "utf-8":
                reload(sys)  # noqa
                sys.setdefaultencoding(former_encoding)
Example #25
0
def test_conversions(data_missing):
    """Casting ``data_missing`` to object must give ``[nan, 1]`` both as a
    Series and as an ndarray, with matching scalar types."""
    df = pd.DataFrame({'A': data_missing})

    # astype to object series
    as_series = df['A'].astype('object')
    want_series = pd.Series(np.array([np.nan, 1], dtype=object), name='A')
    tm.assert_series_equal(as_series, want_series)

    # convert to object ndarray; this must be exactly equal, including the
    # type conversions of the scalars
    as_array = df['A'].astype('object').values
    want_array = np.array([np.nan, 1], dtype=object)
    tm.assert_numpy_array_equal(as_array, want_array)

    for got, want in zip(as_array, want_array):
        if pd.isnull(got):
            assert pd.isnull(want)
            continue
        assert got == want
        if is_integer(got):
            # PY2 can be int or long, so only integer-ness is required
            assert is_integer(want)
        else:
            assert type(got) == type(want)
Example #26
0
def assert_series_equal(left, right, check_names=True, **kwargs):
    """Compare two Series via ``tm.assert_series_equal``, tolerating a
    backend that rejects the ``check_names`` keyword.

    ``check_names`` is forwarded first. If that call raises ``TypeError``,
    the names are compared here (when requested) and the comparison is
    repeated without the keyword. Extra ``kwargs`` are passed through.

    Examples
    --------
    >>> import pandas as pd
    >>> s = pd.Series(list('abc'), name='a')
    >>> s2 = pd.Series(list('abc'), name='b')
    >>> assert_series_equal(s, s2)  # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    AssertionError: ...
    >>> assert_series_equal(s, s2, check_names=False)

    See Also
    --------
    pandas.util.testing.assert_series_equal
    """
    options = dict(kwargs, check_names=check_names)
    try:
        return tm.assert_series_equal(left, right, **options)
    except TypeError:
        # fall back: do the name check by hand, then compare without it
        if check_names:
            assert left.name == right.name
        return tm.assert_series_equal(left, right, **kwargs)
Example #27
0
def test_duplicate_dates_indexing(dups):
    """Get and set by date on a series whose index has repeated dates, then
    check a missing date raises and can be added."""
    ts = dups

    for date in ts.index.unique():
        selected = ts[date]

        mask = ts.index == date
        occurrences = (ts.index == date).sum()
        masked = ts[mask]
        if occurrences > 1:
            # a repeated date returns all of its rows
            assert_series_equal(selected, masked)
        else:
            # a unique date returns the single value
            assert_almost_equal(selected, masked[0])

        # assigning by date must overwrite every matching row, on a copy
        overwritten = ts.copy()
        overwritten[date] = 0
        wanted = Series(np.where(mask, 0, ts), index=ts.index)
        assert_series_equal(overwritten, wanted)

    absent = datetime(2000, 1, 6)
    pytest.raises(KeyError, ts.__getitem__, absent)

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0
    def test_upsample_with_limit(self):
        """Upsampling with a limited forward fill must equal a reindex with
        ``method='ffill', limit=2``."""
        stamps = date_range('1/1/2000', periods=3, freq='5t')
        ts = Series(np.random.randn(len(stamps)), stamps)

        upsampled = ts.resample('t', fill_method='ffill', limit=2)
        reindexed = ts.reindex(upsampled.index, method='ffill', limit=2)
        assert_series_equal(upsampled, reindexed)
Example #29
0
    def test_combineFrame(self, float_frame, mixed_float_frame,
                          mixed_int_frame):
        """Frame + frame addition: alignment on rows and columns, empty
        operands, reversed column order and mixed dtypes."""
        # every other row, column D removed, first five C values blanked
        partial = float_frame.reindex(float_frame.index[::2])
        del partial['D']
        partial['C'][:5] = np.nan

        total = float_frame + partial

        # where both frames have a value the sum is twice the original
        present = total['A'].dropna().index
        doubled = (float_frame['A'] * 2).copy()
        tm.assert_series_equal(total['A'].dropna(), doubled.loc[present])

        # everywhere else the sum must be NaN
        doubled.loc[~doubled.index.isin(present)] = np.nan
        tm.assert_series_equal(total['A'], doubled.loc[total['A'].index])

        assert np.isnan(total['C'].reindex(partial.index)[:5]).all()

        # D exists on one side only
        assert np.isnan(total['D']).all()

        doubled_frame = float_frame + float_frame
        tm.assert_index_equal(doubled_frame.index, float_frame.index)

        total_reversed = partial + float_frame
        assert np.isnan(total['D']).all()
        assert np.isnan(total_reversed['D']).all()

        # corner cases: an empty operand on either side, or on both
        frame_plus_empty = float_frame + DataFrame()
        assert np.isnan(frame_plus_empty.values).all()

        empty_plus_frame = DataFrame() + float_frame
        assert np.isnan(empty_plus_frame.values).all()

        both_empty = DataFrame() + DataFrame()
        assert both_empty.empty

        # out of order columns must still line up
        flipped = float_frame.reindex(columns=float_frame.columns[::-1])
        assert_frame_equal(flipped + float_frame, float_frame * 2)

        # mix vs float64, upcast
        total = float_frame + mixed_float_frame
        _check_mixed_float(total, dtype='float64')
        total = mixed_float_frame + float_frame
        _check_mixed_float(total, dtype='float64')

        # mix vs mix
        total = mixed_float_frame + mixed_float_frame
        _check_mixed_float(total, dtype=dict(C=None))

        # with int
        total = float_frame + mixed_int_frame
        _check_mixed_float(total, dtype='float64')
    def test_resample_5minute(self):
        """Resampling a minutely period series to ``'5min'`` must match
        resampling its timestamp conversion."""
        minutes = period_range('1/1/2000', '1/5/2000', freq='T')
        ts = TimeSeries(np.random.randn(len(minutes)), index=minutes)

        from_periods = ts.resample('5min')
        from_timestamps = ts.to_timestamp().resample('5min')
        assert_series_equal(from_periods, from_timestamps)
Example #31
0
    def test_Component_conversion(self):
        """``Component.to_series`` must give the Series built from the same
        array."""
        component = Component(np.arange(5))
        wanted = pd.Series(np.arange(5))

        assert_series_equal(wanted, component.to_series())
Example #32
0
 def test_pickle_strings(self, string_series):
     """``string_series`` must come back equal from ``_pickle_roundtrip``."""
     restored = self._pickle_roundtrip(string_series)
     tm.assert_series_equal(restored, string_series)
Example #33
0
 def test_pickle_datetimes(self, datetime_series):
     """``datetime_series`` must come back equal from
     ``_pickle_roundtrip``."""
     restored = self._pickle_roundtrip(datetime_series)
     tm.assert_series_equal(restored, datetime_series)
Example #34
0
def test_percentiles():
    """Check ``percentiles`` on empty inputs, integer and ``Window``
    windows (with and without ramp), and the forms without a window."""
    dates = [date(2019, 1, day) for day in (1, 2, 3, 4, 7, 8)]

    x = pd.Series([3.0, 2.0, 3.0, 1.0, 3.0, 6.0], index=dates)
    y = pd.Series([3.5, 1.8, 2.9, 1.2, 3.1, 6.0], index=dates)

    # an empty input on either side, or this window on six points, gives an
    # empty result
    assert_series_equal(percentiles(pd.Series([]), y), pd.Series([]))
    assert_series_equal(percentiles(x, pd.Series([])), pd.Series())
    assert_series_equal(percentiles(x, y, Window(7, 0)), pd.Series())

    # a bare integer window: the result starts at the third date
    got = percentiles(x, y, 2)
    want = pd.Series([50.0, 50.0, 100.0, 75.0], index=dates[2:])
    assert_series_equal(got, want, obj="percentiles with window length 2")

    got = percentiles(x, y, Window(2, 0))
    want = pd.Series([100.0, 0.0, 50.0, 50.0, 100.0, 75.0], index=dates)
    assert_series_equal(got, want, obj="percentiles with window 2 and ramp 0")

    # string-specified windows
    got = percentiles(x, y, Window('1w', 0))
    want = pd.Series([100.0, 0.0, 33.333333, 25.0, 100.0, 90.0], index=dates)
    assert_series_equal(got, want, obj="percentiles with window 1w")

    got = percentiles(x, y, Window('1w', '3d'))
    want = pd.Series([25.0, 100.0, 90.0], index=dates[3:])
    assert_series_equal(got, want, obj="percentiles with window 1w and ramp 3d")

    # no window at all, with and without a second series
    got = percentiles(x)
    want = pd.Series([50.0, 25.0, 66.667, 12.500, 70.0, 91.667], index=dates)
    assert_series_equal(got, want, obj="percentiles over historical values", check_less_precise=True)

    got = percentiles(x, y)
    want = pd.Series([100.0, 0.0, 33.333, 25.0, 100.0, 91.667], index=dates)
    assert_series_equal(got, want, obj="percentiles without window length", check_less_precise=True)

    # this combination of empty second series and window must be rejected
    with pytest.raises(ValueError):
        percentiles(x, pd.Series(), Window(6, 1))
Example #35
0
def test_zscores():
    """Check ``zscores`` on empty, single-point and windowed inputs."""
    # empty in, empty out -- with or without a window argument
    assert_series_equal(zscores(pd.Series()), pd.Series())
    assert_series_equal(zscores(pd.Series(), 1), pd.Series())

    # a single observation is expected to score 0.0
    assert_series_equal(zscores(pd.Series([1])), pd.Series([0.0]))
    assert_series_equal(zscores(pd.Series([1]), Window(1, 0)), pd.Series([0.0]))

    dates = [date(2019, 1, day) for day in (1, 2, 3, 4, 7, 8)]
    values = pd.Series([3.0, 2.0, 3.0, 1.0, 3.0, 6.0], index=dates)

    full_sample = zscores(values)
    assert_series_equal(
        full_sample,
        pd.Series([0.000000, -0.597614, 0.000000, -1.195229, 0.000000, 1.792843], index=dates),
        obj="z-score",
        check_less_precise=True,
    )

    # without a window the result must equal the plain (x - mean) / std transform
    assert_series_equal(full_sample, (values - values.mean()) / values.std(), obj="full series zscore")

    assert_series_equal(
        zscores(values, Window(2, 0)),
        pd.Series([0.0, -0.707107, 0.707107, -0.707107, 0.707107, 0.707107], index=dates),
        obj="z-score window 2",
        check_less_precise=True,
    )
    # an integer window must behave like Window(5, 5)
    assert_series_equal(zscores(values, Window(5, 5)), zscores(values, 5))

    assert_series_equal(
        zscores(values, Window('1w', 0)),
        pd.Series([0.0, -0.707106, 0.577350, -1.305582, 0.670820, 1.603567], index=dates),
        obj="z-score window 1w",
        check_less_precise=True,
    )
Example #36
0
    def test_CoordinateComponent_conversion(self):
        """The first pixel component of a 3-element ``Data`` converts to Series [0, 1, 2]."""
        data = Data(x=[1, 2, 3])
        expected = pd.Series(np.array([0, 1, 2]))
        pixel_id = data.pixel_component_ids[0]
        assert_series_equal(expected, data.get_component(pixel_id).to_series())
Example #37
0
    def test_CategoricalComponent_conversion(self):
        """``to_series`` on a categorical component returns its labels as a Series."""
        labels = ['a', 'b', 'c', 'd']
        component = CategoricalComponent(np.array(labels))
        expected = pd.Series(labels)

        assert_series_equal(expected, component.to_series())
Example #38
0
    def test_ops_series_object(self):
        """Timedelta arithmetic on object-dtype Series (GH 13043)."""

        def mixed_tz(eastern_day, tokyo_day):
            # one US/Eastern and one Asia/Tokyo timestamp, series named 'xxx'
            return pd.Series([
                pd.Timestamp(eastern_day, tz='US/Eastern'),
                pd.Timestamp(tokyo_day, tz='Asia/Tokyo'),
            ], name='xxx')

        stamps = mixed_tz('2015-01-01', '2015-01-01')
        self.assertEqual(stamps.dtype, object)

        # object series & Timedelta scalar, both operand orders
        shifted = mixed_tz('2015-01-02', '2015-01-02')
        tm.assert_series_equal(stamps + pd.Timedelta('1 days'), shifted)
        tm.assert_series_equal(pd.Timedelta('1 days') + stamps, shifted)

        # object series & object series
        later = mixed_tz('2015-01-03', '2015-01-05')
        self.assertEqual(later.dtype, object)
        gaps = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')],
                         name='xxx')
        tm.assert_series_equal(later - stamps, gaps)
        tm.assert_series_equal(stamps - later, -gaps)

        # Timedelta values explicitly stored with object dtype
        durations = pd.Series(
            [pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')],
            name='xxx',
            dtype=object)
        self.assertEqual(durations.dtype, object)

        half_hour_on = pd.Series(
            [pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')],
            name='xxx')
        tm.assert_series_equal(durations + pd.Timedelta('00:30:00'), half_hour_on)
        tm.assert_series_equal(pd.Timedelta('00:30:00') + durations, half_hour_on)
Example #39
0
    def test_timedelta_ops_with_missing_values(self):
        """Add/subtract timedeltas across scalars, Series and DataFrames.

        Every operation with a missing operand (timedelta NaT, ``np.nan`` or
        ``pd.NaT``) is expected to produce an all-NaT result.
        """
        # setup
        ser1 = pd.to_timedelta(Series(['00:00:01']))
        ser2 = pd.to_timedelta(Series(['00:00:02']))
        ser_nat = pd.to_timedelta(Series([pd.NaT]))
        frame1 = DataFrame(['00:00:01']).apply(pd.to_timedelta)
        frame2 = DataFrame(['00:00:02']).apply(pd.to_timedelta)
        frame_nat = DataFrame([pd.NaT]).apply(pd.to_timedelta)
        td1 = pd.to_timedelta('00:00:01')
        td2 = pd.to_timedelta('00:00:02')
        td_nat = pd.to_timedelta('NaT')
        nan = np.nan

        # scalar with scalar
        self.assertEqual(td1 + td1, td2)
        self.assertEqual(td2 - td1, td1)

        # series with series
        assert_series_equal(ser1 + ser1, ser2)
        assert_series_equal(ser2 - ser1, ser1)

        # series with scalar, both operand orders
        assert_series_equal(ser1 + td1, ser2)
        assert_series_equal(td1 + ser1, ser2)
        assert_series_equal(ser2 - td1, ser1)
        assert_series_equal(-td1 + ser2, ser1)

        # series with timedelta NaT
        assert_series_equal(ser1 + td_nat, ser_nat)
        assert_series_equal(td_nat + ser1, ser_nat)
        assert_series_equal(ser1 - td_nat, ser_nat)
        assert_series_equal(-td_nat + ser1, ser_nat)

        # series with NaN
        assert_series_equal(ser1 + nan, ser_nat)
        assert_series_equal(nan + ser1, ser_nat)
        assert_series_equal(ser1 - nan, ser_nat)
        assert_series_equal(-nan + ser1, ser_nat)

        # series with pd.NaT
        assert_series_equal(ser1 + pd.NaT, ser_nat)
        assert_series_equal(ser2 - pd.NaT, ser_nat)

        # series with frame, both operand orders
        assert_frame_equal(ser1 + frame1, frame2)
        assert_frame_equal(ser2 - frame1, frame1)
        assert_frame_equal(frame1 + ser1, frame2)
        assert_frame_equal(frame2 - ser1, frame1)

        # frame with frame
        assert_frame_equal(frame1 + frame1, frame2)
        assert_frame_equal(frame2 - frame1, frame1)

        # frame with scalar
        assert_frame_equal(frame1 + td1, frame2)
        assert_frame_equal(frame2 - td1, frame1)

        # frame with timedelta NaT
        assert_frame_equal(frame1 + td_nat, frame_nat)
        assert_frame_equal(frame1 - td_nat, frame_nat)

        # frame with NaN
        assert_frame_equal(frame1 + nan, frame_nat)
        assert_frame_equal(frame1 - nan, frame_nat)

        # frame with pd.NaT -- NaT is datetime, not timedelta
        assert_frame_equal(frame1 + pd.NaT, frame_nat)
        assert_frame_equal(frame1 - pd.NaT, frame_nat)
Example #40
0
    def test_timedelta_ops_with_missing_values(self):
        """Add/subtract timedeltas across scalars, Series and DataFrames.

        NaT operands are expected to give all-NaT results, while ``np.nan``
        operands are expected to raise ``TypeError``.
        """
        # setup
        ser1 = pd.to_timedelta(Series(['00:00:01']))
        ser2 = pd.to_timedelta(Series(['00:00:02']))
        ser_nat = pd.to_timedelta(Series([pd.NaT]))
        frame1 = pd.DataFrame(['00:00:01']).apply(pd.to_timedelta)
        frame2 = pd.DataFrame(['00:00:02']).apply(pd.to_timedelta)
        frame_nat = pd.DataFrame([pd.NaT]).apply(pd.to_timedelta)
        td1 = pd.to_timedelta('00:00:01')
        td2 = pd.to_timedelta('00:00:02')
        td_nat = pd.to_timedelta('NaT')

        # scalar with scalar
        assert td1 + td1 == td2
        assert td2 - td1 == td1

        # series with series
        tm.assert_series_equal(ser1 + ser1, ser2)
        tm.assert_series_equal(ser2 - ser1, ser1)

        # series with scalar, both operand orders
        tm.assert_series_equal(ser1 + td1, ser2)
        tm.assert_series_equal(td1 + ser1, ser2)
        tm.assert_series_equal(ser2 - td1, ser1)
        tm.assert_series_equal(-td1 + ser2, ser1)

        # series with timedelta NaT
        tm.assert_series_equal(ser1 + td_nat, ser_nat)
        tm.assert_series_equal(td_nat + ser1, ser_nat)
        tm.assert_series_equal(ser1 - td_nat, ser_nat)
        tm.assert_series_equal(-td_nat + ser1, ser_nat)

        # series with NaN is rejected
        with pytest.raises(TypeError):
            ser1 + np.nan
        with pytest.raises(TypeError):
            np.nan + ser1
        with pytest.raises(TypeError):
            ser1 - np.nan
        with pytest.raises(TypeError):
            -np.nan + ser1

        # series with pd.NaT
        tm.assert_series_equal(ser1 + pd.NaT, ser_nat)
        tm.assert_series_equal(ser2 - pd.NaT, ser_nat)

        # series with frame, both operand orders
        tm.assert_frame_equal(ser1 + frame1, frame2)
        tm.assert_frame_equal(ser2 - frame1, frame1)
        tm.assert_frame_equal(frame1 + ser1, frame2)
        tm.assert_frame_equal(frame2 - ser1, frame1)

        # frame with frame
        tm.assert_frame_equal(frame1 + frame1, frame2)
        tm.assert_frame_equal(frame2 - frame1, frame1)

        # frame with scalar
        tm.assert_frame_equal(frame1 + td1, frame2)
        tm.assert_frame_equal(frame2 - td1, frame1)

        # frame with timedelta NaT
        tm.assert_frame_equal(frame1 + td_nat, frame_nat)
        tm.assert_frame_equal(frame1 - td_nat, frame_nat)

        # frame with NaN is rejected
        with pytest.raises(TypeError):
            frame1 + np.nan
        with pytest.raises(TypeError):
            frame1 - np.nan

        # frame with pd.NaT -- NaT is datetime, not timedelta
        tm.assert_frame_equal(frame1 + pd.NaT, frame_nat)
        tm.assert_frame_equal(frame1 - pd.NaT, frame_nat)
Example #41
0
    def test_grouper_creation_bug(self):
        """``pd.Grouper`` keys must group exactly like plain column/level names."""

        # GH 8795
        frame = DataFrame({'A': [0, 0, 1, 1, 2, 2], 'B': [1, 2, 3, 4, 5, 6]})
        expected = frame.groupby('A').sum()

        by_grouper = frame.groupby(pd.Grouper(key='A'))
        assert_frame_equal(by_grouper.sum(), expected)
        assert_frame_equal(by_grouper.apply(lambda x: x.sum()), expected)

        by_grouper = frame.groupby(pd.Grouper(key='A', axis=0))
        assert_frame_equal(by_grouper.sum(), expected)

        # GH14334
        # pd.Grouper(key=...) may be passed in a list
        frame = DataFrame({
            'A': [0, 0, 0, 1, 1, 1],
            'B': [1, 1, 2, 2, 3, 3],
            'C': [1, 2, 3, 4, 5, 6]
        })
        # Group by single column
        expected = frame.groupby('A').sum()
        assert_frame_equal(frame.groupby([pd.Grouper(key='A')]).sum(),
                           expected)

        # Group by two columns
        # using a combination of strings and Grouper objects
        expected = frame.groupby(['A', 'B']).sum()

        # Group with two Grouper objects
        both_groupers = frame.groupby(
            [pd.Grouper(key='A'), pd.Grouper(key='B')])
        assert_frame_equal(both_groupers.sum(), expected)

        # Group with a string and a Grouper object
        name_then_grouper = frame.groupby(['A', pd.Grouper(key='B')])
        assert_frame_equal(name_then_grouper.sum(), expected)

        # Group with a Grouper object and a string
        grouper_then_name = frame.groupby([pd.Grouper(key='A'), 'B'])
        assert_frame_equal(grouper_then_name.sum(), expected)

        # GH8866
        levels = [list('ab'), range(2), date_range('20130101', periods=2)]
        ser = Series(
            np.arange(8, dtype='int64'),
            index=pd.MultiIndex.from_product(
                levels, names=['one', 'two', 'three']))
        monthly = ser.groupby(pd.Grouper(level='three', freq='M')).sum()
        month_end = Index([Timestamp('2013-01-31')], freq='M', name='three')
        assert_series_equal(monthly, Series([28], index=month_end))

        # just specifying a level breaks
        via_grouper = ser.groupby(pd.Grouper(level='one')).sum()
        via_level = ser.groupby(level='one').sum()
        assert_series_equal(via_grouper, via_level)
Example #42
0
 def test_compare_timedelta_series(self):
     """A timedelta Series compares element-wise against a ``timedelta`` scalar."""
     # regression test for GH5963
     one_day, two_days = timedelta(days=1), timedelta(days=2)
     ser = pd.Series([one_day, two_days])
     tm.assert_series_equal(ser > one_day, pd.Series([False, True]))
Example #43
0
    def test_predict(self):
        """``predict`` must reproduce ``y_predict`` and honour explicit ``x`` / ``beta``."""
        endog = tm.makeTimeSeries()
        regressors = tm.makeTimeDataFrame()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=endog, x=regressors)

        # defaults, the training regressors and the fitted beta all give y_predict
        assert_series_equal(model.predict(), model.y_predict)
        assert_series_equal(model.predict(x=regressors), model.y_predict)
        assert_series_equal(model.predict(beta=model.beta), model.y_predict)

        # y_predict equals the manual dot product with an intercept column
        with_intercept = regressors.copy()
        with_intercept['intercept'] = 1.
        manual = Series(np.dot(with_intercept.values, model.beta.values),
                        regressors.index)
        assert_series_equal(model.y_predict, manual)

        # column order of x must not matter
        reversed_cols = regressors.reindex(columns=regressors.columns[::-1])
        assert_series_equal(model.predict(x=reversed_cols), model.y_predict)

        # new regressor values: predict first, then rebuild the expectation by hand
        shifted = reversed_cols + 10
        shifted_pred = model.predict(x=shifted)
        shifted['intercept'] = 1.
        shifted = shifted.reindex(columns=model.beta.index)
        manual = Series(np.dot(shifted.values, model.beta.values),
                        shifted.index)
        assert_series_equal(manual, shifted_pred)

        # an all-zero beta predicts all zeros
        zero_beta = Series(0., model.beta.index)
        zero_pred = model.predict(beta=zero_beta)
        assert_series_equal(Series(0., zero_pred.index), zero_pred)
Example #44
0
    def test_comparisons(self, data, reverse, base):
        """Ordered categorical Series compare according to their category order.

        Also checks that comparing categoricals with different categories, or
        with a plain Series / ndarray (in either operand position), raises
        ``TypeError``.
        """
        rev = Series(
            Categorical(data, categories=reverse, ordered=True))
        rev_base = Series(
            Categorical(base, categories=reverse, ordered=True))
        fwd = Series(Categorical(data, ordered=True))
        fwd_base = Series(
            Categorical(base, categories=fwd.cat.categories, ordered=True))
        plain = Series(base)
        arr = np.array(base)

        # comparisons need to take categories ordering into account
        tm.assert_series_equal(rev > rev_base, Series([True, False, False]))
        tm.assert_series_equal(rev < rev_base, Series([False, False, True]))
        tm.assert_series_equal(fwd > fwd_base, Series([False, False, True]))

        # against a scalar, Series and underlying values must agree
        scalar = base[1]
        res = fwd > scalar
        raw = fwd.values > scalar
        tm.assert_series_equal(res, Series([False, False, True]))
        tm.assert_numpy_array_equal(res.values, raw)

        res_rev = rev > scalar
        raw_rev = rev.values > scalar
        tm.assert_series_equal(res_rev, Series([True, False, False]))
        tm.assert_numpy_array_equal(res_rev.values, raw_rev)

        # Only categories with same categories can be compared
        with pytest.raises(TypeError):
            fwd > rev

        # categorical cannot be compared to Series or numpy array, and also
        # not the other way around
        msg = ("Cannot compare a Categorical for op __gt__ with type"
               r" <class 'numpy\.ndarray'>")
        invalid_comparisons = (
            lambda: fwd > plain,
            lambda: rev > plain,
            lambda: fwd > arr,
            lambda: rev > arr,
            lambda: plain < fwd,
            lambda: plain < rev,
            lambda: arr < fwd,
            lambda: arr < rev,
        )
        for attempt in invalid_comparisons:
            with pytest.raises(TypeError, match=msg):
                attempt()
    def test_set_index_cast_datetimeindex(self):
        """Check how datetime-like values behave as an index or as a column.

        Covers ``set_index`` on a datetime column, a tz-aware DatetimeIndex
        converted to or assigned as a Series (GH 6032), setting a tz-aware
        index manually (GH 6785) and ``reset_index`` on a tz-aware index
        (GH 3950).
        """
        df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i)
                              for i in range(1000)],
                        'B': np.random.randn(1000)})

        # a column of datetimes becomes a DatetimeIndex
        idf = df.set_index('A')
        assert isinstance(idf.index, pd.DatetimeIndex)

        # don't cast a DatetimeIndex WITH a tz, leave as object
        # GH 6032
        i = (pd.DatetimeIndex(
            to_datetime(['2013-1-1 13:00',
                         '2013-1-2 14:00'], errors="raise"))
             .tz_localize('US/Pacific'))
        df = DataFrame(np.random.randn(2, 1), columns=['A'])

        # object-dtype series of tz-aware Timestamps shared by the checks below
        expected = Series(np.array([pd.Timestamp('2013-01-01 13:00:00-0800',
                                                 tz='US/Pacific'),
                                    pd.Timestamp('2013-01-02 14:00:00-0800',
                                                 tz='US/Pacific')],
                                   dtype="object"))

        # convert index to series
        result = Series(i)
        assert_series_equal(result, expected)

        # assign to frame
        df['B'] = i
        result = df['B']
        assert_series_equal(result, expected, check_names=False)
        assert result.name == 'B'

        # keep the timezone
        result = i.to_series(keep_tz=True)
        assert_series_equal(result.reset_index(drop=True), expected)

        # convert to utc
        df['C'] = i.to_series().reset_index(drop=True)
        result = df['C']
        # compare raw values against the expected timestamps with tz cleared
        comp = pd.DatetimeIndex(expected.values).copy()
        comp.tz = None
        tm.assert_numpy_array_equal(result.values, comp.values)

        # list of datetimes with a tz
        df['D'] = i.to_pydatetime()
        result = df['D']
        assert_series_equal(result, expected, check_names=False)
        assert result.name == 'D'

        # GH 6785
        # set the index manually
        import pytz
        df = DataFrame(
            [{'ts': datetime(2014, 4, 1, tzinfo=pytz.utc), 'foo': 1}])
        expected = df.set_index('ts')
        df.index = df['ts']
        df.pop('ts')
        assert_frame_equal(df, expected)

        # GH 3950
        # reset_index with single level
        for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern']:
            idx = pd.date_range('1/1/2011', periods=5,
                                freq='D', tz=tz, name='idx')
            df = pd.DataFrame(
                {'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx)

            expected = pd.DataFrame({'idx': [datetime(2011, 1, 1),
                                             datetime(2011, 1, 2),
                                             datetime(2011, 1, 3),
                                             datetime(2011, 1, 4),
                                             datetime(2011, 1, 5)],
                                     'a': range(5),
                                     'b': ['A', 'B', 'C', 'D', 'E']},
                                    columns=['idx', 'a', 'b'])
            # localize the naive expected datetimes to the zone under test
            expected['idx'] = expected['idx'].apply(
                lambda d: pd.Timestamp(d, tz=tz))
            assert_frame_equal(df.reset_index(), expected)
Example #46
0
def _compare_fullsample_ols(model1, model2):
    """Assert that both models expose equal ``beta`` series."""
    left_beta, right_beta = model1.beta, model2.beta
    assert_series_equal(left_beta, right_beta)
Example #47
0
    def test_sort_values(self):
        """Cover ``Series.sort_values``: NaN placement, direction, bad args, inplace."""

        # check indexes are reordered corresponding with the values
        unsorted = Series([3, 2, 4, 1], ['A', 'B', 'C', 'D'])
        tm.assert_series_equal(Series([1, 2, 3, 4], ['D', 'B', 'A', 'C']),
                               unsorted.sort_values())

        # blank out the first five observations
        ts = self.ts.copy()
        ts[:5] = np.NaN
        raw = ts.values

        # by default the NaNs are expected at the end
        result = ts.sort_values()
        assert np.isnan(result[-5:]).all()
        tm.assert_numpy_array_equal(result[:-5].values, np.sort(raw[5:]))

        # na_position
        result = ts.sort_values(na_position='first')
        assert np.isnan(result[:5]).all()
        tm.assert_numpy_array_equal(result[5:].values, np.sort(raw[5:]))

        # something object-type
        # no failure
        Series(['A', 'B'], [1, 2]).sort_values()

        na_variants = ({}, {'na_position': 'first'})

        # ascending=False
        expected = np.sort(ts.valid().values)[::-1]
        for extra in na_variants:
            ordered = ts.sort_values(ascending=False, **extra)
            assert_almost_equal(expected, ordered.valid().values)

        # ascending=[False] should behave the same as ascending=False
        for extra in na_variants:
            ordered = ts.sort_values(ascending=[False], **extra)
            expected = ts.sort_values(ascending=False, **extra)
            assert_series_equal(expected, ordered)

        # invalid values for ``ascending`` are rejected
        for bad in (None, [], [1, 2, 3], [False, False], 'foobar'):
            pytest.raises(ValueError, ts.sort_values, ascending=bad)

        # inplace=True
        ts = self.ts.copy()
        ts.sort_values(ascending=False, inplace=True)
        tm.assert_series_equal(ts, self.ts.sort_values(ascending=False))
        tm.assert_index_equal(ts.index,
                              self.ts.sort_values(ascending=False).index)

        # GH 5856/5853
        # Series.sort_values operating on a view
        frame = DataFrame(np.random.randn(10, 4))
        first_col = frame.iloc[:, 0]
        pytest.raises(ValueError, first_col.sort_values, inplace=True)
    def test_reset_index(self):
        """Exercise ``DataFrame.reset_index`` on single and multi-level indexes.

        Covers moving MultiIndex levels into columns, the default column
        names, resetting only some levels, ``inplace=True`` and ``drop=True``.
        Note that this test sets ``index.name`` and ``columns.name`` on
        ``self.frame``.
        """
        stacked = self.frame.stack()[::2]
        stacked = DataFrame({'foo': stacked, 'bar': stacked})

        # named levels come back as columns holding the level values
        names = ['first', 'second']
        stacked.index.names = names
        deleveled = stacked.reset_index()
        for i, (lev, lab) in enumerate(zip(stacked.index.levels,
                                           stacked.index.labels)):
            values = lev.take(lab)
            name = names[i]
            tm.assert_index_equal(values, Index(deleveled[name]))

        # unnamed levels are expected under 'level_0' / 'level_1'
        stacked.index.names = [None, None]
        deleveled2 = stacked.reset_index()
        tm.assert_series_equal(deleveled['first'], deleveled2['level_0'],
                               check_names=False)
        tm.assert_series_equal(deleveled['second'], deleveled2['level_1'],
                               check_names=False)

        # default name assigned
        rdf = self.frame.reset_index()
        exp = pd.Series(self.frame.index.values, name='index')
        tm.assert_series_equal(rdf['index'], exp)

        # default name assigned, corner case
        df = self.frame.copy()
        df['index'] = 'foo'
        rdf = df.reset_index()
        exp = pd.Series(self.frame.index.values, name='level_0')
        tm.assert_series_equal(rdf['level_0'], exp)

        # but this is ok
        self.frame.index.name = 'index'
        deleveled = self.frame.reset_index()
        tm.assert_series_equal(deleveled['index'],
                               pd.Series(self.frame.index))
        tm.assert_index_equal(deleveled.index,
                              pd.Index(np.arange(len(deleveled))))

        # preserve column names
        self.frame.columns.name = 'columns'
        resetted = self.frame.reset_index()
        assert resetted.columns.name == 'columns'

        # only remove certain columns
        frame = self.frame.reset_index().set_index(['index', 'A', 'B'])
        rs = frame.reset_index(['A', 'B'])

        # TODO should reset_index check_names ?
        assert_frame_equal(rs, self.frame, check_names=False)

        rs = frame.reset_index(['index', 'A', 'B'])
        assert_frame_equal(rs, self.frame.reset_index(), check_names=False)

        # NOTE(review): this check repeats the previous one verbatim
        rs = frame.reset_index(['index', 'A', 'B'])
        assert_frame_equal(rs, self.frame.reset_index(), check_names=False)

        rs = frame.reset_index('A')
        xp = self.frame.reset_index().set_index(['index', 'B'])
        assert_frame_equal(rs, xp, check_names=False)

        # test resetting in place
        df = self.frame.copy()
        resetted = self.frame.reset_index()
        df.reset_index(inplace=True)
        assert_frame_equal(df, resetted, check_names=False)

        # drop=True discards the reset level instead of adding it as a column
        frame = self.frame.reset_index().set_index(['index', 'A', 'B'])
        rs = frame.reset_index('A', drop=True)
        xp = self.frame.copy()
        del xp['A']
        xp = xp.set_index(['B'], append=True)
        assert_frame_equal(rs, xp, check_names=False)
Example #49
0
def test_where_scalar(t, df, cond, expected_func):
    """``ibis.where`` with a scalar alternative (3.0) must match ``expected_func(df)``."""
    result = ibis.where(cond, t['plain_int64'], 3.0).execute()
    tm.assert_series_equal(result, expected_func(df))
Example #50
0
def test_fillna():
    """Forward-filling ``MoneyArray([1, 0])`` is expected to give ``MoneyArray([1, 1])``."""
    filled = pd.Series(mpd.MoneyArray([1, 0], 'USD')).fillna(method='ffill')
    tm.assert_series_equal(filled, pd.Series(mpd.MoneyArray([1, 1], 'USD')))
Example #51
0
def test_notin(t, df, elements):
    """``notin`` on the expression must equal the negated pandas ``isin``."""
    not_in_expr = t.plain_float64.notin(elements)
    outside = ~df.plain_float64.isin(elements)
    tm.assert_series_equal(not_in_expr.execute(), outside)
Example #52
0
def test_shift_on_column(n, column, sql):
    """Shifting a column through ``compute`` must match pandas ``shift`` on the same data."""
    table = symbol('t', discover(sql))
    shifted_expr = table[column].shift(n)
    computed = odo(compute(shifted_expr, sql), pd.Series)
    reference = odo(sql, pd.DataFrame)[column].shift(n)
    tm.assert_series_equal(computed, reference)
Example #53
0
def test_notnull(t, df):
    """``notnull`` on the expression must match pandas ``notnull`` on the frame."""
    result = t.strings_with_nulls.notnull().execute()
    tm.assert_series_equal(result, df.strings_with_nulls.notnull())
Example #54
0
def test_left_binary_op(t, df, op, args):
    """Apply ``op`` to the expression column and to the pandas column; results must agree."""
    result = op(*args(t.float64_with_zeros)).execute()
    tm.assert_series_equal(result, op(*args(df.float64_with_zeros)))
Example #55
0
def test_series_limit(t, df, offset):
    """``limit(5, offset=...)`` must equal the matching positional slice of the frame column."""
    row_count = 5
    limited = t.plain_int64.limit(row_count, offset=offset).execute()
    stop = offset + row_count
    tm.assert_series_equal(limited, df.plain_int64.iloc[offset:stop])
Example #56
0
def test_scalar_parameter(t, df, raw_value):
    """An equality against a double parameter is evaluated with the value bound at execute time."""
    placeholder = ibis.param(dt.double)
    comparison = t.float64_with_zeros == placeholder
    result = comparison.execute(params={placeholder: raw_value})
    tm.assert_series_equal(result, df.float64_with_zeros == raw_value)
Example #57
0
def test_table_column(t, df):
    """Executing a bare column expression must return the frame's column."""
    result = t.plain_int64.execute()
    tm.assert_series_equal(result, df.plain_int64)
Example #58
0
def test_distinct(t, df):
    """``distinct`` must match pandas ``unique`` wrapped in a Series named after the column."""
    result = t.dup_strings.distinct().execute()
    uniques = df.dup_strings.unique()
    tm.assert_series_equal(result, pd.Series(uniques, name='dup_strings'))
Example #59
0
    def test_mixed_timezone_series_ops_object(self):
        """Timedelta arithmetic on object-dtype Series of mixed-tz Timestamps."""
        # GH#13043

        def mixed_tz(eastern_day, tokyo_day):
            # one US/Eastern and one Asia/Tokyo timestamp, series named "xxx"
            return pd.Series(
                [
                    pd.Timestamp(eastern_day, tz="US/Eastern"),
                    pd.Timestamp(tokyo_day, tz="Asia/Tokyo"),
                ],
                name="xxx",
            )

        stamps = mixed_tz("2015-01-01", "2015-01-01")
        assert stamps.dtype == object

        # object series & Timedelta scalar, both operand orders
        next_day = mixed_tz("2015-01-02", "2015-01-02")
        tm.assert_series_equal(stamps + pd.Timedelta("1 days"), next_day)
        tm.assert_series_equal(pd.Timedelta("1 days") + stamps, next_day)

        # object series & object series
        later = mixed_tz("2015-01-03", "2015-01-05")
        assert later.dtype == object
        gaps = pd.Series(
            [pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx")
        tm.assert_series_equal(later - stamps, gaps)
        tm.assert_series_equal(stamps - later, -gaps)

        # Timedelta values explicitly stored with object dtype
        durations = pd.Series(
            [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
            name="xxx",
            dtype=object,
        )
        assert durations.dtype == object

        half_hour_on = pd.Series(
            [pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], name="xxx")
        tm.assert_series_equal(durations + pd.Timedelta("00:30:00"), half_hour_on)
        tm.assert_series_equal(pd.Timedelta("00:30:00") + durations, half_hour_on)
Example #60
0
def test_null_if_zero(t, df, column):
    """``nullifzero`` must match replacing 0 with NaN in the pandas column."""
    result = t[column].nullifzero().execute()
    tm.assert_series_equal(result, df[column].replace(0, np.nan))