Ejemplo n.º 1
0
        def testit():
            """Check comparison operators with and without numexpr.

            For each frame pair on ``self`` the first frame is compared with
            itself plus one: ``expr._can_use_numexpr`` must disagree with the
            frame's ``_is_mixed_type`` flag, and the numexpr-enabled result
            must match the values of the plain evaluation.  For the second
            frame of the pair numexpr must be reported as unusable.
            """
            comparisons = [('gt', '>'), ('lt', '<'), ('ge', '>='),
                           ('le', '<='), ('eq', '=='), ('ne', '!=')]
            frame_pairs = [(self.frame, self.frame2),
                           (self.mixed, self.mixed2)]

            for first, second in frame_pairs:
                first_rhs = first + 1
                second_rhs = second + 1

                for op_name, op_str in comparisons:
                    func = getattr(operator, op_name)

                    usable = expr._can_use_numexpr(func, op_str, first,
                                                   first_rhs, 'evaluate')
                    self.assertNotEqual(usable, first._is_mixed_type)

                    with_numexpr = expr.evaluate(func, op_str, first,
                                                 first_rhs, use_numexpr=True)
                    without_numexpr = expr.evaluate(func, op_str, first,
                                                    first_rhs,
                                                    use_numexpr=False)
                    tm.assert_numpy_array_equal(with_numexpr,
                                                without_numexpr.values)

                    usable = expr._can_use_numexpr(func, op_str, second,
                                                   second_rhs, 'evaluate')
                    self.assertFalse(usable)
Ejemplo n.º 2
0
    def test_constructor(self):
        """Exercise ``Float64Index`` construction from lists and arrays.

        Covers integer and float input, a requested ``float32`` dtype (the
        index must still report ``float64``) and all-NaN input.
        """
        # explicit construction
        idx = Float64Index([1, 2, 3, 4, 5])
        assert isinstance(idx, Float64Index)
        tm.assert_numpy_array_equal(
            idx.values, np.array([1, 2, 3, 4, 5], dtype='float64'))

        for data in (np.array([1, 2, 3, 4, 5]), [1., 2, 3, 4, 5]):
            assert isinstance(Float64Index(data), Float64Index)

        idx = Float64Index(np.array([1., 2, 3, 4, 5]))
        assert isinstance(idx, Float64Index)
        assert idx.dtype == float

        # a float32 request is not honoured: the dtype stays float64
        for data in (np.array([1., 2, 3, 4, 5]), np.array([1, 2, 3, 4, 5])):
            idx = Float64Index(data, dtype=np.float32)
            assert isinstance(idx, Float64Index)
            assert idx.dtype == np.float64

        # nan handling
        nan_cases = [(Float64Index, [np.nan, np.nan]),
                     (Float64Index, np.array([np.nan])),
                     (Index, np.array([np.nan]))]
        for constructor, data in nan_cases:
            result = constructor(data)
            assert pd.isna(result.values).all()
Ejemplo n.º 3
0
def test_conversions(data_missing):
    """Converting ``data_missing`` to object dtype keeps values and types.

    The object Series and the object ndarray must both equal ``[nan, 1]``,
    and every element must match the expected one in value and scalar kind.
    """
    frame = pd.DataFrame({'A': data_missing})

    # astype to object series
    as_object = frame['A'].astype('object')
    tm.assert_series_equal(
        as_object, pd.Series(np.array([np.nan, 1], dtype=object), name='A'))

    # convert to object ndarray
    # we assert that we are exactly equal
    # including type conversions of scalars
    result = frame['A'].astype('object').values
    expected = np.array([np.nan, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for got, want in zip(result, expected):
        if pd.isnull(got):
            assert pd.isnull(want)
            continue

        assert got == want
        if is_integer(got):
            # PY2 can be int or long
            assert is_integer(want)
        else:
            assert type(got) == type(want)
Ejemplo n.º 4
0
    def test_equals(self):
        """Check ``DatetimeIndex.equals`` for naive and tz-aware indexes.

        For every timezone under test the index must equal itself, its copy
        and its object-dtype form, must not equal a plain list or Series,
        and must not equal an index in a different timezone even when the
        underlying i8 values are identical.
        """
        # GH 13107
        for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']:
            # build the index in the timezone being iterated over; without
            # ``tz=tz`` every pass would re-test the same naive index
            idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'],
                                   tz=tz)
            assert idx.equals(idx)
            assert idx.equals(idx.copy())
            assert idx.equals(idx.asobject)
            assert idx.asobject.equals(idx)
            assert idx.asobject.equals(idx.asobject)
            assert not idx.equals(list(idx))
            assert not idx.equals(pd.Series(idx))

            # same wall times, but in a timezone not in the loop above
            idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'],
                                    tz='US/Pacific')
            assert not idx.equals(idx2)
            assert not idx.equals(idx2.copy())
            assert not idx.equals(idx2.asobject)
            assert not idx.asobject.equals(idx2)
            assert not idx.equals(list(idx2))
            assert not idx.equals(pd.Series(idx2))

            # same internal, different tz
            idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific')
            tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
            assert not idx.equals(idx3)
            assert not idx.equals(idx3.copy())
            assert not idx.equals(idx3.asobject)
            assert not idx.asobject.equals(idx3)
            assert not idx.equals(list(idx3))
            assert not idx.equals(pd.Series(idx3))
Ejemplo n.º 5
0
    def test_properties(self, closed):
        """Check size, bounds, ``closed`` and array form of the index.

        The same checks run on the index from ``self.create_index`` and on
        the one from ``self.create_index_with_nan``.
        """
        idx = self.create_index(closed=closed)
        assert len(idx) == 10
        assert idx.size == 10
        assert idx.shape == (10, )

        tm.assert_index_equal(idx.left, Index(np.arange(10)))
        tm.assert_index_equal(idx.right, Index(np.arange(1, 11)))
        tm.assert_index_equal(idx.mid, Index(np.arange(0.5, 10.5)))

        assert idx.closed == closed

        intervals = []
        for lo, hi in zip(range(10), range(1, 11)):
            intervals.append(Interval(lo, hi, closed))
        tm.assert_numpy_array_equal(np.asarray(idx),
                                    np.array(intervals, dtype=object))

        # with nans
        idx = self.create_index_with_nan(closed=closed)
        assert len(idx) == 10
        assert idx.size == 10
        assert idx.shape == (10, )

        left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        right = left + 1
        mid = left + 0.5
        tm.assert_index_equal(idx.left, left)
        tm.assert_index_equal(idx.right, right)
        tm.assert_index_equal(idx.mid, mid)

        assert idx.closed == closed

        # a missing left bound means the whole element is NaN
        intervals = []
        for lo, hi in zip(left, right):
            if notna(lo):
                intervals.append(Interval(lo, hi, closed))
            else:
                intervals.append(np.nan)
        tm.assert_numpy_array_equal(np.asarray(idx),
                                    np.array(intervals, dtype=object))
Ejemplo n.º 6
0
    def test_constructor_with_datetimelike(self):
        """Build a ``Categorical`` from datetime-like Series, with/without NaT.

        The categories must equal the source index with ``freq`` cleared,
        and a ``NaT`` entry must get code ``-1`` and appear in the repr.
        """
        # 12077
        # constructor with a datetimelike and NaT
        datetimelikes = [
            date_range('1995-01-01 00:00:00', periods=5, freq='s'),
            date_range('1995-01-01 00:00:00', periods=5,
                       freq='s', tz='US/Eastern'),
            timedelta_range('1 day', periods=5, freq='s'),
        ]

        for dtl in datetimelikes:
            index_type = type(dtl)

            ser = Series(dtl)
            cat = Categorical(ser)
            categories = index_type(ser)
            categories.freq = None
            tm.assert_index_equal(cat.categories, categories)
            tm.assert_numpy_array_equal(cat.codes,
                                        np.arange(5, dtype='int8'))

            # with NaT
            with_nat = ser.copy()
            with_nat.iloc[-1] = NaT
            cat = Categorical(with_nat)
            categories = index_type(with_nat.dropna())
            categories.freq = None
            tm.assert_index_equal(cat.categories, categories)

            codes = np.array([0, 1, 2, 3, -1], dtype=np.int8)
            tm.assert_numpy_array_equal(cat.codes, codes)

            assert 'NaT' in repr(cat)
Ejemplo n.º 7
0
 def test_simple(self):
     """``cartesian_product`` of two lists gives one array per input."""
     letters = list('ABC')
     numbers = [1, 22]
     got_letters, got_numbers = cartesian_product([letters, numbers])
     # first input is repeated element-wise, second is tiled
     want_letters = np.array(['A', 'A', 'B', 'B', 'C', 'C'])
     want_numbers = np.array([1, 22, 1, 22, 1, 22])
     tm.assert_numpy_array_equal(got_letters, want_letters)
     tm.assert_numpy_array_equal(got_numbers, want_numbers)
Ejemplo n.º 8
0
    def test_value_counts_inferred(self):
        """Check ``value_counts``/``unique``/``nunique`` on string data.

        Runs the same assertions for an ``Index`` and a ``Series`` built
        from the same list of labels.
        """
        values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']

        for klass in (Index, Series):
            obj = klass(values)
            counts = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(obj.value_counts(), counts)

            uniques = np.unique(np.array(values, dtype=np.object_))
            if isinstance(obj, Index):
                tm.assert_index_equal(obj.unique(), Index(uniques))
            else:
                tm.assert_numpy_array_equal(obj.unique(), uniques)

            assert obj.nunique() == 4

            # don't sort, have to sort after the fact as not sorting is
            # platform-dep
            unsorted = obj.value_counts(sort=False).sort_values()
            counts = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
            tm.assert_series_equal(unsorted, counts)

            # sort ascending
            ascending = obj.value_counts(ascending=True)
            counts = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(ascending, counts)

            # relative histogram.
            relative = obj.value_counts(normalize=True)
            counts = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(relative, counts)
Ejemplo n.º 9
0
    def test_interleave(self):
        """``.values`` of ``self.tzframe`` is an object array of Timestamps.

        Checked once with an extra string column added and once for the
        datetime columns alone.
        """
        # expected content of the three datetime columns, one list each
        naive = [Timestamp('2013-01-01 00:00:00'),
                 Timestamp('2013-01-02 00:00:00'),
                 Timestamp('2013-01-03 00:00:00')]
        eastern = [Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'),
                   pd.NaT,
                   Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern')]
        cet = [Timestamp('2013-01-01 00:00:00+0100', tz='CET'),
               pd.NaT,
               Timestamp('2013-01-03 00:00:00+0100', tz='CET')]

        # interleave with object
        result = self.tzframe.assign(D='foo').values
        columns = [naive, eastern, cet, ['foo', 'foo', 'foo']]
        expected = np.array(columns, dtype=object).T
        tm.assert_numpy_array_equal(result, expected)

        # interleave with only datetime64[ns]
        result = self.tzframe.values
        columns = [naive, eastern, cet]
        expected = np.array(columns, dtype=object).T
        tm.assert_numpy_array_equal(result, expected)
Ejemplo n.º 10
0
    def _check(dtype):
        """Compare ``groupby.group_ohlc_<dtype>`` with a NumPy reference.

        Twenty random values are split into three labelled groups (sizes
        6, 6 and 8); the kernel output must match first/max/min/last of
        each group, also after the first group is set to NaN.
        """
        values = np.array(np.random.randn(20), dtype=dtype)

        edges = np.array([6, 12, 20])
        out = np.zeros((3, 4), dtype)
        counts = np.zeros(len(out), dtype=np.int64)
        # group sizes are the gaps between consecutive bin edges
        group_sizes = np.diff(np.r_[0, edges])
        labels = _ensure_int64(np.repeat(np.arange(3), group_sizes))

        func = getattr(groupby, 'group_ohlc_%s' % dtype)
        func(out, counts, values[:, None], labels)

        def _ohlc(group):
            if not isna(group).all():
                return [group[0], group.max(), group.min(), group[-1]]
            return np.repeat(nan, 4)

        groups = (values[:6], values[6:12], values[12:])
        expected = np.array([_ohlc(group) for group in groups])

        assert_almost_equal(out, expected)
        tm.assert_numpy_array_equal(counts,
                                    np.array([6, 6, 8], dtype=np.int64))

        # an all-NaN group must produce an all-NaN output row
        values[:6] = nan
        func(out, counts, values[:, None], labels)
        expected[0] = nan
        assert_almost_equal(out, expected)
Ejemplo n.º 11
0
    def test_include_na(self):
        """``get_dummies`` only emits a NaN column when ``dummy_na=True``."""
        values = ['a', 'b', np.nan]
        col_a = {0: 1.0, 1: 0.0, 2: 0.0}
        col_b = {0: 0.0, 1: 1.0, 2: 0.0}
        col_nan = {0: 0.0, 1: 0.0, 2: 1.0}

        res = get_dummies(values, sparse=self.sparse)
        exp = DataFrame({'a': col_a, 'b': col_b})
        assert_frame_equal(res, exp)

        # Sparse dataframes do not allow nan labelled columns, see #GH8822
        res_na = get_dummies(values, dummy_na=True, sparse=self.sparse)
        exp_na = DataFrame({nan: col_nan, 'a': col_a, 'b': col_b})
        exp_na = exp_na.reindex_axis(['a', 'b', nan], 1)
        # hack (NaN handling in assert_index_equal)
        exp_na.columns = res_na.columns
        assert_frame_equal(res_na, exp_na)

        res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse)
        exp_just_na = DataFrame(Series(1.0, index=[0]), columns=[nan])
        tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values)
Ejemplo n.º 12
0
    def test_frame_non_unique_columns(self):
        """JSON round trips for frames with duplicated column labels.

        ``index``, ``columns`` and ``records`` orients must raise
        ``ValueError``; ``split`` and ``values`` orients must round-trip.
        """
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'x'])

        for orient in ('index', 'columns', 'records'):
            self.assertRaises(ValueError, df.to_json, orient=orient)

        from_split = read_json(df.to_json(orient='split'),
                               orient='split', dtype=False)
        assert_frame_equal(df, from_split)
        from_values = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, from_values.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'y'])
        from_split = read_json(df.to_json(orient='split'), orient='split')
        assert_frame_equal(from_split, df)

        def _check(frame):
            restored = read_json(frame.to_json(orient='split'),
                                 orient='split', convert_dates=['x'])
            assert_frame_equal(restored, frame)

        datasets = [
            [['a', 'b'], ['c', 'd']],
            [[1.5, 2.5], [3.5, 4.5]],
            [[1, 2.5], [3, 4.5]],
            [[Timestamp('20130101'), 3.5], [Timestamp('20130102'), 4.5]],
        ]
        for rows in datasets:
            _check(DataFrame(rows, index=[1, 2], columns=['x', 'x']))
Ejemplo n.º 13
0
 def test_as_matrix_deprecated(self, float_frame):
     """``as_matrix`` emits ``FutureWarning`` and still matches ``values``."""
     # GH 18458
     with tm.assert_produces_warning(FutureWarning):
         result = float_frame.as_matrix(
             columns=float_frame.columns.tolist())
     tm.assert_numpy_array_equal(result, float_frame.values)
Ejemplo n.º 14
0
 def test_compare_timedelta_ndarray(self):
     # GH11835
     periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')]
     arr = np.array(periods)
     result = arr[0] > arr
     expected = np.array([False, False])
     tm.assert_numpy_array_equal(result, expected)
Ejemplo n.º 15
0
    def test_cast_scalar_to_array(self):
        """``cast_scalar_to_array`` fills an array of the given shape.

        Checks an explicit integer dtype, an inferred float dtype, a naive
        ``Timestamp`` (``datetime64[ns]``) and two pandas scalars that are
        expected to be stored with object dtype.
        """
        arr = cast_scalar_to_array((3, 2), 1, dtype=np.int64)
        exp = np.ones((3, 2), dtype=np.int64)
        tm.assert_numpy_array_equal(arr, exp)

        arr = cast_scalar_to_array((3, 2), 1.1)
        exp = np.empty((3, 2), dtype=np.float64)
        exp.fill(1.1)
        tm.assert_numpy_array_equal(arr, exp)

        arr = cast_scalar_to_array((2, 3), Timestamp('2011-01-01'))
        exp = np.empty((2, 3), dtype='datetime64[ns]')
        exp.fill(np.datetime64('2011-01-01'))
        tm.assert_numpy_array_equal(arr, exp)

        # pandas dtype is stored as object dtype
        # (builtin ``object`` is used because the ``np.object`` alias is
        # deprecated and absent from recent NumPy releases)
        obj = Timestamp('2011-01-01', tz='US/Eastern')
        arr = cast_scalar_to_array((2, 3), obj)
        exp = np.empty((2, 3), dtype=object)
        exp.fill(obj)
        tm.assert_numpy_array_equal(arr, exp)

        obj = Period('2011-01-01', freq='D')
        arr = cast_scalar_to_array((2, 3), obj)
        exp = np.empty((2, 3), dtype=object)
        exp.fill(obj)
        tm.assert_numpy_array_equal(arr, exp)
Ejemplo n.º 16
0
 def test_noright(self):
     data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
     result, bins = cut(data, 4, right=False, retbins=True)
     exp_codes = np.array([0, 0, 0, 2, 3, 0, 1], dtype=np.int8)
     tm.assert_numpy_array_equal(result.codes, exp_codes)
     exp = np.array([0.2, 2.575, 4.95, 7.325, 9.7095])
     tm.assert_almost_equal(bins, exp)
Ejemplo n.º 17
0
 def test_arraylike(self):
     data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1]
     result, bins = cut(data, 3, retbins=True)
     exp_codes = np.array([0, 0, 0, 1, 2, 0], dtype=np.int8)
     tm.assert_numpy_array_equal(result.codes, exp_codes)
     exp = np.array([0.1905, 3.36666667, 6.53333333, 9.7])
     tm.assert_almost_equal(bins, exp)
Ejemplo n.º 18
0
    def test_difference_base(self):
        """Check ``Index.difference`` against an index and array-likes.

        For each index in ``self.indices`` the difference of ``idx[2:]`` and
        ``idx[:4]`` must have the contents of ``idx[4:]``.  The same is then
        checked with the second operand given as an ndarray, a Series and a
        list (GH 10149).  PeriodIndex must reject those inputs with a
        ``ValueError`` and CategoricalIndex is skipped.
        """
        for _name, idx in compat.iteritems(self.indices):
            first = idx[2:]
            second = idx[:4]
            answer = idx[4:]
            result = first.difference(second)

            if isinstance(idx, CategoricalIndex):
                pass
            else:
                self.assertTrue(tm.equalContents(result, answer))

            # GH 10149
            cases = [klass(second.values)
                     for klass in [np.array, Series, list]]
            for case in cases:
                if isinstance(idx, PeriodIndex):
                    msg = "can only call with other PeriodIndex-ed objects"
                    with tm.assertRaisesRegexp(ValueError, msg):
                        result = first.difference(case)
                elif isinstance(idx, CategoricalIndex):
                    pass
                elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
                    # recompute for this case; otherwise the assertions
                    # below would only re-check the result computed above
                    result = first.difference(case)
                    self.assertEqual(result.__class__, answer.__class__)
                    tm.assert_numpy_array_equal(result.asi8, answer.asi8)
                else:
                    result = first.difference(case)
                    self.assertTrue(tm.equalContents(result, answer))

            if isinstance(idx, MultiIndex):
                msg = "other must be a MultiIndex or a list of tuples"
                with tm.assertRaisesRegexp(TypeError, msg):
                    result = first.difference([1, 2, 3])
Ejemplo n.º 19
0
def test_from_product_datetimeindex():
    """``MultiIndex.from_product`` with a date level yields Timestamp tuples."""
    dates = date_range('2000-01-01', periods=2)
    mi = pd.MultiIndex.from_product([[1, 2], dates])
    pairs = [(number, pd.Timestamp(day))
             for number in (1, 2)
             for day in ('2000-01-01', '2000-01-02')]
    expected = construct_1d_object_array_from_listlike(pairs)
    tm.assert_numpy_array_equal(mi.values, expected)
Ejemplo n.º 20
0
    def test_map(self):
        """Check ``CategoricalIndex.map`` with string and numeric mappers.

        Lower-casing must give a Categorical whose categories are the mapped
        ones in the same order, a constant mapper must give an int64 array,
        and a label-to-number mapper must give numeric categories.
        """
        def lower(label):
            return label.lower()

        ordered_ci = pd.CategoricalIndex(list('ABABC'),
                                         categories=list('CBA'),
                                         ordered=True)
        mapped = ordered_ci.map(lower)
        expected = pd.Categorical(list('ababc'), categories=list('cba'),
                                  ordered=True)
        tm.assert_categorical_equal(mapped, expected)

        named_ci = pd.CategoricalIndex(list('ABABC'),
                                       categories=list('BAC'),
                                       ordered=False, name='XXX')
        mapped = named_ci.map(lower)
        expected = pd.Categorical(list('ababc'), categories=list('bac'),
                                  ordered=False)
        tm.assert_categorical_equal(mapped, expected)

        tm.assert_numpy_array_equal(named_ci.map(lambda x: 1),
                                    np.array([1] * 5, dtype=np.int64))

        # change categories dtype
        plain_ci = pd.CategoricalIndex(list('ABABC'),
                                       categories=list('BAC'),
                                       ordered=False)
        numbers = {'A': 10, 'B': 20, 'C': 30}

        def to_number(label):
            return numbers.get(label)

        mapped = plain_ci.map(to_number)
        expected = pd.Categorical([10, 20, 10, 20, 30],
                                  categories=[20, 10, 30],
                                  ordered=False)
        tm.assert_categorical_equal(mapped, expected)
Ejemplo n.º 21
0
    def test_numpy_argsort(self):
        """``np.argsort`` on an index must agree with ``Index.argsort``.

        For CategoricalIndex and RangeIndex the ``axis``, ``kind`` and
        ``order`` arguments of ``np.argsort`` must additionally be rejected
        with a ``ValueError``.
        """
        for ind in self.indices.values():
            result = np.argsort(ind)
            expected = ind.argsort()
            tm.assert_numpy_array_equal(result, expected)

            # these are the only two types that perform
            # pandas compatibility input validation - the
            # rest already perform separate (or no) such
            # validation via their 'values' attribute as
            # defined in pandas.indexes/base.py - they
            # cannot be changed at the moment due to
            # backwards compatibility concerns
            #
            # check the instance itself: isinstance() on type(ind) tests
            # the class object and can never match an index class
            if isinstance(ind, (CategoricalIndex, RangeIndex)):
                msg = "the 'axis' parameter is not supported"
                tm.assertRaisesRegexp(ValueError, msg,
                                      np.argsort, ind, axis=1)

                msg = "the 'kind' parameter is not supported"
                tm.assertRaisesRegexp(ValueError, msg, np.argsort,
                                      ind, kind='mergesort')

                msg = "the 'order' parameter is not supported"
                tm.assertRaisesRegexp(ValueError, msg, np.argsort,
                                      ind, order=('a', 'b'))
Ejemplo n.º 22
0
    def _check_axes_shape(self, axes, axes_num=None, layout=None,
                          figsize=None):
        """
        Verify the number of drawn axes, their layout and the figure size

        Parameters
        ----------
        axes : matplotlib Axes object, or its list-like
        axes_num : number
            expected number of visible axes; every visible axes must hold
            at least one child. Not checked when None.
        layout :  tuple
            expected layout, (expected number of rows , columns).
            Not checked when None.
        figsize : tuple
            expected figsize. ``self.default_figsize`` is used when None.
        """
        from pandas.plotting._matplotlib.tools import _flatten

        expected_size = self.default_figsize if figsize is None else figsize
        shown = self._flatten_visible(axes)

        if axes_num is not None:
            assert len(shown) == axes_num
            for visible_ax in shown:
                # check something drawn on visible axes
                assert len(visible_ax.get_children()) > 0

        if layout is not None:
            actual_layout = self._get_axes_layout(_flatten(axes))
            assert actual_layout == layout

        # the figure size is read from the first visible axes
        actual_size = shown[0].figure.get_size_inches()
        tm.assert_numpy_array_equal(
            actual_size, np.array(expected_size, dtype=np.float64))
Ejemplo n.º 23
0
    def test_equals(self, freq):
        """Check ``PeriodIndex.equals`` against same and different ``freq``.

        The index must equal itself, its copy and its object-dtype form,
        must not equal a list or Series, and must not equal a PeriodIndex
        built with ``freq='H'`` even when the i8 values are identical.
        """
        def assert_differs_from(other):
            # every way of presenting ``other`` must compare unequal
            assert not idx.equals(other)
            assert not idx.equals(other.copy())
            assert not idx.equals(other.astype(object))
            assert not idx.astype(object).equals(other)
            assert not idx.equals(list(other))
            assert not idx.equals(pd.Series(other))

        # GH#13107
        idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'],
                             freq=freq)
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        hourly = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'],
                                freq='H')
        assert_differs_from(hourly)

        # same internal i8 values, different freq
        same_i8 = pd.PeriodIndex._simple_new(idx.asi8, freq='H')
        tm.assert_numpy_array_equal(idx.asi8, same_i8.asi8)
        assert_differs_from(same_i8)
Ejemplo n.º 24
0
    def test_get_value(self):
        """``PeriodIndex.get_value`` for unique and duplicated labels.

        A unique label must return a scalar; a duplicated label must return
        an array with the value at every matching position.
        """
        # GH 17717
        p0, p1, p2 = [pd.Period(day) for day in
                      ('2017-09-01', '2017-09-02', '2017-09-03')]

        unique_idx = pd.PeriodIndex([p0, p1, p2])
        scalar = unique_idx.get_value(np.array([1, 2, 3]), p1)
        assert scalar == 2

        duplicated = [([p1, p1, p2], [1, 2]),
                      ([p1, p2, p1], [1, 3])]
        for periods, matches in duplicated:
            idx = pd.PeriodIndex(periods)
            values = np.array([1, 2, 3])
            expected = np.array(matches)

            result = idx.get_value(values, p1)
            tm.assert_numpy_array_equal(result, expected)
Ejemplo n.º 25
0
    def test_get_indexer(self):
        """``PeriodIndex.get_indexer`` with pad/backfill/nearest/tolerance."""
        # GH 17717
        idx = pd.PeriodIndex([pd.Period(day) for day in
                              ('2017-09-01', '2017-09-04', '2017-09-07')])

        tm.assert_numpy_array_equal(idx.get_indexer(idx),
                                    np.array([0, 1, 2], dtype=np.intp))

        # targets fall before, between and after the index entries
        target = pd.PeriodIndex([pd.Period(day) for day in
                                 ('2017-08-31', '2017-09-02',
                                  '2017-09-05', '2017-09-09')])

        by_method = [('pad', [-1, 0, 1, 2]),
                     ('backfill', [0, 1, 2, -1]),
                     ('nearest', [0, 0, 1, 2])]
        for method, positions in by_method:
            tm.assert_numpy_array_equal(
                idx.get_indexer(target, method),
                np.array(positions, dtype=np.intp))

        within_a_day = idx.get_indexer(target, 'nearest',
                                       tolerance=pd.Timedelta('1 day'))
        tm.assert_numpy_array_equal(within_a_day,
                                    np.array([0, 0, 1, -1], dtype=np.intp))
Ejemplo n.º 26
0
    def test_groupby_categorical_no_compress(self):
        """Grouping by a Categorical keeps categories that have no rows.

        Grouping by the Categorical must match grouping by its codes once
        the result is reindexed to the full category list; an unused
        category must show up as NaN in the group means.
        """
        values = Series(np.random.randn(9))

        # every category is used
        codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])
        cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True)

        by_cats = values.groupby(cats).mean()
        by_codes = values.groupby(codes).mean()
        by_codes.index = CategoricalIndex(by_codes.index,
                                          categories=cats.categories,
                                          ordered=cats.ordered)
        assert_series_equal(by_cats, by_codes)

        # category 2 is never used
        codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3])
        cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True)

        by_cats = values.groupby(cats).mean()
        by_codes = values.groupby(codes).mean().reindex(cats.categories)
        by_codes.index = CategoricalIndex(by_codes.index,
                                          categories=cats.categories,
                                          ordered=cats.ordered)
        assert_series_equal(by_cats, by_codes)

        cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"],
                           categories=["a", "b", "c", "d"], ordered=True)
        frame = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats})

        means = frame.groupby("b").mean()["a"].values
        tm.assert_numpy_array_equal(means, np.array([1, 2, 4, np.nan]))
Ejemplo n.º 27
0
 def test_convert_sql_column_unicode(self):
     """``lib.convert_sql_column`` turns ``None`` into NaN, keeps strings."""
     raw = [u('1.5'), None, u('3'), u('4.2')]
     converted = lib.convert_sql_column(np.array(raw, dtype=object))
     wanted = np.array([np.nan if item is None else item for item in raw],
                       dtype=object)
     tm.assert_numpy_array_equal(converted, wanted)
Ejemplo n.º 28
0
 def test_rename_set_name(self):
     """``Series.rename`` with a scalar sets the name on a new Series."""
     ser = Series(range(4), index=list('abcd'))
     candidates = ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]
     for new_name in candidates:
         renamed = ser.rename(new_name)
         assert renamed.name == new_name
         tm.assert_numpy_array_equal(renamed.index.values,
                                     ser.index.values)
         # the original Series must be left unnamed
         assert ser.name is None
Ejemplo n.º 29
0
    def test_cython_right_outer_join(self):
        """Right outer join via ``libjoin.left_outer_join`` with swapped args.

        The returned indexers must match the mergesort orderings of both
        inputs taken at the expected positions, with ``-1`` marking a row
        that has no match.
        """
        left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
        right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
        max_group = 5

        rs, ls = libjoin.left_outer_join(right, left, max_group)

        def expected_indexer(sorter, positions):
            # take() maps -1 to the last element, so restore the marker
            taken = sorter.take(positions)
            taken[positions == -1] = -1
            return taken

        left_sorter = left.argsort(kind='mergesort')
        right_sorter = right.argsort(kind='mergesort')

        #                  0        1        1        1
        left_positions = a_([0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5,
                             #            2        2        4
                             6, 7, 8, 6, 7, 8, -1])
        right_positions = a_([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3,
                              4, 4, 4, 5, 5, 5, 6])

        exp_ls = expected_indexer(left_sorter, left_positions)
        exp_rs = expected_indexer(right_sorter, right_positions)

        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
Ejemplo n.º 30
0
 def test_reindex_empty_index(self):
     """Reindexing an empty ``CategoricalIndex`` gives a plain ``Index``."""
     # See GH16770
     empty = CategoricalIndex([])
     new_index, indexer = empty.reindex(['a', 'b'])
     tm.assert_index_equal(new_index, Index(['a', 'b']), exact=True)
     # neither label exists in the empty index
     missing = np.array([-1, -1], dtype=np.intp)
     tm.assert_numpy_array_equal(indexer, missing)
Ejemplo n.º 31
0
 def assert_as_array_equals(slc, asarray):
     """Assert ``BlockPlacement(slc).as_array`` equals *asarray* as int64."""
     actual = BlockPlacement(slc).as_array
     expected = np.asarray(asarray, dtype=np.int64)
     tm.assert_numpy_array_equal(actual, expected)
Ejemplo n.º 32
0
 def test_convert_non_hashable(self):
     """``lib.maybe_convert_numeric`` maps unconvertible items to NaN.

     The input mixes a list (non-hashable), a float and a string; only the
     float is expected to survive the conversion.
     """
     # GH13324
     # make sure that we are handling non-hashables
     # dtype=object is explicit: NumPy no longer infers an object array
     # from a ragged nested sequence
     arr = np.array([[10.0, 2], 1.0, "apple"], dtype=object)
     result = lib.maybe_convert_numeric(arr, set(), False, True)
     tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
Ejemplo n.º 33
0
 def test_reindex_empty_index(self):
     """An empty ``CategoricalIndex`` reindexes to the requested labels."""
     # See GH16770
     labels = ['a', 'b']
     reindexed, positions = CategoricalIndex([]).reindex(labels)
     tm.assert_index_equal(reindexed, Index(labels), exact=True)
     # -1 marks labels that were not present in the source index
     tm.assert_numpy_array_equal(positions,
                                 np.array([-1, -1], dtype=np.intp))
Ejemplo n.º 34
0
    def test_construction(self):
        """Build ``CategoricalIndex`` objects from a range of inputs.

        Covers construction from an existing index and its values, from
        categories only, from a list, from a ``Categorical`` and from
        another ``CategoricalIndex`` with narrowed categories, then the
        conversion back to a plain ``Index``.
        """
        def check(result, exp_categories, exp_codes, ordered=None):
            # ordered=None means the ``ordered`` flag is not asserted
            tm.assert_index_equal(result.categories, Index(exp_categories))
            tm.assert_numpy_array_equal(result.codes,
                                        np.array(exp_codes, dtype='int8'))
            if ordered is True:
                assert result.ordered
            elif ordered is False:
                assert not result.ordered

        aabbca_codes = [0, 0, 1, 1, 2, 0]
        # 'c' is not among the categories ['a', 'b'], hence code -1
        narrowed_codes = [0, 0, 1, 1, -1, 0]

        ci = self.create_index(categories=list('abcd'))
        categories = ci.categories

        for data in (ci, ci.values):
            result = Index(data)
            tm.assert_index_equal(result, ci, exact=True)
            assert not result.ordered

        # empty
        check(CategoricalIndex(categories=categories),
              categories, [], ordered=False)

        # passing categories
        check(CategoricalIndex(list('aabbca'), categories=categories),
              categories, aabbca_codes)

        c = pd.Categorical(list('aabbca'))
        check(CategoricalIndex(c), list('abc'), aabbca_codes, ordered=False)

        check(CategoricalIndex(c, categories=categories),
              categories, aabbca_codes, ordered=False)

        ci = CategoricalIndex(c, categories=list('abcd'))
        check(CategoricalIndex(ci), categories, aabbca_codes, ordered=False)

        check(CategoricalIndex(ci, categories=list('ab')),
              list('ab'), narrowed_codes, ordered=False)

        check(CategoricalIndex(ci, categories=list('ab'), ordered=True),
              list('ab'), narrowed_codes, ordered=True)

        # an explicit dtype='category' must not change the outcome
        result = pd.CategoricalIndex(ci, categories=list('ab'), ordered=True)
        expected = pd.CategoricalIndex(ci,
                                       categories=list('ab'),
                                       ordered=True,
                                       dtype='category')
        tm.assert_index_equal(result, expected, exact=True)

        # turn me to an Index
        result = Index(np.array(ci))
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)
Ejemplo n.º 35
0
    def test_order(self):
        """Exercise ``DatetimeIndex.sort_values`` with and without a freq.

        For already-sorted indexes that carry a freq, the freq is expected
        to survive ascending sorts and to have ``n == -1`` after descending
        sorts.  For unsorted indexes (one of them containing NaT) the
        result is expected to have no freq, and the returned indexer is
        compared against fixed positions.
        """
        # with freq
        daily = DatetimeIndex(['2011-01-01', '2011-01-02',
                               '2011-01-03'], freq='D', name='idx')
        hourly = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                                '2011-01-01 11:00'], freq='H',
                               tz='Asia/Tokyo', name='tzidx')

        for idx in (daily, hourly):
            flipped = idx[::-1]

            result = idx.sort_values()
            tm.assert_index_equal(result, idx)
            assert result.freq == idx.freq

            result = idx.sort_values(ascending=False)
            tm.assert_index_equal(result, flipped)
            assert result.freq == flipped.freq
            assert result.freq.n == -1

            result, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(result, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
                                        check_dtype=False)
            assert result.freq == idx.freq

            result, indexer = idx.sort_values(return_indexer=True,
                                              ascending=False)
            tm.assert_index_equal(result, flipped)
            tm.assert_numpy_array_equal(indexer,
                                        np.array([2, 1, 0]),
                                        check_dtype=False)
            assert result.freq == flipped.freq
            assert result.freq.n == -1

        # without freq
        shuffled = ['2011-01-01', '2011-01-03', '2011-01-05',
                    '2011-01-02', '2011-01-01']
        in_order = ['2011-01-01', '2011-01-01', '2011-01-02',
                    '2011-01-03', '2011-01-05']
        shuffled_nat = [pd.NaT, '2011-01-03', '2011-01-05',
                        '2011-01-02', pd.NaT]
        in_order_nat = [pd.NaT, pd.NaT, '2011-01-02', '2011-01-03',
                        '2011-01-05']

        for tz in self.tz:
            cases = [
                (DatetimeIndex(shuffled, tz=tz, name='idx1'),
                 DatetimeIndex(in_order, tz=tz, name='idx1')),
                (DatetimeIndex(shuffled, tz=tz, name='idx2'),
                 DatetimeIndex(in_order, tz=tz, name='idx2')),
                (DatetimeIndex(shuffled_nat, tz=tz, name='idx3'),
                 DatetimeIndex(in_order_nat, tz=tz, name='idx3')),
            ]

            for idx, expected in cases:
                result = idx.sort_values()
                tm.assert_index_equal(result, expected)
                assert result.freq is None

                result = idx.sort_values(ascending=False)
                tm.assert_index_equal(result, expected[::-1])
                assert result.freq is None

                result, indexer = idx.sort_values(return_indexer=True)
                tm.assert_index_equal(result, expected)
                tm.assert_numpy_array_equal(indexer,
                                            np.array([0, 4, 3, 1, 2]),
                                            check_dtype=False)
                assert result.freq is None

                result, indexer = idx.sort_values(return_indexer=True,
                                                  ascending=False)
                tm.assert_index_equal(result, expected[::-1])
                tm.assert_numpy_array_equal(indexer,
                                            np.array([2, 1, 3, 4, 0]),
                                            check_dtype=False)
                assert result.freq is None
Ejemplo n.º 36
0
def assert_block_equal(left, right):
    """Assert that two blocks have equal values, dtype and placement.

    Both arguments must expose ``values``, ``dtype`` and ``mgr_locs``;
    each ``mgr_locs`` must be a ``BlockPlacement``.
    """
    tm.assert_numpy_array_equal(left.values, right.values)
    assert left.dtype == right.dtype

    # both placements must be real BlockPlacement objects before comparing
    for block in (left, right):
        assert isinstance(block.mgr_locs, BlockPlacement)

    left_locs = left.mgr_locs.as_array
    right_locs = right.mgr_locs.as_array
    tm.assert_numpy_array_equal(left_locs, right_locs)
Ejemplo n.º 37
0
 def test_mgr_locs(self):
     assert isinstance(self.fblock.mgr_locs, BlockPlacement)
     tm.assert_numpy_array_equal(
         self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64)
     )
Ejemplo n.º 38
0
    def test_eq(self, other):
        idx = PeriodIndex(["2017", "2017", "2018"], freq="D")
        expected = np.array([True, True, False])
        result = idx == other

        tm.assert_numpy_array_equal(result, expected)
Ejemplo n.º 39
0
    def test_pi_cmp_nat(self, freq):
        idx1 = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-05"], freq=freq)

        result = idx1 > Period("2011-02", freq=freq)
        exp = np.array([False, False, False, True])
        tm.assert_numpy_array_equal(result, exp)
        result = Period("2011-02", freq=freq) < idx1
        tm.assert_numpy_array_equal(result, exp)

        result = idx1 == Period("NaT", freq=freq)
        exp = np.array([False, False, False, False])
        tm.assert_numpy_array_equal(result, exp)
        result = Period("NaT", freq=freq) == idx1
        tm.assert_numpy_array_equal(result, exp)

        result = idx1 != Period("NaT", freq=freq)
        exp = np.array([True, True, True, True])
        tm.assert_numpy_array_equal(result, exp)
        result = Period("NaT", freq=freq) != idx1
        tm.assert_numpy_array_equal(result, exp)

        idx2 = PeriodIndex(["2011-02", "2011-01", "2011-04", "NaT"], freq=freq)
        result = idx1 < idx2
        exp = np.array([True, False, False, False])
        tm.assert_numpy_array_equal(result, exp)

        result = idx1 == idx2
        exp = np.array([False, False, False, False])
        tm.assert_numpy_array_equal(result, exp)

        result = idx1 != idx2
        exp = np.array([True, True, True, True])
        tm.assert_numpy_array_equal(result, exp)

        result = idx1 == idx1
        exp = np.array([True, True, False, True])
        tm.assert_numpy_array_equal(result, exp)

        result = idx1 != idx1
        exp = np.array([False, False, True, False])
        tm.assert_numpy_array_equal(result, exp)
Ejemplo n.º 40
0
    def test_pi_cmp_period(self):
        idx = period_range("2007-01", periods=20, freq="M")

        result = idx < idx[10]
        exp = idx.values < idx.values[10]
        tm.assert_numpy_array_equal(result, exp)
Ejemplo n.º 41
0
def assert_array_dicts_equal(left, right):
    """Assert that two dicts map the same keys to equal arrays.

    Parameters
    ----------
    left, right : dict
        Mappings whose values can be converted with ``np.asarray``.

    Raises
    ------
    AssertionError
        If the key sets differ, or if the arrays stored under any key
        differ.
    """
    # Compare the key sets first.  Iterating only ``left`` would let extra
    # keys in ``right`` go unnoticed, and a key missing from ``right`` would
    # surface as a KeyError instead of an assertion failure.
    left_keys = set(left)
    right_keys = set(right)
    assert left_keys == right_keys, (
        "dict keys are different: only in left {!r}, only in right {!r}"
        .format(left_keys - right_keys, right_keys - left_keys))

    for key, values in left.items():
        tm.assert_numpy_array_equal(np.asarray(values),
                                    np.asarray(right[key]))
Ejemplo n.º 42
0
    def test_comp_nat(self, dtype):
        left = pd.PeriodIndex(
            [pd.Period("2011-01-01"), pd.NaT,
             pd.Period("2011-01-03")])
        right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period("2011-01-03")])

        if dtype is not None:
            left = left.astype(dtype)
            right = right.astype(dtype)

        result = left == right
        expected = np.array([False, False, True])
        tm.assert_numpy_array_equal(result, expected)

        result = left != right
        expected = np.array([True, True, False])
        tm.assert_numpy_array_equal(result, expected)

        expected = np.array([False, False, False])
        tm.assert_numpy_array_equal(left == pd.NaT, expected)
        tm.assert_numpy_array_equal(pd.NaT == right, expected)

        expected = np.array([True, True, True])
        tm.assert_numpy_array_equal(left != pd.NaT, expected)
        tm.assert_numpy_array_equal(pd.NaT != left, expected)

        expected = np.array([False, False, False])
        tm.assert_numpy_array_equal(left < pd.NaT, expected)
        tm.assert_numpy_array_equal(pd.NaT > left, expected)
Ejemplo n.º 43
0
    def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
        """Check ``pd.factorize`` and the array's ``factorize`` agree."""
        codes_func, uniques_func = pd.factorize(data_for_grouping,
                                                na_sentinel=na_sentinel)
        codes_meth, uniques_meth = data_for_grouping.factorize(
            na_sentinel=na_sentinel)

        tm.assert_numpy_array_equal(codes_func, codes_meth)
        self.assert_extension_array_equal(uniques_func, uniques_meth)
Ejemplo n.º 44
0
def test_unique_na_fill(arr, fill_value):
    a = pd.SparseArray(arr, fill_value=fill_value).unique()
    b = pd.Series(arr).unique()
    assert isinstance(a, SparseArray)
    a = np.asarray(a)
    tm.assert_numpy_array_equal(a, b)
Ejemplo n.º 45
0
 def test_get_indexer_backfill(self):
     target = RangeIndex(10)
     indexer = self.index.get_indexer(target, method='backfill')
     expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
     tm.assert_numpy_array_equal(indexer, expected)
 def test_coerce_outside_ns_bounds_one_valid(self):
     """Check ``errors='coerce'`` with one year-1000 and one year-2000 date.

     The expected output holds ``iNaT`` for the year-1000 entry and the
     parsed timestamp for the year-2000 entry.
     """
     dates = np.array(['1/1/1000', '1/1/2000'], dtype=object)
     converted, _ = tslib.array_to_datetime(dates, errors='coerce')

     wanted = np_array_datetime64_compat(
         [tslib.iNaT, '2000-01-01T00:00:00.000000000-0000'],
         dtype='M8[ns]')
     tm.assert_numpy_array_equal(converted, wanted)
Ejemplo n.º 47
0
    def test_equals_op(self):
        """Check element-wise ``==`` between an index and other containers.

        The index from ``self.create_index()`` is compared with other
        indexes, numpy arrays and Series: equal-length operands must give
        a boolean array, mismatched lengths must raise ``ValueError``, and
        a scalar must broadcast (MultiIndex excluded).
        """
        # GH9947, GH10637
        index_a = self.create_index()
        if isinstance(index_a, PeriodIndex):
            pytest.skip("Skip check for PeriodIndex")

        n = len(index_a)
        index_b = index_a[0:-1]
        index_c = index_a[0:-1].append(index_a[-2:-1])
        index_d = index_a[0:1]

        # numpy and Series counterparts of the four indexes above
        array_a = np.array(index_a)
        array_b = np.array(index_a[0:-1])
        array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
        array_d = np.array(index_a[0:1])

        series_a = Series(array_a)
        series_b = Series(array_b)
        series_c = Series(array_c)
        series_d = Series(array_d)

        msg = "Lengths must match|could not be broadcast"
        all_equal = np.array([True] * n)
        last_differs = np.array([True] * (n - 1) + [False])

        # same checks against an Index, a numpy array and a Series
        operands = [
            (index_a, index_b, index_c),
            (array_a, array_b, array_c),
            (series_a, series_b, series_c),
        ]
        for same, shorter, altered in operands:
            with pytest.raises(ValueError, match=msg):
                index_a == shorter
            tm.assert_numpy_array_equal(index_a == same, all_equal)
            tm.assert_numpy_array_equal(index_a == altered, last_differs)

        # cases where length is 1 for one of them
        for single in (index_d, series_d, array_d):
            with pytest.raises(ValueError, match="Lengths must match"):
                index_a == single
        msg = "Can only compare identically-labeled Series objects"
        with pytest.raises(ValueError, match=msg):
            series_a == series_d
        with pytest.raises(ValueError, match="Lengths must match"):
            series_a == array_d

        # comparing with a scalar should broadcast; note that we are excluding
        # MultiIndex because in this case each item in the index is a tuple of
        # length 2, and therefore is considered an array of length 2 in the
        # comparison instead of a scalar
        if isinstance(index_a, MultiIndex):
            return

        # assuming the 2nd to last item is unique in the data
        item = index_a[-2]
        only_item = np.array([False] * (len(index_a) - 2) + [True, False])
        tm.assert_numpy_array_equal(index_a == item, only_item)
        tm.assert_series_equal(series_a == item, Series(only_item))
Ejemplo n.º 48
0
    def test_shift(self):
        """Exercise ``Series.shift`` on ``self.ts`` and related series.

        Covers positional shifts, shifts by a frequency (offset object and
        string alias), PeriodIndex-backed series, integer periods taken
        from int32/int64 data (GH 8129) and tz-aware values (xref 8260).
        """
        ts = self.ts

        # a positional round trip keeps the index and drops one value
        lagged = ts.shift(1)
        restored = lagged.shift(-1)

        tm.assert_index_equal(lagged.index, ts.index)
        tm.assert_index_equal(restored.index, ts.index)
        tm.assert_numpy_array_equal(restored.valid().values,
                                    ts.values[:-1])

        # round trip by a frequency, given as an offset object
        offset = BDay()
        restored = ts.shift(1, freq=offset).shift(-1, freq=offset)
        assert_series_equal(restored, ts)

        restored = ts.shift(0, freq=offset)
        assert_series_equal(restored, ts)

        # round trip by a frequency, given as a string alias
        restored = ts.shift(1, freq='B').shift(-1, freq='B')
        assert_series_equal(restored, ts)

        # corner case
        restored = ts.shift(0)
        assert_series_equal(restored, ts)

        # Shifting with PeriodIndex
        ps = tm.makePeriodSeries()
        lagged = ps.shift(1)
        restored = lagged.shift(-1)
        tm.assert_index_equal(lagged.index, ps.index)
        tm.assert_index_equal(restored.index, ps.index)
        tm.assert_numpy_array_equal(restored.valid().values, ps.values[:-1])

        by_alias = ps.shift(1, 'B')
        by_offset = ps.shift(1, BDay())
        assert_series_equal(by_alias, by_offset)
        assert_series_equal(ps, by_alias.shift(-1, 'B'))

        self.assertRaises(ValueError, ps.shift, freq='D')

        # legacy support
        by_alias_kw = ps.shift(1, freq='B')
        assert_series_equal(by_alias, by_alias_kw)

        by_offset_kw = ps.shift(1, freq=BDay())
        assert_series_equal(by_offset_kw, by_alias_kw)

        # 32-bit taking
        # GH 8129
        index = date_range('2000-01-01', periods=5)
        for dtype in ('int32', 'int64'):
            s1 = Series(np.arange(5, dtype=dtype), index=index)
            # the shift amount is an element of s1, not a Python int
            periods = s1.iloc[1]
            result = s1.shift(periods=periods)
            expected = Series([np.nan, 0, 1, 2, 3], index=index)
            assert_series_equal(result, expected)

        # xref 8260
        # with tz
        s = Series(date_range('2000-01-01 09:00:00', periods=5,
                              tz='US/Eastern'), name='foo')
        diffs = s - s.shift()

        exp = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
        assert_series_equal(diffs, exp)

        # incompat tz
        s2 = Series(date_range('2000-01-01 09:00:00', periods=5,
                               tz='CET'), name='foo')
        self.assertRaises(ValueError, lambda: s - s2)
Ejemplo n.º 49
0
 def test_get_indexer_backfill(self):
     target = Int64Index(np.arange(10))
     indexer = self.index.get_indexer(target, method='backfill')
     expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5])
     tm.assert_numpy_array_equal(indexer, expected)
Ejemplo n.º 50
0
 def test_get_indexer(self):
     target = RangeIndex(10)
     indexer = self.index.get_indexer(target)
     expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp)
     tm.assert_numpy_array_equal(indexer, expected)
Ejemplo n.º 51
0
    def test_block_internal(self):
        """Check the block layout computed for several index sets.

        Each case lists the input positions, the expected ``npoints`` and
        the expected ``blocs`` / ``blengths`` arrays of the resulting
        ``BlockIndex`` (total length 4 in every case).
        """
        cases = [
            # (positions, npoints, blocs, blengths)
            ([2, 3], 2, [2], [2]),
            ([], 0, [], []),
            ([0, 1, 2, 3], 4, [0], [4]),
            ([0, 2, 3], 3, [0, 2], [1, 2]),
        ]

        for positions, npoints, blocs, blengths in cases:
            idx = _make_index(4, np.array(positions, dtype=np.int32),
                              kind='block')
            assert isinstance(idx, BlockIndex)
            self.assertEqual(idx.npoints, npoints)
            tm.assert_numpy_array_equal(idx.blocs,
                                        np.array(blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(idx.blengths,
                                        np.array(blengths, dtype=np.int32))
Ejemplo n.º 52
0
    def test_numpy_array_equal_message(self):

        if is_platform_windows():
            pytest.skip("windows has incomparable line-endings "
                        "and uses L on the shape")

        expected = """numpy array are different

numpy array shapes are different
\\[left\\]:  \\(2,\\)
\\[right\\]: \\(3,\\)"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]))

        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]))

        # scalar comparison
        expected = """Expected type """
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(1, 2)
        expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(1, 2)

        # array / scalar array comparison
        expected = """numpy array are different

numpy array classes are different
\\[left\\]:  ndarray
\\[right\\]: int"""

        with tm.assert_raises_regex(AssertionError, expected):
            # numpy_array_equal only accepts np.ndarray
            assert_numpy_array_equal(np.array([1]), 1)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1]), 1)

        # scalar / array comparison
        expected = """numpy array are different

numpy array classes are different
\\[left\\]:  int
\\[right\\]: ndarray"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(1, np.array([1]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(1, np.array([1]))

        expected = """numpy array are different

numpy array values are different \\(66\\.66667 %\\)
\\[left\\]:  \\[nan, 2\\.0, 3\\.0\\]
\\[right\\]: \\[1\\.0, nan, 3\\.0\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([np.nan, 2, 3]),
                                     np.array([1, np.nan, 3]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([np.nan, 2, 3]),
                                np.array([1, np.nan, 3]))

        expected = """numpy array are different

numpy array values are different \\(50\\.0 %\\)
\\[left\\]:  \\[1, 2\\]
\\[right\\]: \\[1, 3\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([1, 3]))

        expected = """numpy array are different

numpy array values are different \\(50\\.0 %\\)
\\[left\\]:  \\[1\\.1, 2\\.000001\\]
\\[right\\]: \\[1\\.1, 2.0\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1.1, 2.000001]),
                                     np.array([1.1, 2.0]))

        # must pass
        assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0]))

        expected = """numpy array are different

numpy array values are different \\(16\\.66667 %\\)
\\[left\\]:  \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                     np.array([[1, 3], [3, 4], [5, 6]]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                np.array([[1, 3], [3, 4], [5, 6]]))

        expected = """numpy array are different

numpy array values are different \\(25\\.0 %\\)
\\[left\\]:  \\[\\[1, 2\\], \\[3, 4\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4]]),
                                     np.array([[1, 3], [3, 4]]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4]]),
                                np.array([[1, 3], [3, 4]]))

        # allow to overwrite message
        expected = """Index are different

Index shapes are different
\\[left\\]:  \\(2,\\)
\\[right\\]: \\(3,\\)"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]),
                                     np.array([3, 4, 5]),
                                     obj='Index')
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]),
                                np.array([3, 4, 5]),
                                obj='Index')
Ejemplo n.º 53
0
    def test_rank_na_option(self):
        """Check ``DataFrame.rank`` with ``na_option`` against scipy.

        NaNs are planted in ``self.frame``, then the ranks along both axes
        are compared with ``scipy.stats.rankdata`` applied to a copy whose
        NaNs were replaced by a value that sorts where ``na_option`` is
        expected to put them.  Skipped when scipy is not installed.
        """
        # importorskip imports *modules*: asking it for
        # 'scipy.stats.rankdata' (a function) always fails the import and
        # silently skips the test, so import the module and take the
        # attribute instead.
        rankdata = pytest.importorskip('scipy.stats').rankdata

        self.frame['A'][::2] = np.nan
        self.frame['B'][::3] = np.nan
        self.frame['C'][::4] = np.nan
        self.frame['D'][::5] = np.nan

        # ascending, na_option='bottom': NaN replaced by +inf
        ranks0 = self.frame.rank(na_option='bottom')
        ranks1 = self.frame.rank(1, na_option='bottom')

        fvals = self.frame.fillna(np.inf).values

        exp0 = np.apply_along_axis(rankdata, 0, fvals)
        exp1 = np.apply_along_axis(rankdata, 1, fvals)

        tm.assert_almost_equal(ranks0.values, exp0)
        tm.assert_almost_equal(ranks1.values, exp1)

        # ascending, na_option='top': NaN replaced by (min - 1)
        ranks0 = self.frame.rank(na_option='top')
        ranks1 = self.frame.rank(1, na_option='top')

        fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values
        fval1 = self.frame.T
        fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T
        fval1 = fval1.fillna(np.inf).values

        exp0 = np.apply_along_axis(rankdata, 0, fval0)
        exp1 = np.apply_along_axis(rankdata, 1, fval1)

        tm.assert_almost_equal(ranks0.values, exp0)
        tm.assert_almost_equal(ranks1.values, exp1)

        # descending, na_option='top': the reference ranks the negated
        # values, so the +inf fill becomes the smallest entry
        ranks0 = self.frame.rank(na_option='top', ascending=False)
        ranks1 = self.frame.rank(1, na_option='top', ascending=False)

        fvals = self.frame.fillna(np.inf).values

        exp0 = np.apply_along_axis(rankdata, 0, -fvals)
        exp1 = np.apply_along_axis(rankdata, 1, -fvals)

        tm.assert_almost_equal(ranks0.values, exp0)
        tm.assert_almost_equal(ranks1.values, exp1)

        # descending, na_option='bottom': the (min - 1) fill becomes the
        # largest entry once negated
        ranks0 = self.frame.rank(na_option='bottom', ascending=False)
        ranks1 = self.frame.rank(1, na_option='bottom', ascending=False)

        fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values
        fval1 = self.frame.T
        fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T
        fval1 = fval1.fillna(np.inf).values

        exp0 = np.apply_along_axis(rankdata, 0, -fval0)
        exp1 = np.apply_along_axis(rankdata, 1, -fval1)

        tm.assert_numpy_array_equal(ranks0.values, exp0)
        tm.assert_numpy_array_equal(ranks1.values, exp1)
Ejemplo n.º 54
0
 def test_get_indexer(self):
     target = Int64Index(np.arange(10))
     indexer = self.index.get_indexer(target)
     expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1])
     tm.assert_numpy_array_equal(indexer, expected)
Ejemplo n.º 55
0
    def test_convert_downcast_int64(self):
        """Check the dtype ``lib.downcast_int64`` picks for int64 input.

        Covers the default call, ``use_unsigned`` on and off, a negative
        element, a value too large for int8, and translation of the int64
        NA marker into the int8 one.
        """
        from pandas._libs.parsers import na_values

        small = np.array([1, 2, 7, 8, 10], dtype=np.int64)
        small_int8 = np.array([1, 2, 7, 8, 10], dtype=np.int8)

        # default argument
        tm.assert_numpy_array_equal(
            lib.downcast_int64(small, na_values), small_int8)

        tm.assert_numpy_array_equal(
            lib.downcast_int64(small, na_values, use_unsigned=False),
            small_int8)

        tm.assert_numpy_array_equal(
            lib.downcast_int64(small, na_values, use_unsigned=True),
            np.array([1, 2, 7, 8, 10], dtype=np.uint8))

        # still cast to int8 despite use_unsigned=True
        # because of the negative number as an element
        with_negative = np.array([1, 2, -7, 8, 10], dtype=np.int64)
        tm.assert_numpy_array_equal(
            lib.downcast_int64(with_negative, na_values, use_unsigned=True),
            np.array([1, 2, -7, 8, 10], dtype=np.int8))

        # 300 is expected to need int16
        with_large = np.array([1, 2, 7, 8, 300], dtype=np.int64)
        tm.assert_numpy_array_equal(
            lib.downcast_int64(with_large, na_values),
            np.array([1, 2, 7, 8, 300], dtype=np.int16))

        # the int64 NA marker is expected to become the int8 NA marker
        int8_na = na_values[np.int8]
        int64_na = na_values[np.int64]
        with_na = np.array([int64_na, 2, 3, 10, 15], dtype=np.int64)
        tm.assert_numpy_array_equal(
            lib.downcast_int64(with_na, na_values),
            np.array([int8_na, 2, 3, 10, 15], dtype=np.int8))
Ejemplo n.º 56
0
    def test_lookup_array(self):
        """Check ``lookup_array`` for 'integer' and 'block' sparse indexes.

        For each kind, indexes of total length 4 with different stored
        positions are queried; positions that are not stored (including
        out-of-range ones) are expected to map to -1.
        """
        for kind in ['integer', 'block']:
            idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind)

            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, -1, 0], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 0, -1, 1], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            # empty index: nothing is stored, so every lookup misses
            idx = _make_index(4, np.array([], dtype=np.int32), kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
            exp = np.array([-1, -1, -1, -1], dtype=np.int32)
            # this comparison was missing, leaving the case unverified
            tm.assert_numpy_array_equal(res, exp)

            # fully populated index
            idx = _make_index(4,
                              np.array([0, 1, 2, 3], dtype=np.int32),
                              kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, 0, 2], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 2, 1, 3], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            # index with a gap at position 1
            idx = _make_index(4,
                              np.array([0, 2, 3], dtype=np.int32),
                              kind=kind)
            res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
            exp = np.array([1, -1, 2, 0], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
            exp = np.array([-1, -1, 1, -1], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)
Ejemplo n.º 57
0
 def test_convert_sql_column_unicode(self):
     """Check ``lib.convert_sql_column`` on unicode strings with a None.

     The strings are expected to stay as they are, with None becoming NaN.
     """
     raw = np.array([u('1.5'), None, u('3'), u('4.2')], dtype=object)
     wanted = np.array([u('1.5'), np.nan, u('3'), u('4.2')], dtype=object)
     tm.assert_numpy_array_equal(lib.convert_sql_column(raw), wanted)
Ejemplo n.º 58
0
def test_argsort(idx):
    result = idx.argsort()
    expected = idx.values.argsort()
    tm.assert_numpy_array_equal(result, expected)
Ejemplo n.º 59
0
 def test_convert_sql_column_strings(self):
     """Check ``lib.convert_sql_column`` on plain strings with a None.

     The strings are expected to stay as they are, with None becoming NaN.
     """
     raw = np.array(['1.5', None, '3', '4.2'], dtype=object)
     wanted = np.array(['1.5', np.nan, '3', '4.2'], dtype=object)
     tm.assert_numpy_array_equal(lib.convert_sql_column(raw), wanted)
Ejemplo n.º 60
0
 def test_convert_sql_column_decimals(self):
     """Check ``lib.convert_sql_column`` on Decimal values with a None.

     The expected result is a float64 array with NaN in place of None.
     """
     from decimal import Decimal

     decimals = [Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]
     converted = lib.convert_sql_column(np.array(decimals))

     wanted = np.array([1.5, np.nan, 3, 4.2], dtype='f8')
     tm.assert_numpy_array_equal(converted, wanted)