def testit(): for f, f2 in [(self.frame, self.frame2), (self.mixed, self.mixed2)]: f11 = f f12 = f + 1 f21 = f2 f22 = f2 + 1 for op, op_str in [('gt', '>'), ('lt', '<'), ('ge', '>='), ('le', '<='), ('eq', '=='), ('ne', '!=')]: op = getattr(operator, op) result = expr._can_use_numexpr(op, op_str, f11, f12, 'evaluate') self.assertNotEqual(result, f11._is_mixed_type) result = expr.evaluate(op, op_str, f11, f12, use_numexpr=True) expected = expr.evaluate(op, op_str, f11, f12, use_numexpr=False) tm.assert_numpy_array_equal(result, expected.values) result = expr._can_use_numexpr(op, op_str, f21, f22, 'evaluate') self.assertFalse(result)
def test_constructor(self): # explicit construction index = Float64Index([1, 2, 3, 4, 5]) assert isinstance(index, Float64Index) expected = np.array([1, 2, 3, 4, 5], dtype='float64') tm.assert_numpy_array_equal(index.values, expected) index = Float64Index(np.array([1, 2, 3, 4, 5])) assert isinstance(index, Float64Index) index = Float64Index([1., 2, 3, 4, 5]) assert isinstance(index, Float64Index) index = Float64Index(np.array([1., 2, 3, 4, 5])) assert isinstance(index, Float64Index) assert index.dtype == float index = Float64Index(np.array([1., 2, 3, 4, 5]), dtype=np.float32) assert isinstance(index, Float64Index) assert index.dtype == np.float64 index = Float64Index(np.array([1, 2, 3, 4, 5]), dtype=np.float32) assert isinstance(index, Float64Index) assert index.dtype == np.float64 # nan handling result = Float64Index([np.nan, np.nan]) assert pd.isna(result.values).all() result = Float64Index(np.array([np.nan])) assert pd.isna(result.values).all() result = Index(np.array([np.nan])) assert pd.isna(result.values).all()
def test_conversions(data_missing): # astype to object series df = pd.DataFrame({'A': data_missing}) result = df['A'].astype('object') expected = pd.Series(np.array([np.nan, 1], dtype=object), name='A') tm.assert_series_equal(result, expected) # convert to object ndarray # we assert that we are exactly equal # including type conversions of scalars result = df['A'].astype('object').values expected = np.array([np.nan, 1], dtype=object) tm.assert_numpy_array_equal(result, expected) for r, e in zip(result, expected): if pd.isnull(r): assert pd.isnull(e) elif is_integer(r): # PY2 can be int or long assert r == e assert is_integer(e) else: assert r == e assert type(r) == type(e)
def test_equals(self): # GH 13107 for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) assert idx.equals(idx) assert idx.equals(idx.copy()) assert idx.equals(idx.asobject) assert idx.asobject.equals(idx) assert idx.asobject.equals(idx.asobject) assert not idx.equals(list(idx)) assert not idx.equals(pd.Series(idx)) idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], tz='US/Pacific') assert not idx.equals(idx2) assert not idx.equals(idx2.copy()) assert not idx.equals(idx2.asobject) assert not idx.asobject.equals(idx2) assert not idx.equals(list(idx2)) assert not idx.equals(pd.Series(idx2)) # same internal, different tz idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) assert not idx.equals(idx3.copy()) assert not idx.equals(idx3.asobject) assert not idx.asobject.equals(idx3) assert not idx.equals(list(idx3)) assert not idx.equals(pd.Series(idx3))
def test_properties(self, closed): index = self.create_index(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) tm.assert_index_equal(index.left, Index(np.arange(10))) tm.assert_index_equal(index.right, Index(np.arange(1, 11))) tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5))) assert index.closed == closed ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) # with nans index = self.create_index_with_nan(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) expected_right = expected_left + 1 expected_mid = expected_left + 0.5 tm.assert_index_equal(index.left, expected_left) tm.assert_index_equal(index.right, expected_right) tm.assert_index_equal(index.mid, expected_mid) assert index.closed == closed ivs = [Interval(l, r, closed) if notna(l) else np.nan for l, r in zip(expected_left, expected_right)] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected)
def test_constructor_with_datetimelike(self): # 12077 # constructor wwth a datetimelike and NaT for dtl in [date_range('1995-01-01 00:00:00', periods=5, freq='s'), date_range('1995-01-01 00:00:00', periods=5, freq='s', tz='US/Eastern'), timedelta_range('1 day', periods=5, freq='s')]: s = Series(dtl) c = Categorical(s) expected = type(dtl)(s) expected.freq = None tm.assert_index_equal(c.categories, expected) tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype='int8')) # with NaT s2 = s.copy() s2.iloc[-1] = NaT c = Categorical(s2) expected = type(dtl)(s2.dropna()) expected.freq = None tm.assert_index_equal(c.categories, expected) exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) tm.assert_numpy_array_equal(c.codes, exp) result = repr(c) assert 'NaT' in result
def test_simple(self): x, y = list('ABC'), [1, 22] result1, result2 = cartesian_product([x, y]) expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C']) expected2 = np.array([1, 22, 1, 22, 1, 22]) tm.assert_numpy_array_equal(result1, expected1) tm.assert_numpy_array_equal(result2, expected2)
def test_value_counts_inferred(self): klasses = [Index, Series] for klass in klasses: s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a'] s = klass(s_values) expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c']) tm.assert_series_equal(s.value_counts(), expected) if isinstance(s, Index): exp = Index(np.unique(np.array(s_values, dtype=np.object_))) tm.assert_index_equal(s.unique(), exp) else: exp = np.unique(np.array(s_values, dtype=np.object_)) tm.assert_numpy_array_equal(s.unique(), exp) assert s.nunique() == 4 # don't sort, have to sort after the fact as not sorting is # platform-dep hist = s.value_counts(sort=False).sort_values() expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values() tm.assert_series_equal(hist, expected) # sort ascending hist = s.value_counts(ascending=True) expected = Series([1, 2, 3, 4], index=list('cdab')) tm.assert_series_equal(hist, expected) # relative histogram. hist = s.value_counts(normalize=True) expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c']) tm.assert_series_equal(hist, expected)
def test_interleave(self): # interleave with object result = self.tzframe.assign(D='foo').values expected = np.array([[Timestamp('2013-01-01 00:00:00'), Timestamp('2013-01-02 00:00:00'), Timestamp('2013-01-03 00:00:00')], [Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'), pd.NaT, Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern')], [Timestamp('2013-01-01 00:00:00+0100', tz='CET'), pd.NaT, Timestamp('2013-01-03 00:00:00+0100', tz='CET')], ['foo', 'foo', 'foo']], dtype=object).T tm.assert_numpy_array_equal(result, expected) # interleave with only datetime64[ns] result = self.tzframe.values expected = np.array([[Timestamp('2013-01-01 00:00:00'), Timestamp('2013-01-02 00:00:00'), Timestamp('2013-01-03 00:00:00')], [Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'), pd.NaT, Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern')], [Timestamp('2013-01-01 00:00:00+0100', tz='CET'), pd.NaT, Timestamp('2013-01-03 00:00:00+0100', tz='CET')]], dtype=object).T tm.assert_numpy_array_equal(result, expected)
def _check(dtype): obj = np.array(np.random.randn(20), dtype=dtype) bins = np.array([6, 12, 20]) out = np.zeros((3, 4), dtype) counts = np.zeros(len(out), dtype=np.int64) labels = _ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins]))) func = getattr(groupby, 'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], labels) def _ohlc(group): if isna(group).all(): return np.repeat(nan, 4) return [group[0], group.max(), group.min(), group[-1]] expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])]) assert_almost_equal(out, expected) tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64)) obj[:6] = nan func(out, counts, obj[:, None], labels) expected[0] = nan assert_almost_equal(out, expected)
def test_include_na(self): s = ['a', 'b', np.nan] res = get_dummies(s, sparse=self.sparse) exp = DataFrame({'a': {0: 1.0, 1: 0.0, 2: 0.0}, 'b': {0: 0.0, 1: 1.0, 2: 0.0}}) assert_frame_equal(res, exp) # Sparse dataframes do not allow nan labelled columns, see #GH8822 res_na = get_dummies(s, dummy_na=True, sparse=self.sparse) exp_na = DataFrame({nan: {0: 0.0, 1: 0.0, 2: 1.0}, 'a': {0: 1.0, 1: 0.0, 2: 0.0}, 'b': {0: 0.0, 1: 1.0, 2: 0.0}}).reindex_axis( ['a', 'b', nan], 1) # hack (NaN handling in assert_index_equal) exp_na.columns = res_na.columns assert_frame_equal(res_na, exp_na) res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse) exp_just_na = DataFrame(Series(1.0, index=[0]), columns=[nan]) tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values)
def test_frame_non_unique_columns(self): df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2], columns=['x', 'x']) self.assertRaises(ValueError, df.to_json, orient='index') self.assertRaises(ValueError, df.to_json, orient='columns') self.assertRaises(ValueError, df.to_json, orient='records') assert_frame_equal(df, read_json(df.to_json(orient='split'), orient='split', dtype=False)) unser = read_json(df.to_json(orient='values'), orient='values') tm.assert_numpy_array_equal(df.values, unser.values) # GH4377; duplicate columns not processing correctly df = DataFrame([['a', 'b'], ['c', 'd']], index=[ 1, 2], columns=['x', 'y']) result = read_json(df.to_json(orient='split'), orient='split') assert_frame_equal(result, df) def _check(df): result = read_json(df.to_json(orient='split'), orient='split', convert_dates=['x']) assert_frame_equal(result, df) for o in [[['a', 'b'], ['c', 'd']], [[1.5, 2.5], [3.5, 4.5]], [[1, 2.5], [3, 4.5]], [[Timestamp('20130101'), 3.5], [Timestamp('20130102'), 4.5]]]: _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))
def test_as_matrix_deprecated(self, float_frame): # GH 18458 with tm.assert_produces_warning(FutureWarning): cols = float_frame.columns.tolist() result = float_frame.as_matrix(columns=cols) expected = float_frame.values tm.assert_numpy_array_equal(result, expected)
def test_compare_timedelta_ndarray(self): # GH11835 periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] arr = np.array(periods) result = arr[0] > arr expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected)
def test_cast_scalar_to_array(self): arr = cast_scalar_to_array((3, 2), 1, dtype=np.int64) exp = np.ones((3, 2), dtype=np.int64) tm.assert_numpy_array_equal(arr, exp) arr = cast_scalar_to_array((3, 2), 1.1) exp = np.empty((3, 2), dtype=np.float64) exp.fill(1.1) tm.assert_numpy_array_equal(arr, exp) arr = cast_scalar_to_array((2, 3), Timestamp('2011-01-01')) exp = np.empty((2, 3), dtype='datetime64[ns]') exp.fill(np.datetime64('2011-01-01')) tm.assert_numpy_array_equal(arr, exp) # pandas dtype is stored as object dtype obj = Timestamp('2011-01-01', tz='US/Eastern') arr = cast_scalar_to_array((2, 3), obj) exp = np.empty((2, 3), dtype=np.object) exp.fill(obj) tm.assert_numpy_array_equal(arr, exp) obj = Period('2011-01-01', freq='D') arr = cast_scalar_to_array((2, 3), obj) exp = np.empty((2, 3), dtype=np.object) exp.fill(obj) tm.assert_numpy_array_equal(arr, exp)
def test_noright(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) result, bins = cut(data, 4, right=False, retbins=True) exp_codes = np.array([0, 0, 0, 2, 3, 0, 1], dtype=np.int8) tm.assert_numpy_array_equal(result.codes, exp_codes) exp = np.array([0.2, 2.575, 4.95, 7.325, 9.7095]) tm.assert_almost_equal(bins, exp)
def test_arraylike(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] result, bins = cut(data, 3, retbins=True) exp_codes = np.array([0, 0, 0, 1, 2, 0], dtype=np.int8) tm.assert_numpy_array_equal(result.codes, exp_codes) exp = np.array([0.1905, 3.36666667, 6.53333333, 9.7]) tm.assert_almost_equal(bins, exp)
def test_difference_base(self): for name, idx in compat.iteritems(self.indices): first = idx[2:] second = idx[:4] answer = idx[4:] result = first.difference(second) if isinstance(idx, CategoricalIndex): pass else: self.assertTrue(tm.equalContents(result, answer)) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" with tm.assertRaisesRegexp(ValueError, msg): result = first.difference(case) elif isinstance(idx, CategoricalIndex): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): self.assertEqual(result.__class__, answer.__class__) tm.assert_numpy_array_equal(result.asi8, answer.asi8) else: result = first.difference(case) self.assertTrue(tm.equalContents(result, answer)) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" with tm.assertRaisesRegexp(TypeError, msg): result = first.difference([1, 2, 3])
def test_from_product_datetimeindex(): dt_index = date_range('2000-01-01', periods=2) mi = pd.MultiIndex.from_product([[1, 2], dt_index]) etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp( '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) tm.assert_numpy_array_equal(mi.values, etalon)
def test_map(self): ci = pd.CategoricalIndex(list('ABABC'), categories=list('CBA'), ordered=True) result = ci.map(lambda x: x.lower()) exp = pd.Categorical(list('ababc'), categories=list('cba'), ordered=True) tm.assert_categorical_equal(result, exp) ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), ordered=False, name='XXX') result = ci.map(lambda x: x.lower()) exp = pd.Categorical(list('ababc'), categories=list('bac'), ordered=False) tm.assert_categorical_equal(result, exp) tm.assert_numpy_array_equal(ci.map(lambda x: 1), np.array([1] * 5, dtype=np.int64)) # change categories dtype ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), ordered=False) def f(x): return {'A': 10, 'B': 20, 'C': 30}.get(x) result = ci.map(f) exp = pd.Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) tm.assert_categorical_equal(result, exp)
def test_numpy_argsort(self): for k, ind in self.indices.items(): result = np.argsort(ind) expected = ind.argsort() tm.assert_numpy_array_equal(result, expected) # these are the only two types that perform # pandas compatibility input validation - the # rest already perform separate (or no) such # validation via their 'values' attribute as # defined in pandas.indexes/base.py - they # cannot be changed at the moment due to # backwards compatibility concerns if isinstance(type(ind), (CategoricalIndex, RangeIndex)): msg = "the 'axis' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.argsort, ind, axis=1) msg = "the 'kind' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.argsort, ind, kind='mergesort') msg = "the 'order' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.argsort, ind, order=('a', 'b'))
def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=None): """ Check expected number of axes is drawn in expected layout Parameters ---------- axes : matplotlib Axes object, or its list-like axes_num : number expected number of axes. Unnecessary axes should be set to invisible. layout : tuple expected layout, (expected number of rows , columns) figsize : tuple expected figsize. default is matplotlib default """ from pandas.plotting._matplotlib.tools import _flatten if figsize is None: figsize = self.default_figsize visible_axes = self._flatten_visible(axes) if axes_num is not None: assert len(visible_axes) == axes_num for ax in visible_axes: # check something drawn on visible axes assert len(ax.get_children()) > 0 if layout is not None: result = self._get_axes_layout(_flatten(axes)) assert result == layout tm.assert_numpy_array_equal( visible_axes[0].figure.get_size_inches(), np.array(figsize, dtype=np.float64))
def test_equals(self, freq): # GH#13107 idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], freq=freq) assert idx.equals(idx) assert idx.equals(idx.copy()) assert idx.equals(idx.astype(object)) assert idx.astype(object).equals(idx) assert idx.astype(object).equals(idx.astype(object)) assert not idx.equals(list(idx)) assert not idx.equals(pd.Series(idx)) idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], freq='H') assert not idx.equals(idx2) assert not idx.equals(idx2.copy()) assert not idx.equals(idx2.astype(object)) assert not idx.astype(object).equals(idx2) assert not idx.equals(list(idx2)) assert not idx.equals(pd.Series(idx2)) # same internal, different tz idx3 = pd.PeriodIndex._simple_new(idx.asi8, freq='H') tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) assert not idx.equals(idx3.copy()) assert not idx.equals(idx3.astype(object)) assert not idx.astype(object).equals(idx3) assert not idx.equals(list(idx3)) assert not idx.equals(pd.Series(idx3))
def test_get_value(self): # GH 17717 p0 = pd.Period('2017-09-01') p1 = pd.Period('2017-09-02') p2 = pd.Period('2017-09-03') idx0 = pd.PeriodIndex([p0, p1, p2]) input0 = np.array([1, 2, 3]) expected0 = 2 result0 = idx0.get_value(input0, p1) assert result0 == expected0 idx1 = pd.PeriodIndex([p1, p1, p2]) input1 = np.array([1, 2, 3]) expected1 = np.array([1, 2]) result1 = idx1.get_value(input1, p1) tm.assert_numpy_array_equal(result1, expected1) idx2 = pd.PeriodIndex([p1, p2, p1]) input2 = np.array([1, 2, 3]) expected2 = np.array([1, 3]) result2 = idx2.get_value(input2, p1) tm.assert_numpy_array_equal(result2, expected2)
def test_get_indexer(self): # GH 17717 p1 = pd.Period('2017-09-01') p2 = pd.Period('2017-09-04') p3 = pd.Period('2017-09-07') tp0 = pd.Period('2017-08-31') tp1 = pd.Period('2017-09-02') tp2 = pd.Period('2017-09-05') tp3 = pd.Period('2017-09-09') idx = pd.PeriodIndex([p1, p2, p3]) tm.assert_numpy_array_equal(idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)) target = pd.PeriodIndex([tp0, tp1, tp2, tp3]) tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), np.array([-1, 0, 1, 2], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), np.array([0, 1, 2, -1], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), np.array([0, 0, 1, 2], dtype=np.intp)) res = idx.get_indexer(target, 'nearest', tolerance=pd.Timedelta('1 day')) tm.assert_numpy_array_equal(res, np.array([0, 0, 1, -1], dtype=np.intp))
def test_groupby_categorical_no_compress(self): data = Series(np.random.randn(9)) codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True) result = data.groupby(cats).mean() exp = data.groupby(codes).mean() exp.index = CategoricalIndex(exp.index, categories=cats.categories, ordered=cats.ordered) assert_series_equal(result, exp) codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3]) cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True) result = data.groupby(cats).mean() exp = data.groupby(codes).mean().reindex(cats.categories) exp.index = CategoricalIndex(exp.index, categories=cats.categories, ordered=cats.ordered) assert_series_equal(result, exp) cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"], ordered=True) data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) result = data.groupby("b").mean() result = result["a"].values exp = np.array([1, 2, 4, np.nan]) tm.assert_numpy_array_equal(result, exp)
def test_convert_sql_column_unicode(self): arr = np.array([u('1.5'), None, u('3'), u('4.2')], dtype=object) result = lib.convert_sql_column(arr) expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], dtype=object) tm.assert_numpy_array_equal(result, expected)
def test_rename_set_name(self): s = Series(range(4), index=list('abcd')) for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]: result = s.rename(name) assert result.name == name tm.assert_numpy_array_equal(result.index.values, s.index.values) assert s.name is None
def test_cython_right_outer_join(self): left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) max_group = 5 rs, ls = libjoin.left_outer_join(right, left, max_group) exp_ls = left.argsort(kind='mergesort') exp_rs = right.argsort(kind='mergesort') # 0 1 1 1 exp_li = a_([0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5, # 2 2 4 6, 7, 8, 6, 7, 8, -1]) exp_ri = a_([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6]) exp_ls = exp_ls.take(exp_li) exp_ls[exp_li == -1] = -1 exp_rs = exp_rs.take(exp_ri) exp_rs[exp_ri == -1] = -1 tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
def test_reindex_empty_index(self): # See GH16770 c = CategoricalIndex([]) res, indexer = c.reindex(['a', 'b']) tm.assert_index_equal(res, Index(['a', 'b']), exact=True) tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))
def assert_as_array_equals(slc, asarray): tm.assert_numpy_array_equal( BlockPlacement(slc).as_array, np.asarray(asarray, dtype=np.int64) )
def test_convert_non_hashable(self): # GH13324 # make sure that we are handing non-hashables arr = np.array([[10.0, 2], 1.0, "apple"]) result = lib.maybe_convert_numeric(arr, set(), False, True) tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
def test_construction(self): ci = self.create_index(categories=list('abcd')) categories = ci.categories result = Index(ci) tm.assert_index_equal(result, ci, exact=True) assert not result.ordered result = Index(ci.values) tm.assert_index_equal(result, ci, exact=True) assert not result.ordered # empty result = CategoricalIndex(categories=categories) tm.assert_index_equal(result.categories, Index(categories)) tm.assert_numpy_array_equal(result.codes, np.array([], dtype='int8')) assert not result.ordered # passing categories result = CategoricalIndex(list('aabbca'), categories=categories) tm.assert_index_equal(result.categories, Index(categories)) tm.assert_numpy_array_equal(result.codes, np.array([0, 0, 1, 1, 2, 0], dtype='int8')) c = pd.Categorical(list('aabbca')) result = CategoricalIndex(c) tm.assert_index_equal(result.categories, Index(list('abc'))) tm.assert_numpy_array_equal(result.codes, np.array([0, 0, 1, 1, 2, 0], dtype='int8')) assert not result.ordered result = CategoricalIndex(c, categories=categories) tm.assert_index_equal(result.categories, Index(categories)) tm.assert_numpy_array_equal(result.codes, np.array([0, 0, 1, 1, 2, 0], dtype='int8')) assert not result.ordered ci = CategoricalIndex(c, categories=list('abcd')) result = CategoricalIndex(ci) tm.assert_index_equal(result.categories, Index(categories)) tm.assert_numpy_array_equal(result.codes, np.array([0, 0, 1, 1, 2, 0], dtype='int8')) assert not result.ordered result = CategoricalIndex(ci, categories=list('ab')) tm.assert_index_equal(result.categories, Index(list('ab'))) tm.assert_numpy_array_equal( result.codes, np.array([0, 0, 1, 1, -1, 0], dtype='int8')) assert not result.ordered result = CategoricalIndex(ci, categories=list('ab'), ordered=True) tm.assert_index_equal(result.categories, Index(list('ab'))) tm.assert_numpy_array_equal( result.codes, np.array([0, 0, 1, 1, -1, 0], dtype='int8')) assert result.ordered result = pd.CategoricalIndex(ci, categories=list('ab'), ordered=True) expected = pd.CategoricalIndex(ci, categories=list('ab'), ordered=True, dtype='category') tm.assert_index_equal(result, expected, exact=True) # turn me to an Index result = Index(np.array(ci)) assert isinstance(result, Index) assert not isinstance(result, CategoricalIndex)
def test_order(self): # with freq idx1 = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], freq='D', name='idx') idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo', name='tzidx') for idx in [idx1, idx2]: ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) assert ordered.freq == expected.freq assert ordered.freq.n == -1 # without freq for tz in self.tz: idx1 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', '2011-01-02', '2011-01-01'], tz=tz, name='idx1') exp1 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', '2011-01-03', '2011-01-05'], tz=tz, name='idx1') idx2 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', '2011-01-02', '2011-01-01'], tz=tz, name='idx2') exp2 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', '2011-01-03', '2011-01-05'], tz=tz, name='idx2') idx3 = DatetimeIndex([pd.NaT, '2011-01-03', '2011-01-05', '2011-01-02', pd.NaT], tz=tz, name='idx3') exp3 = DatetimeIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', '2011-01-05'], tz=tz, name='idx3') for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: ordered = idx.sort_values() tm.assert_index_equal(ordered, expected) assert ordered.freq is None ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None
def assert_block_equal(left, right): tm.assert_numpy_array_equal(left.values, right.values) assert left.dtype == right.dtype assert isinstance(left.mgr_locs, BlockPlacement) assert isinstance(right.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array)
def test_mgr_locs(self): assert isinstance(self.fblock.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal( self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64) )
def test_eq(self, other): idx = PeriodIndex(["2017", "2017", "2018"], freq="D") expected = np.array([True, True, False]) result = idx == other tm.assert_numpy_array_equal(result, expected)
def test_pi_cmp_nat(self, freq): idx1 = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-05"], freq=freq) result = idx1 > Period("2011-02", freq=freq) exp = np.array([False, False, False, True]) tm.assert_numpy_array_equal(result, exp) result = Period("2011-02", freq=freq) < idx1 tm.assert_numpy_array_equal(result, exp) result = idx1 == Period("NaT", freq=freq) exp = np.array([False, False, False, False]) tm.assert_numpy_array_equal(result, exp) result = Period("NaT", freq=freq) == idx1 tm.assert_numpy_array_equal(result, exp) result = idx1 != Period("NaT", freq=freq) exp = np.array([True, True, True, True]) tm.assert_numpy_array_equal(result, exp) result = Period("NaT", freq=freq) != idx1 tm.assert_numpy_array_equal(result, exp) idx2 = PeriodIndex(["2011-02", "2011-01", "2011-04", "NaT"], freq=freq) result = idx1 < idx2 exp = np.array([True, False, False, False]) tm.assert_numpy_array_equal(result, exp) result = idx1 == idx2 exp = np.array([False, False, False, False]) tm.assert_numpy_array_equal(result, exp) result = idx1 != idx2 exp = np.array([True, True, True, True]) tm.assert_numpy_array_equal(result, exp) result = idx1 == idx1 exp = np.array([True, True, False, True]) tm.assert_numpy_array_equal(result, exp) result = idx1 != idx1 exp = np.array([False, False, True, False]) tm.assert_numpy_array_equal(result, exp)
def test_pi_cmp_period(self): idx = period_range("2007-01", periods=20, freq="M") result = idx < idx[10] exp = idx.values < idx.values[10] tm.assert_numpy_array_equal(result, exp)
def assert_array_dicts_equal(left, right): for k, v in left.items(): tm.assert_numpy_array_equal(np.asarray(v), np.asarray(right[k]))
def test_comp_nat(self, dtype): left = pd.PeriodIndex( [pd.Period("2011-01-01"), pd.NaT, pd.Period("2011-01-03")]) right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period("2011-01-03")]) if dtype is not None: left = left.astype(dtype) right = right.astype(dtype) result = left == right expected = np.array([False, False, True]) tm.assert_numpy_array_equal(result, expected) result = left != right expected = np.array([True, True, False]) tm.assert_numpy_array_equal(result, expected) expected = np.array([False, False, False]) tm.assert_numpy_array_equal(left == pd.NaT, expected) tm.assert_numpy_array_equal(pd.NaT == right, expected) expected = np.array([True, True, True]) tm.assert_numpy_array_equal(left != pd.NaT, expected) tm.assert_numpy_array_equal(pd.NaT != left, expected) expected = np.array([False, False, False]) tm.assert_numpy_array_equal(left < pd.NaT, expected) tm.assert_numpy_array_equal(pd.NaT > left, expected)
def test_factorize_equivalence(self, data_for_grouping, na_sentinel): l1, u1 = pd.factorize(data_for_grouping, na_sentinel=na_sentinel) l2, u2 = data_for_grouping.factorize(na_sentinel=na_sentinel) tm.assert_numpy_array_equal(l1, l2) self.assert_extension_array_equal(u1, u2)
def test_unique_na_fill(arr, fill_value): a = pd.SparseArray(arr, fill_value=fill_value).unique() b = pd.Series(arr).unique() assert isinstance(a, SparseArray) a = np.asarray(a) tm.assert_numpy_array_equal(a, b)
def test_get_indexer_backfill(self): target = RangeIndex(10) indexer = self.index.get_indexer(target, method='backfill') expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected)
def test_coerce_outside_ns_bounds_one_valid(self): arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = [tslib.iNaT, '2000-01-01T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]'))
def test_equals_op(self): # GH9947, GH10637 index_a = self.create_index() if isinstance(index_a, PeriodIndex): pytest.skip("Skip check for PeriodIndex") n = len(index_a) index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] msg = "Lengths must match|could not be broadcast" with pytest.raises(ValueError, match=msg): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) tm.assert_numpy_array_equal(index_a == index_a, expected1) tm.assert_numpy_array_equal(index_a == index_c, expected2) # test comparisons with numpy arrays array_a = np.array(index_a) array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) with pytest.raises(ValueError, match=msg): index_a == array_b tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) # test comparisons with Series series_a = Series(array_a) series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) with pytest.raises(ValueError, match=msg): index_a == series_b tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them with pytest.raises(ValueError, match="Lengths must match"): index_a == index_d with pytest.raises(ValueError, match="Lengths must match"): index_a == series_d with pytest.raises(ValueError, match="Lengths must match"): index_a == array_d msg = "Can only compare identically-labeled Series objects" with pytest.raises(ValueError, match=msg): series_a == series_d with pytest.raises(ValueError, match="Lengths must match"): series_a == array_d # comparing with a scalar should broadcast; note that we are excluding # MultiIndex because in this case each item in the index is a tuple of # length 2, and therefore is considered an array of length 2 in the # comparison instead of a scalar if not isinstance(index_a, MultiIndex): expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) # assuming the 2nd to last item is unique in the data item = index_a[-2] tm.assert_numpy_array_equal(index_a == item, expected3) tm.assert_series_equal(series_a == item, Series(expected3))
def test_shift(self): shifted = self.ts.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, self.ts.index) tm.assert_index_equal(unshifted.index, self.ts.index) tm.assert_numpy_array_equal(unshifted.valid().values, self.ts.values[:-1]) offset = BDay() shifted = self.ts.shift(1, freq=offset) unshifted = shifted.shift(-1, freq=offset) assert_series_equal(unshifted, self.ts) unshifted = self.ts.shift(0, freq=offset) assert_series_equal(unshifted, self.ts) shifted = self.ts.shift(1, freq='B') unshifted = shifted.shift(-1, freq='B') assert_series_equal(unshifted, self.ts) # corner case unshifted = self.ts.shift(0) assert_series_equal(unshifted, self.ts) # Shifting with PeriodIndex ps = tm.makePeriodSeries() shifted = ps.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, ps.index) tm.assert_index_equal(unshifted.index, ps.index) tm.assert_numpy_array_equal(unshifted.valid().values, ps.values[:-1]) shifted2 = ps.shift(1, 'B') shifted3 = ps.shift(1, BDay()) assert_series_equal(shifted2, shifted3) assert_series_equal(ps, shifted2.shift(-1, 'B')) self.assertRaises(ValueError, ps.shift, freq='D') # legacy support shifted4 = ps.shift(1, freq='B') assert_series_equal(shifted2, shifted4) shifted5 = ps.shift(1, freq=BDay()) assert_series_equal(shifted5, shifted4) # 32-bit taking # GH 8129 index = date_range('2000-01-01', periods=5) for dtype in ['int32', 'int64']: s1 = Series(np.arange(5, dtype=dtype), index=index) p = s1.iloc[1] result = s1.shift(periods=p) expected = Series([np.nan, 0, 1, 2, 3], index=index) assert_series_equal(result, expected) # xref 8260 # with tz s = Series(date_range('2000-01-01 09:00:00', periods=5, tz='US/Eastern'), name='foo') result = s - s.shift() exp = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo') assert_series_equal(result, exp) # incompat tz s2 = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'), name='foo') self.assertRaises(ValueError, lambda: s - s2)
def test_get_indexer_backfill(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target, method='backfill') expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) tm.assert_numpy_array_equal(indexer, expected)
def test_get_indexer(self): target = RangeIndex(10) indexer = self.index.get_indexer(target) expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected)
def test_block_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') assert isinstance(idx, BlockIndex) self.assertEqual(idx.npoints, 2) tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32)) idx = _make_index(4, np.array([], dtype=np.int32), kind='block') assert isinstance(idx, BlockIndex) self.assertEqual(idx.npoints, 0) tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32)) idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind='block') assert isinstance(idx, BlockIndex) self.assertEqual(idx.npoints, 4) tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32)) idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind='block') assert isinstance(idx, BlockIndex) self.assertEqual(idx.npoints, 3) tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32))
def test_numpy_array_equal_message(self): if is_platform_windows(): pytest.skip("windows has incomparable line-endings " "and uses L on the shape") expected = """numpy array are different numpy array shapes are different \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5])) with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5])) # scalar comparison expected = """Expected type """ with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(1, 2) expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5""" with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(1, 2) # array / scalar array comparison expected = """numpy array are different numpy array classes are different \\[left\\]: ndarray \\[right\\]: int""" with tm.assert_raises_regex(AssertionError, expected): # numpy_array_equal only accepts np.ndarray assert_numpy_array_equal(np.array([1]), 1) with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1]), 1) # scalar / array comparison expected = """numpy array are different numpy array classes are different \\[left\\]: int \\[right\\]: ndarray""" with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(1, np.array([1])) with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(1, np.array([1])) expected = """numpy array are different numpy array values are different \\(66\\.66667 %\\) \\[left\\]: \\[nan, 2\\.0, 3\\.0\\] \\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) expected = """numpy array are different numpy array values are different \\(50\\.0 %\\) \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3])) with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1, 2]), np.array([1, 3])) expected = """numpy array are different numpy array values are different \\(50\\.0 %\\) \\[left\\]: \\[1\\.1, 2\\.000001\\] \\[right\\]: \\[1\\.1, 2.0\\]""" with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0])) # must pass assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0])) expected = """numpy array are different numpy array values are different \\(16\\.66667 %\\) \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) expected = """numpy array are different numpy array values are different \\(25\\.0 %\\) \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) # allow to overwrite message expected = """Index are different Index shapes are different \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index') with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index')
def test_rank_na_option(self): rankdata = pytest.importorskip('scipy.stats.rankdata') self.frame['A'][::2] = np.nan self.frame['B'][::3] = np.nan self.frame['C'][::4] = np.nan self.frame['D'][::5] = np.nan # bottom ranks0 = self.frame.rank(na_option='bottom') ranks1 = self.frame.rank(1, na_option='bottom') fvals = self.frame.fillna(np.inf).values exp0 = np.apply_along_axis(rankdata, 0, fvals) exp1 = np.apply_along_axis(rankdata, 1, fvals) tm.assert_almost_equal(ranks0.values, exp0) tm.assert_almost_equal(ranks1.values, exp1) # top ranks0 = self.frame.rank(na_option='top') ranks1 = self.frame.rank(1, na_option='top') fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values fval1 = self.frame.T fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T fval1 = fval1.fillna(np.inf).values exp0 = np.apply_along_axis(rankdata, 0, fval0) exp1 = np.apply_along_axis(rankdata, 1, fval1) tm.assert_almost_equal(ranks0.values, exp0) tm.assert_almost_equal(ranks1.values, exp1) # descending # bottom ranks0 = self.frame.rank(na_option='top', ascending=False) ranks1 = self.frame.rank(1, na_option='top', ascending=False) fvals = self.frame.fillna(np.inf).values exp0 = np.apply_along_axis(rankdata, 0, -fvals) exp1 = np.apply_along_axis(rankdata, 1, -fvals) tm.assert_almost_equal(ranks0.values, exp0) tm.assert_almost_equal(ranks1.values, exp1) # descending # top ranks0 = self.frame.rank(na_option='bottom', ascending=False) ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values fval1 = self.frame.T fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T fval1 = fval1.fillna(np.inf).values exp0 = np.apply_along_axis(rankdata, 0, -fval0) exp1 = np.apply_along_axis(rankdata, 1, -fval1) tm.assert_numpy_array_equal(ranks0.values, exp0) tm.assert_numpy_array_equal(ranks1.values, exp1)
def test_get_indexer(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target) expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) tm.assert_numpy_array_equal(indexer, expected)
def test_convert_downcast_int64(self): from pandas._libs.parsers import na_values arr = np.array([1, 2, 7, 8, 10], dtype=np.int64) expected = np.array([1, 2, 7, 8, 10], dtype=np.int8) # default argument result = lib.downcast_int64(arr, na_values) tm.assert_numpy_array_equal(result, expected) result = lib.downcast_int64(arr, na_values, use_unsigned=False) tm.assert_numpy_array_equal(result, expected) expected = np.array([1, 2, 7, 8, 10], dtype=np.uint8) result = lib.downcast_int64(arr, na_values, use_unsigned=True) tm.assert_numpy_array_equal(result, expected) # still cast to int8 despite use_unsigned=True # because of the negative number as an element arr = np.array([1, 2, -7, 8, 10], dtype=np.int64) expected = np.array([1, 2, -7, 8, 10], dtype=np.int8) result = lib.downcast_int64(arr, na_values, use_unsigned=True) tm.assert_numpy_array_equal(result, expected) arr = np.array([1, 2, 7, 8, 300], dtype=np.int64) expected = np.array([1, 2, 7, 8, 300], dtype=np.int16) result = lib.downcast_int64(arr, na_values) tm.assert_numpy_array_equal(result, expected) int8_na = na_values[np.int8] int64_na = na_values[np.int64] arr = np.array([int64_na, 2, 3, 10, 15], dtype=np.int64) expected = np.array([int8_na, 2, 3, 10, 15], dtype=np.int8) result = lib.downcast_int64(arr, na_values) tm.assert_numpy_array_equal(result, expected)
def test_lookup_array(self): for kind in ['integer', 'block']: idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) exp = np.array([-1, -1, 0], dtype=np.int32) tm.assert_numpy_array_equal(res, exp) res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) exp = np.array([-1, 0, -1, 1], dtype=np.int32) tm.assert_numpy_array_equal(res, exp) idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) exp = np.array([-1, -1, -1, -1], dtype=np.int32) idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind) res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) exp = np.array([-1, 0, 2], dtype=np.int32) tm.assert_numpy_array_equal(res, exp) res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) exp = np.array([-1, 2, 1, 3], dtype=np.int32) tm.assert_numpy_array_equal(res, exp) idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) exp = np.array([1, -1, 2, 0], dtype=np.int32) tm.assert_numpy_array_equal(res, exp) res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) exp = np.array([-1, -1, 1, -1], dtype=np.int32) tm.assert_numpy_array_equal(res, exp)
def test_argsort(idx): result = idx.argsort() expected = idx.values.argsort() tm.assert_numpy_array_equal(result, expected)
def test_convert_sql_column_strings(self): arr = np.array(['1.5', None, '3', '4.2'], dtype=object) result = lib.convert_sql_column(arr) expected = np.array(['1.5', np.nan, '3', '4.2'], dtype=object) tm.assert_numpy_array_equal(result, expected)
def test_convert_sql_column_decimals(self): from decimal import Decimal arr = np.array([Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]) result = lib.convert_sql_column(arr) expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') tm.assert_numpy_array_equal(result, expected)