def test_astype_uint(self):
    arr = timedelta_range("1H", periods=2)
    expected = pd.UInt64Index(
        np.array([3600000000000, 90000000000000], dtype="uint64")
    )

    with tm.assert_produces_warning(FutureWarning):
        tm.assert_index_equal(arr.astype("uint64"), expected)
        tm.assert_index_equal(arr.astype("uint32"), expected)
def NowcastDecoder(d):
    if '__class__' in d:
        if d['__class__'] == 'DataFrame':
            v = pd.read_parquet(Path(NowcastEncoder.PARQUET_DIR + d['parquet']))
            if 'columns_type' in d:
                if d['columns_type'] == 'DatetimeIndex':
                    if 'columns_freqname' in d:
                        v.columns = pd.DatetimeIndex(v.columns, freq=d['columns_freqname'])
                    else:
                        v.columns = pd.DatetimeIndex(v.columns)
                elif d['columns_type'] == 'Int64Index':
                    v.columns = pd.Int64Index(v.columns.astype('int64'))
                elif d['columns_type'] == 'UInt64Index':
                    v.columns = pd.UInt64Index(v.columns.astype('uint64'))
                elif d['columns_type'] == 'TimedeltaIndex':
                    v.columns = pd.TimedeltaIndex(v.columns.astype(pd.Timedelta))
                elif d['columns_type'] == 'RangeIndex':
                    v.columns = pd.RangeIndex(
                        np.int64(d['columns_start']),
                        np.int64(d['columns_stop']),
                        np.int64(d['columns_step']))
            return v
        elif d['__class__'] == 'Timestamp':
            return pd.Timestamp(d['value'])
        else:
            raise ValueError('NowcastDecoder: Unexpected __class__ attribute!')
    return d
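# A minimal usage sketch (an assumption, not part of the original source): the
# decoder above is intended to be passed to json.loads as an object_hook, so
# every decoded dict carrying a '__class__' marker is rebuilt into the matching
# pandas object. The payload below only exercises the Timestamp branch; the
# DataFrame branch additionally requires the referenced parquet file to exist
# under NowcastEncoder.PARQUET_DIR.
import json

payload = '{"as_of": {"__class__": "Timestamp", "value": "2021-06-01T00:00:00"}}'
decoded = json.loads(payload, object_hook=NowcastDecoder)
print(decoded["as_of"])  # pandas.Timestamp('2021-06-01 00:00:00')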
def test_pandas_as_index():
    # Define Pandas Indexes
    pdf_int_index = pd.Int64Index([1, 2, 3, 4, 5])
    pdf_uint_index = pd.UInt64Index([1, 2, 3, 4, 5])
    pdf_float_index = pd.Float64Index([1.0, 2.0, 3.0, 4.0, 5.0])
    pdf_datetime_index = pd.DatetimeIndex(
        [1000000, 2000000, 3000000, 4000000, 5000000])
    pdf_category_index = pd.CategoricalIndex(["a", "b", "c", "b", "a"])

    # Define cudf Indexes
    gdf_int_index = as_index(pdf_int_index)
    gdf_uint_index = as_index(pdf_uint_index)
    gdf_float_index = as_index(pdf_float_index)
    gdf_datetime_index = as_index(pdf_datetime_index)
    gdf_category_index = as_index(pdf_category_index)

    # Check instance types
    assert isinstance(gdf_int_index, GenericIndex)
    assert isinstance(gdf_uint_index, GenericIndex)
    assert isinstance(gdf_float_index, GenericIndex)
    assert isinstance(gdf_datetime_index, DatetimeIndex)
    assert isinstance(gdf_category_index, CategoricalIndex)

    # Check equality
    assert_eq(pdf_int_index, gdf_int_index)
    assert_eq(pdf_uint_index, gdf_uint_index)
    assert_eq(pdf_float_index, gdf_float_index)
    assert_eq(pdf_datetime_index, gdf_datetime_index)
    assert_eq(pdf_category_index, gdf_category_index)

    assert_eq(
        pdf_category_index.codes,
        gdf_category_index.codes.astype(pdf_category_index.codes.dtype).to_array(),
    )
def test_unsigned_integer_index_apis(data, name, dtype):
    pindex = pd.UInt64Index(data, dtype=dtype, name=name)

    # UInt8Index
    gindex = cudf.UInt8Index(data, dtype=dtype, name=name)
    assert_eq(pindex, gindex)
    assert gindex.dtype == np.dtype("uint8")

    # UInt16Index
    gindex = cudf.UInt16Index(data, dtype=dtype, name=name)
    assert_eq(pindex, gindex)
    assert gindex.dtype == np.dtype("uint16")

    # UInt32Index
    gindex = cudf.UInt32Index(data, dtype=dtype, name=name)
    assert_eq(pindex, gindex)
    assert gindex.dtype == np.dtype("uint32")

    # UInt64Index
    gindex = cudf.UInt64Index(data, dtype=dtype, name=name)
    assert_eq(pindex, gindex)
    assert gindex.dtype == np.dtype("uint64")
def test_astype_uint(self):
    arr = timedelta_range("1H", periods=2)
    expected = pd.UInt64Index(
        np.array([3600000000000, 90000000000000], dtype="uint64")
    )

    tm.assert_index_equal(arr.astype("uint64"), expected)
    tm.assert_index_equal(arr.astype("uint32"), expected)
def setup(self, index_type):
    N = 10**5
    if index_type == "MultiIndex":
        self.idx = pd.MultiIndex.from_product(
            [pd.date_range("1/1/2000", freq="T", periods=N // 2), ["a", "b"]]
        )
    elif index_type == "DatetimeIndex":
        self.idx = pd.date_range("1/1/2000", freq="T", periods=N)
    elif index_type == "Int64Index":
        self.idx = pd.Index(range(N))
    elif index_type == "PeriodIndex":
        self.idx = pd.period_range("1/1/2000", freq="T", periods=N)
    elif index_type == "RangeIndex":
        self.idx = pd.RangeIndex(start=0, stop=N)
    elif index_type == "IntervalIndex":
        self.idx = pd.IntervalIndex.from_arrays(range(N), range(1, N + 1))
    elif index_type == "TimedeltaIndex":
        self.idx = pd.TimedeltaIndex(range(N))
    elif index_type == "Float64Index":
        self.idx = pd.Float64Index(range(N))
    elif index_type == "UInt64Index":
        self.idx = pd.UInt64Index(range(N))
    elif index_type == "CategoricalIndex":
        self.idx = pd.CategoricalIndex(range(N), range(N))
    else:
        raise ValueError
    assert len(self.idx) == N
    self.idx._cache = {}
def test_astype_uint(self):
    arr = date_range('2000', periods=2)
    expected = pd.UInt64Index(
        np.array([946684800000000000, 946771200000000000], dtype="uint64")
    )

    tm.assert_index_equal(arr.astype("uint64"), expected)
    tm.assert_index_equal(arr.astype("uint32"), expected)
def setup(self, unique, sort, dtype):
    N = 10**5

    string_index = tm.makeStringIndex(N)
    string_arrow = None
    if dtype == "string[pyarrow]":
        try:
            string_arrow = pd.array(string_index, dtype="string[pyarrow]")
        except ImportError:
            raise NotImplementedError

    data = {
        "int": pd.Int64Index(np.arange(N)),
        "uint": pd.UInt64Index(np.arange(N)),
        "float": pd.Float64Index(np.random.randn(N)),
        "object": string_index,
        "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
        "datetime64[ns, tz]": pd.date_range(
            "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
        ),
        "Int64": pd.array(np.arange(N), dtype="Int64"),
        "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
        "string[pyarrow]": string_arrow,
    }[dtype]
    if not unique:
        data = data.repeat(5)
    self.data = data
def test_abc_types(self):
    assert isinstance(pd.Index(["a", "b", "c"]), gt.ABCIndex)
    assert isinstance(pd.Int64Index([1, 2, 3]), gt.ABCInt64Index)
    assert isinstance(pd.UInt64Index([1, 2, 3]), gt.ABCUInt64Index)
    assert isinstance(pd.Float64Index([1, 2, 3]), gt.ABCFloat64Index)
    assert isinstance(self.multi_index, gt.ABCMultiIndex)
    assert isinstance(self.datetime_index, gt.ABCDatetimeIndex)
    assert isinstance(self.timedelta_index, gt.ABCTimedeltaIndex)
    assert isinstance(self.period_index, gt.ABCPeriodIndex)
    assert isinstance(self.categorical_df.index, gt.ABCCategoricalIndex)
    assert isinstance(pd.Index(["a", "b", "c"]), gt.ABCIndexClass)
    assert isinstance(pd.Int64Index([1, 2, 3]), gt.ABCIndexClass)
    assert isinstance(pd.Series([1, 2, 3]), gt.ABCSeries)
    assert isinstance(self.df, gt.ABCDataFrame)
    assert isinstance(self.sparse_series, gt.ABCSparseSeries)
    assert isinstance(self.sparse_array, gt.ABCSparseArray)
    assert isinstance(self.sparse_frame, gt.ABCSparseDataFrame)
    assert isinstance(self.categorical, gt.ABCCategorical)
    assert isinstance(pd.Period("2012", freq="A-DEC"), gt.ABCPeriod)
    assert isinstance(pd.DateOffset(), gt.ABCDateOffset)
    assert isinstance(pd.Period("2012", freq="A-DEC").freq, gt.ABCDateOffset)
    assert not isinstance(pd.Period("2012", freq="A-DEC"), gt.ABCDateOffset)
    assert isinstance(pd.Interval(0, 1.5), gt.ABCInterval)
    assert not isinstance(pd.Period("2012", freq="A-DEC"), gt.ABCInterval)
    assert isinstance(self.datetime_array, gt.ABCDatetimeArray)
    assert not isinstance(self.datetime_index, gt.ABCDatetimeArray)
    assert isinstance(self.timedelta_array, gt.ABCTimedeltaArray)
    assert not isinstance(self.timedelta_index, gt.ABCTimedeltaArray)
def test_uint64_index_roundtrip(self):
    if self.should_skip:
        return self.skip('pandas is not importable')
    idx = pd.UInt64Index([0, 3, 4])
    decoded_idx = self.roundtrip(idx)
    assert_index_equal(decoded_idx, idx)
def test_abc_types(self):
    assert isinstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex)
    assert isinstance(pd.Int64Index([1, 2, 3]), gt.ABCInt64Index)
    assert isinstance(pd.UInt64Index([1, 2, 3]), gt.ABCUInt64Index)
    assert isinstance(pd.Float64Index([1, 2, 3]), gt.ABCFloat64Index)
    assert isinstance(self.multi_index, gt.ABCMultiIndex)
    assert isinstance(self.datetime_index, gt.ABCDatetimeIndex)
    assert isinstance(self.timedelta_index, gt.ABCTimedeltaIndex)
    assert isinstance(self.period_index, gt.ABCPeriodIndex)
    assert isinstance(self.categorical_df.index, gt.ABCCategoricalIndex)
    assert isinstance(pd.Index(['a', 'b', 'c']), gt.ABCIndexClass)
    assert isinstance(pd.Int64Index([1, 2, 3]), gt.ABCIndexClass)
    assert isinstance(pd.Series([1, 2, 3]), gt.ABCSeries)
    assert isinstance(self.df, gt.ABCDataFrame)
    with catch_warnings(record=True):
        assert isinstance(self.df.to_panel(), gt.ABCPanel)
    assert isinstance(self.sparse_series, gt.ABCSparseSeries)
    assert isinstance(self.sparse_array, gt.ABCSparseArray)
    assert isinstance(self.sparse_frame, gt.ABCSparseDataFrame)
    assert isinstance(self.categorical, gt.ABCCategorical)
    assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod)
    assert isinstance(pd.DateOffset(), gt.ABCDateOffset)
    assert isinstance(pd.Period('2012', freq='A-DEC').freq, gt.ABCDateOffset)
    assert not isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCDateOffset)
    assert isinstance(pd.Interval(0, 1.5), gt.ABCInterval)
    assert not isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCInterval)
def setup(self, sort, dtype):
    N = 10**5
    data = {
        'int': pd.Int64Index(np.arange(N).repeat(5)),
        'uint': pd.UInt64Index(np.arange(N).repeat(5)),
        'float': pd.Float64Index(np.random.randn(N).repeat(5)),
        'string': tm.makeStringIndex(N).repeat(5)
    }
    self.idx = data[dtype]
def test_astype_uint(self):
    arr = date_range("2000", periods=2, name="idx")
    expected = pd.UInt64Index(
        np.array([946684800000000000, 946771200000000000], dtype="uint64"),
        name="idx",
    )

    with tm.assert_produces_warning(FutureWarning):
        tm.assert_index_equal(arr.astype("uint64"), expected)
        tm.assert_index_equal(arr.astype("uint32"), expected)
def setup(self, sort, dtype):
    N = 10**5
    data = {
        "int": pd.Int64Index(np.arange(N).repeat(5)),
        "uint": pd.UInt64Index(np.arange(N).repeat(5)),
        "float": pd.Float64Index(np.random.randn(N).repeat(5)),
        "string": tm.makeStringIndex(N).repeat(5),
    }
    self.idx = data[dtype]
def setup(self, sort, dtype):
    N = 10**5
    data = {
        'int': pd.Int64Index(np.arange(N)),
        'uint': pd.UInt64Index(np.arange(N)),
        'float': pd.Float64Index(np.arange(N)),
        'string': tm.makeStringIndex(N)
    }
    self.idx = data[dtype]
    assert self.idx.is_unique
def setup(self, sort, dtype):
    N = 10**5
    data = {
        "int": pd.Int64Index(np.arange(N)),
        "uint": pd.UInt64Index(np.arange(N)),
        "float": pd.Float64Index(np.arange(N)),
        "string": tm.makeStringIndex(N),
    }
    self.idx = data[dtype]
    assert self.idx.is_unique
def setup(self, dtype):
    N = 10**5
    data = {
        'int': pd.Int64Index(np.arange(N)),
        'uint': pd.UInt64Index(np.arange(N)),
        'float': pd.Float64Index(np.random.randn(N)),
        'string': tm.makeStringIndex(N)
    }
    self.idx = data[dtype]
    # cache is_unique
    self.idx.is_unique
def setup(self, dtype):
    N = 10**5
    data = {
        "int": pd.Int64Index(np.arange(N)),
        "uint": pd.UInt64Index(np.arange(N)),
        "float": pd.Float64Index(np.random.randn(N)),
        "string": tm.makeStringIndex(N),
    }
    self.idx = data[dtype]
    # cache is_unique
    self.idx.is_unique
def cmp_cumfreq(df):
    '''Return the cumulative relative frequency of rental-event counts,
    reindexed onto a contiguous UInt64Index from 0 to the observed maximum.
    '''
    val_counts = df.value_counts(normalize=True)
    val_counts = val_counts.sort_index()
    max_val = val_counts.index.max()
    new_ind = pd.UInt64Index(range(int(max_val) + 1), name='rental_events')
    val_counts = val_counts.reindex(new_ind, fill_value=0.0)
    val_counts = val_counts.cumsum()
    return val_counts
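# Hypothetical usage of cmp_cumfreq (the data below is illustrative only, not
# from the original source): given a Series of per-customer rental-event
# counts, it returns the empirical cumulative distribution over every integer
# count from 0 up to the observed maximum.
import pandas as pd

events = pd.Series([0, 1, 1, 2, 4])
cdf = cmp_cumfreq(events)
# cdf is indexed by UInt64Index([0, 1, 2, 3, 4], name='rental_events') and
# holds the cumulative frequencies 0.2, 0.6, 0.8, 0.8, 1.0.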
def test_contains_with_float_index(self):
    # GH#22085
    integer_index = pd.Int64Index([0, 1, 2, 3])
    uinteger_index = pd.UInt64Index([0, 1, 2, 3])
    float_index = pd.Float64Index([0.1, 1.1, 2.2, 3.3])

    for index in (integer_index, uinteger_index):
        assert 1.1 not in index
        assert 1.0 in index
        assert 1 in index

    assert 1.1 in float_index
    assert 1.0 not in float_index
    assert 1 not in float_index
def setup(self, unique, sort, dtype):
    N = 10 ** 5
    data = {
        "int": pd.Int64Index(np.arange(N)),
        "uint": pd.UInt64Index(np.arange(N)),
        "float": pd.Float64Index(np.random.randn(N)),
        "string": tm.makeStringIndex(N),
        "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
        "datetime64[ns, tz]": pd.date_range(
            "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
        ),
        "Int64": pd.array(np.arange(N), dtype="Int64"),
        "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
    }[dtype]
    if not unique:
        data = data.repeat(5)
    self.data = data
def setup(self, unique, keep, dtype):
    N = 10 ** 5
    data = {
        "int": pd.Int64Index(np.arange(N)),
        "uint": pd.UInt64Index(np.arange(N)),
        "float": pd.Float64Index(np.random.randn(N)),
        "string": tm.makeStringIndex(N),
        "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
        "datetime64[ns, tz]": pd.date_range(
            "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
        ),
    }[dtype]
    if not unique:
        data = data.repeat(5)
    self.idx = data
    # cache is_unique
    self.idx.is_unique
def test_abc_types(self):
    self.assertIsInstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex)
    self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCInt64Index)
    self.assertIsInstance(pd.UInt64Index([1, 2, 3]), gt.ABCUInt64Index)
    self.assertIsInstance(pd.Float64Index([1, 2, 3]), gt.ABCFloat64Index)
    self.assertIsInstance(self.multi_index, gt.ABCMultiIndex)
    self.assertIsInstance(self.datetime_index, gt.ABCDatetimeIndex)
    self.assertIsInstance(self.timedelta_index, gt.ABCTimedeltaIndex)
    self.assertIsInstance(self.period_index, gt.ABCPeriodIndex)
    self.assertIsInstance(self.categorical_df.index, gt.ABCCategoricalIndex)
    self.assertIsInstance(pd.Index(['a', 'b', 'c']), gt.ABCIndexClass)
    self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCIndexClass)
    self.assertIsInstance(pd.Series([1, 2, 3]), gt.ABCSeries)
    self.assertIsInstance(self.df, gt.ABCDataFrame)
    self.assertIsInstance(self.df.to_panel(), gt.ABCPanel)
    self.assertIsInstance(self.sparse_series, gt.ABCSparseSeries)
    self.assertIsInstance(self.sparse_array, gt.ABCSparseArray)
    self.assertIsInstance(self.categorical, gt.ABCCategorical)
    self.assertIsInstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod)
def test_abc_types(self):
    assert isinstance(pd.Int64Index([1, 2, 3]), gt.ABCInt64Index)
    assert isinstance(pd.UInt64Index([1, 2, 3]), gt.ABCUInt64Index)
    assert isinstance(pd.Float64Index([1, 2, 3]), gt.ABCFloat64Index)
    assert isinstance(self.multi_index, gt.ABCMultiIndex)
    assert isinstance(self.datetime_index, gt.ABCDatetimeIndex)
    assert isinstance(self.timedelta_index, gt.ABCTimedeltaIndex)
    assert isinstance(self.period_index, gt.ABCPeriodIndex)
    assert isinstance(self.categorical_df.index, gt.ABCCategoricalIndex)
    assert isinstance(pd.Index(["a", "b", "c"]), gt.ABCIndexClass)
    assert isinstance(pd.Int64Index([1, 2, 3]), gt.ABCIndexClass)
    assert isinstance(pd.Series([1, 2, 3]), gt.ABCSeries)
    assert isinstance(self.df, gt.ABCDataFrame)
    assert isinstance(self.sparse_array, gt.ABCExtensionArray)
    assert isinstance(self.categorical, gt.ABCCategorical)
    assert isinstance(self.datetime_array, gt.ABCDatetimeArray)
    assert not isinstance(self.datetime_index, gt.ABCDatetimeArray)
    assert isinstance(self.timedelta_array, gt.ABCTimedeltaArray)
    assert not isinstance(self.timedelta_index, gt.ABCTimedeltaArray)
def test_meta_nonempty_uint64index():
    idx = pd.UInt64Index([1], name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.UInt64Index
    assert res.name == idx.name
class TestFancy(Base): """ pure get/set item & fancy indexing """ def test_setitem_ndarray_1d(self): # GH5508 # len of indexer vs length of the 1d ndarray df = DataFrame(index=Index(lrange(1, 11))) df['foo'] = np.zeros(10, dtype=np.float64) df['bar'] = np.zeros(10, dtype=np.complex) # invalid def f(): df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) pytest.raises(ValueError, f) # valid df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) result = df.loc[df.index[2:6], 'bar'] expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], name='bar') tm.assert_series_equal(result, expected) # dtype getting changed? df = DataFrame(index=Index(lrange(1, 11))) df['foo'] = np.zeros(10, dtype=np.float64) df['bar'] = np.zeros(10, dtype=np.complex) def f(): df[2:5] = np.arange(1, 4) * 1j pytest.raises(ValueError, f) def test_inf_upcast(self): # GH 16957 # We should be able to use np.inf as a key # np.inf should cause an index to convert to float # Test with np.inf in rows df = pd.DataFrame(columns=[0]) df.loc[1] = 1 df.loc[2] = 2 df.loc[np.inf] = 3 # make sure we can look up the value assert df.loc[np.inf, 0] == 3 result = df.index expected = pd.Float64Index([1, 2, np.inf]) tm.assert_index_equal(result, expected) # Test with np.inf in columns df = pd.DataFrame() df.loc[0, 0] = 1 df.loc[1, 1] = 2 df.loc[0, np.inf] = 3 result = df.columns expected = pd.Float64Index([0, 1, np.inf]) tm.assert_index_equal(result, expected) def test_setitem_dtype_upcast(self): # GH3216 df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) df['c'] = np.nan assert df['c'].dtype == np.float64 df.loc[0, 'c'] = 'foo' expected = DataFrame([{ "a": 1, "c": 'foo' }, { "a": 3, "b": 2, "c": np.nan }]) tm.assert_frame_equal(df, expected) # GH10280 df = DataFrame(np.arange(6, dtype='int64').reshape(2, 3), index=list('ab'), columns=['foo', 'bar', 'baz']) for val in [3.14, 'wxyz']: left = df.copy() left.loc['a', 'bar'] = val right = DataFrame([[0, val, 2], [3, 4, 5]], index=list('ab'), columns=['foo', 'bar', 'baz']) tm.assert_frame_equal(left, right) assert is_integer_dtype(left['foo']) assert is_integer_dtype(left['baz']) left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0, index=list('ab'), columns=['foo', 'bar', 'baz']) left.loc['a', 'bar'] = 'wxyz' right = DataFrame([[0, 'wxyz', .2], [.3, .4, .5]], index=list('ab'), columns=['foo', 'bar', 'baz']) tm.assert_frame_equal(left, right) assert is_float_dtype(left['foo']) assert is_float_dtype(left['baz']) def test_dups_fancy_indexing(self): # GH 3455 from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(10, 3) df.columns = ['a', 'a', 'b'] result = df[['b', 'a']].columns expected = Index(['b', 'a', 'a']) tm.assert_index_equal(result, expected) # across dtypes df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']], columns=list('aaaaaaa')) df.head() str(df) result = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']]) result.columns = list('aaaaaaa') # TODO(wesm): unused? 
df_v = df.iloc[:, 4] # noqa res_v = result.iloc[:, 4] # noqa tm.assert_frame_equal(df, result) # GH 3561, dups not in selected order df = DataFrame( { 'test': [5, 7, 9, 11], 'test1': [4., 5, 6, 7], 'other': list('abcd') }, index=['A', 'A', 'B', 'C']) rows = ['C', 'B'] expected = DataFrame( { 'test': [11, 9], 'test1': [7., 6], 'other': ['d', 'c'] }, index=rows) result = df.loc[rows] tm.assert_frame_equal(result, expected) result = df.loc[Index(rows)] tm.assert_frame_equal(result, expected) rows = ['C', 'B', 'E'] expected = DataFrame( { 'test': [11, 9, np.nan], 'test1': [7., 6, np.nan], 'other': ['d', 'c', np.nan] }, index=rows) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[rows] tm.assert_frame_equal(result, expected) # see GH5553, make sure we use the right indexer rows = ['F', 'G', 'H', 'C', 'B', 'E'] expected = DataFrame( { 'test': [np.nan, np.nan, np.nan, 11, 9, np.nan], 'test1': [np.nan, np.nan, np.nan, 7., 6, np.nan], 'other': [np.nan, np.nan, np.nan, 'd', 'c', np.nan] }, index=rows) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[rows] tm.assert_frame_equal(result, expected) # inconsistent returns for unique/duplicate indices when values are # missing df = DataFrame(np.random.randn(4, 3), index=list('ABCD')) expected = df.reindex(['E']) dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD')) with catch_warnings(record=True): result = dfnu.ix[['E']] tm.assert_frame_equal(result, expected) # ToDo: check_index_type can be True after GH 11497 # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[[0, 8, 0]] expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list('abc')}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[[0, 8, 0]] expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) # non unique with non unique selector df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C']) expected = DataFrame({'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[['A', 'A', 'E']] tm.assert_frame_equal(result, expected) # GH 5835 # dups on index and missing values df = DataFrame(np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A']) expected = pd.concat([ df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], index=df.index) ], axis=1) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[:, ['A', 'B', 'C']] tm.assert_frame_equal(result, expected) # GH 6504, multi-axis indexing df = DataFrame(np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=['a', 'b']) expected = df.iloc[0:6] result = df.loc[[1, 2]] tm.assert_frame_equal(result, expected) expected = df result = df.loc[:, ['a', 'b']] tm.assert_frame_equal(result, expected) expected = df.iloc[0:6, :] result = df.loc[[1, 2], ['a', 'b']] tm.assert_frame_equal(result, expected) def test_indexing_mixed_frame_bug(self): # GH3492 df = DataFrame({ 'a': { 1: 'aaa', 2: 'bbb', 3: 'ccc' }, 'b': { 1: 111, 2: 222, 3: 333 } }) # this works, new column is created correctly df['test'] = df['a'].apply(lambda x: '_' if x == 'aaa' else x) # this does not work, ie column test is not changed idx = df['test'] == 
'_' temp = df.loc[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x) df.loc[idx, 'test'] = temp assert df.iloc[0, 2] == '-----' # if I look at df, then element [0,2] equals '_'. If instead I type # df.ix[idx,'test'], I get '-----', finally by typing df.iloc[0,2] I # get '_'. def test_multitype_list_index_access(self): # GH 10610 df = pd.DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) with pytest.raises(KeyError): df[[22, 26, -8]] assert df[21].shape[0] == df.shape[0] def test_set_index_nan(self): # GH 3586 df = DataFrame({ 'PRuid': { 17: 'nonQC', 18: 'nonQC', 19: 'nonQC', 20: '10', 21: '11', 22: '12', 23: '13', 24: '24', 25: '35', 26: '46', 27: '47', 28: '48', 29: '59', 30: '10' }, 'QC': { 17: 0.0, 18: 0.0, 19: 0.0, 20: np.nan, 21: np.nan, 22: np.nan, 23: np.nan, 24: 1.0, 25: np.nan, 26: np.nan, 27: np.nan, 28: np.nan, 29: np.nan, 30: np.nan }, 'data': { 17: 7.9544899999999998, 18: 8.0142609999999994, 19: 7.8591520000000008, 20: 0.86140349999999999, 21: 0.87853110000000001, 22: 0.8427041999999999, 23: 0.78587700000000005, 24: 0.73062459999999996, 25: 0.81668560000000001, 26: 0.81927080000000008, 27: 0.80705009999999999, 28: 0.81440240000000008, 29: 0.80140849999999997, 30: 0.81307740000000006 }, 'year': { 17: 2006, 18: 2007, 19: 2008, 20: 1985, 21: 1985, 22: 1985, 23: 1985, 24: 1985, 25: 1985, 26: 1985, 27: 1985, 28: 1985, 29: 1985, 30: 1986 } }).reset_index() result = df.set_index(['year', 'PRuid', 'QC']).reset_index().reindex(columns=df.columns) tm.assert_frame_equal(result, df) def test_multi_nan_indexing(self): # GH 3588 df = DataFrame({ "a": ['R1', 'R2', np.nan, 'R4'], 'b': ["C1", "C2", "C3", "C4"], "c": [10, 15, np.nan, 20] }) result = df.set_index(['a', 'b'], drop=False) expected = DataFrame( { "a": ['R1', 'R2', np.nan, 'R4'], 'b': ["C1", "C2", "C3", "C4"], "c": [10, 15, np.nan, 20] }, index=[ Index(['R1', 'R2', np.nan, 'R4'], name='a'), Index(['C1', 'C2', 'C3', 'C4'], name='b') ]) tm.assert_frame_equal(result, expected) def test_multi_assign(self): # GH 3626, an assignement of a sub-df to a df df = DataFrame({ 'FC': ['a', 'b', 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], 'col1': lrange(6), 'col2': lrange(6, 12) }) df.iloc[1, 0] = np.nan df2 = df.copy() mask = ~df2.FC.isna() cols = ['col1', 'col2'] dft = df2 * 2 dft.iloc[3, 3] = np.nan expected = DataFrame({ 'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], 'col1': Series([0, 1, 4, 6, 8, 10]), 'col2': [12, 7, 16, np.nan, 20, 22] }) # frame on rhs df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) # with an ndarray on rhs # coerces to float64 because values has float64 dtype # GH 14001 expected = DataFrame({ 'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], 'col1': [0., 1., 4., 6., 8., 10.], 'col2': [12, 7, 16, np.nan, 20, 22] }) df2 = df.copy() df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) # broadcasting on the rhs is required df = DataFrame( dict(A=[1, 2, 0, 0, 0], B=[0, 0, 0, 10, 11], C=[0, 0, 0, 10, 11], D=[3, 4, 5, 6, 7])) expected = df.copy() mask = expected['A'] == 0 for col in ['A', 'B']: expected.loc[mask, col] = df['D'] df.loc[df['A'] == 0, ['A', 'B']] = df['D'] tm.assert_frame_equal(df, expected) def test_setitem_list(self): # GH 6043 # ix with a list df = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): 
df.ix[1, 0] = [1, 2, 3] df.ix[1, 0] = [1, 2] result = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): result.ix[1, 0] = [1, 2] tm.assert_frame_equal(result, df) # ix with an object class TO(object): def __init__(self, value): self.value = value def __str__(self): return "[{0}]".format(self.value) __repr__ = __str__ def __eq__(self, other): return self.value == other.value def view(self): return self df = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): df.ix[1, 0] = TO(1) df.ix[1, 0] = TO(2) result = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): result.ix[1, 0] = TO(2) tm.assert_frame_equal(result, df) # remains object dtype even after setting it back df = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): df.ix[1, 0] = TO(1) df.ix[1, 0] = np.nan result = DataFrame(index=[0, 1], columns=[0]) tm.assert_frame_equal(result, df) def test_string_slice(self): # GH 14424 # string indexing against datetimelike with object # dtype should properly raises KeyError df = pd.DataFrame([1], pd.Index([pd.Timestamp('2011-01-01')], dtype=object)) assert df.index.is_all_dates with pytest.raises(KeyError): df['2011'] with pytest.raises(KeyError): df.loc['2011', 0] df = pd.DataFrame() assert not df.index.is_all_dates with pytest.raises(KeyError): df['2011'] with pytest.raises(KeyError): df.loc['2011', 0] def test_mi_access(self): # GH 4145 data = """h1 apis h3 sub h5 0 a A 1 A1 1 1 b B 2 B1 2 2 c B 3 A1 3 3 d A 4 B2 4 4 e A 5 B2 5 5 f B 6 A2 6 """ df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0) df2 = df.set_index(['apis', 'sub']).T.sort_index(1) index = Index(['h1', 'h3', 'h5']) columns = MultiIndex.from_tuples([('A', 'A1')], names=['apis', 'sub']) expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T result = df2.loc[:, ('A', 'A1')] tm.assert_frame_equal(result, expected) result = df2[('A', 'A1')] tm.assert_frame_equal(result, expected) # GH 4146, not returning a block manager when selecting a unique index # from a duplicate index # as of 4879, this returns a Series (which is similar to what happens # with a non-unique) expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1') result = df2['A']['A1'] tm.assert_series_equal(result, expected) # selecting a non_unique from the 2nd level expected = DataFrame( [['d', 4, 4], ['e', 5, 5]], index=Index(['B2', 'B2'], name='sub'), columns=['h1', 'h3', 'h5'], ).T result = df2['A']['B2'] tm.assert_frame_equal(result, expected) def test_astype_assignment(self): # GH4312 (iloc) df_orig = DataFrame([['1', '2', '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) df = df_orig.copy() df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) expected = DataFrame([[1, 2, '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) tm.assert_frame_equal(df, expected) df = df_orig.copy() df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) expected = DataFrame([[1, 2, '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) tm.assert_frame_equal(df, expected) # GH5702 (loc) df = df_orig.copy() df.loc[:, 'A'] = df.loc[:, 'A'].astype(np.int64) expected = DataFrame([[1, '2', '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) tm.assert_frame_equal(df, expected) df = df_orig.copy() df.loc[:, ['B', 'C']] = df.loc[:, ['B', 'C']].astype(np.int64) expected = DataFrame([['1', 2, 3, '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) tm.assert_frame_equal(df, expected) # full replacements / no nans df = DataFrame({'A': [1., 2., 3., 4.]}) df.iloc[:, 0] = 
df['A'].astype(np.int64) expected = DataFrame({'A': [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) df = DataFrame({'A': [1., 2., 3., 4.]}) df.loc[:, 'A'] = df['A'].astype(np.int64) expected = DataFrame({'A': [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) def test_astype_assignment_with_dups(self): # GH 4686 # assignment with dups that has a dtype change cols = pd.MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')]) df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object) index = df.index.copy() df['A'] = df['A'].astype(np.float64) tm.assert_index_equal(df.index, index) # TODO(wesm): unused variables # result = df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() @pytest.mark.parametrize("index,val", [ (pd.Index([0, 1, 2]), 2), (pd.Index([0, 1, '2']), '2'), (pd.Index([0, 1, 2, np.inf, 4]), 4), (pd.Index([0, 1, 2, np.nan, 4]), 4), (pd.Index([0, 1, 2, np.inf]), np.inf), (pd.Index([0, 1, 2, np.nan]), np.nan), ]) def test_index_contains(self, index, val): assert val in index @pytest.mark.parametrize( "index,val", [ (pd.Index([0, 1, 2]), '2'), (pd.Index([0, 1, '2']), 2), (pd.Index([0, 1, 2, np.inf]), 4), (pd.Index([0, 1, 2, np.nan]), 4), (pd.Index([0, 1, 2, np.inf]), np.nan), (pd.Index([0, 1, 2, np.nan]), np.inf), # Checking if np.inf in Int64Index should not cause an OverflowError # Related to GH 16957 (pd.Int64Index([0, 1, 2]), np.inf), (pd.Int64Index([0, 1, 2]), np.nan), (pd.UInt64Index([0, 1, 2]), np.inf), (pd.UInt64Index([0, 1, 2]), np.nan), ]) def test_index_not_contains(self, index, val): assert val not in index def test_index_type_coercion(self): with catch_warnings(record=True): # GH 11836 # if we have an index type and set it with something that looks # to numpy like the same, but is actually, not # (e.g. setting with a float or string '0') # then we need to coerce to object # integer indexes for s in [Series(range(5)), Series(range(5), index=range(1, 6))]: assert s.index.is_integer() for indexer in [lambda x: x.ix, lambda x: x.loc, lambda x: x]: s2 = s.copy() indexer(s2)[0.1] = 0 assert s2.index.is_floating() assert indexer(s2)[0.1] == 0 s2 = s.copy() indexer(s2)[0.0] = 0 exp = s.index if 0 not in s: exp = Index(s.index.tolist() + [0]) tm.assert_index_equal(s2.index, exp) s2 = s.copy() indexer(s2)['0'] = 0 assert s2.index.is_object() for s in [Series(range(5), index=np.arange(5.))]: assert s.index.is_floating() for idxr in [lambda x: x.ix, lambda x: x.loc, lambda x: x]: s2 = s.copy() idxr(s2)[0.1] = 0 assert s2.index.is_floating() assert idxr(s2)[0.1] == 0 s2 = s.copy() idxr(s2)[0.0] = 0 tm.assert_index_equal(s2.index, s.index) s2 = s.copy() idxr(s2)['0'] = 0 assert s2.index.is_object()
def test_astype_column_metadata(self, dtype):
    # GH 19920
    columns = pd.UInt64Index([100, 200, 300], name="foo")
    df = DataFrame(np.arange(15).reshape(5, 3), columns=columns)
    df = df.astype(dtype)
    tm.assert_index_equal(df.columns, columns)
class TestFancy(Base): """ pure get/set item & fancy indexing """ def test_setitem_ndarray_1d(self): # GH5508 # len of indexer vs length of the 1d ndarray df = DataFrame(index=Index(np.arange(1, 11))) df["foo"] = np.zeros(10, dtype=np.float64) df["bar"] = np.zeros(10, dtype=np.complex) # invalid with pytest.raises(ValueError): df.loc[df.index[2:5], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) # valid df.loc[df.index[2:6], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) result = df.loc[df.index[2:6], "bar"] expected = Series( [2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], name="bar" ) tm.assert_series_equal(result, expected) # dtype getting changed? df = DataFrame(index=Index(np.arange(1, 11))) df["foo"] = np.zeros(10, dtype=np.float64) df["bar"] = np.zeros(10, dtype=np.complex) with pytest.raises(ValueError): df[2:5] = np.arange(1, 4) * 1j @pytest.mark.parametrize( "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__ ) @pytest.mark.parametrize( "obj", [ lambda i: Series(np.arange(len(i)), index=i), lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i), ], ids=["Series", "DataFrame"], ) @pytest.mark.parametrize( "idxr, idxr_id", [ (lambda x: x, "getitem"), (lambda x: x.loc, "loc"), (lambda x: x.iloc, "iloc"), pytest.param(lambda x: x.ix, "ix", marks=ignore_ix), ], ) def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): # GH 25567 obj = obj(index) idxr = idxr(obj) nd3 = np.random.randint(5, size=(2, 2, 2)) msg = ( r"Buffer has wrong number of dimensions \(expected 1," r" got 3\)|" "The truth value of an array with more than one element is" " ambiguous|" "Cannot index with multidimensional key|" r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]|" "No matching signature found|" # TypeError "unhashable type: 'numpy.ndarray'" # TypeError ) if ( isinstance(obj, Series) and idxr_id == "getitem" and index.inferred_type in [ "string", "datetime64", "period", "timedelta64", "boolean", "categorical", ] ): idxr[nd3] else: if ( isinstance(obj, DataFrame) and idxr_id == "getitem" and index.inferred_type == "boolean" ): error = TypeError elif idxr_id == "getitem" and index.inferred_type == "interval": error = TypeError else: error = ValueError with pytest.raises(error, match=msg): idxr[nd3] @pytest.mark.parametrize( "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__ ) @pytest.mark.parametrize( "obj", [ lambda i: Series(np.arange(len(i)), index=i), lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i), ], ids=["Series", "DataFrame"], ) @pytest.mark.parametrize( "idxr, idxr_id", [ (lambda x: x, "setitem"), (lambda x: x.loc, "loc"), (lambda x: x.iloc, "iloc"), pytest.param(lambda x: x.ix, "ix", marks=ignore_ix), ], ) def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): # GH 25567 obj = obj(index) idxr = idxr(obj) nd3 = np.random.randint(5, size=(2, 2, 2)) msg = ( r"Buffer has wrong number of dimensions \(expected 1," r" got 3\)|" "The truth value of an array with more than one element is" " ambiguous|" "Only 1-dimensional input arrays are supported|" "'pandas._libs.interval.IntervalTree' object has no attribute" " 'set_value'|" # AttributeError "unhashable type: 'numpy.ndarray'|" # TypeError "No matching signature found|" # TypeError r"^\[\[\[" # pandas.core.indexing.IndexingError ) if ( (idxr_id == "iloc") or ( ( isinstance(obj, Series) and idxr_id == "setitem" and index.inferred_type in [ "floating", "string", "datetime64", "period", "timedelta64", "boolean", "categorical", ] ) ) or ( idxr_id == "ix" 
and index.inferred_type in ["string", "datetime64", "period", "boolean"] ) ): idxr[nd3] = 0 else: with pytest.raises( (ValueError, AttributeError, TypeError, pd.core.indexing.IndexingError), match=msg, ): idxr[nd3] = 0 def test_inf_upcast(self): # GH 16957 # We should be able to use np.inf as a key # np.inf should cause an index to convert to float # Test with np.inf in rows df = DataFrame(columns=[0]) df.loc[1] = 1 df.loc[2] = 2 df.loc[np.inf] = 3 # make sure we can look up the value assert df.loc[np.inf, 0] == 3 result = df.index expected = pd.Float64Index([1, 2, np.inf]) tm.assert_index_equal(result, expected) # Test with np.inf in columns df = DataFrame() df.loc[0, 0] = 1 df.loc[1, 1] = 2 df.loc[0, np.inf] = 3 result = df.columns expected = pd.Float64Index([0, 1, np.inf]) tm.assert_index_equal(result, expected) def test_setitem_dtype_upcast(self): # GH3216 df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) df["c"] = np.nan assert df["c"].dtype == np.float64 df.loc[0, "c"] = "foo" expected = DataFrame([{"a": 1, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]) tm.assert_frame_equal(df, expected) # GH10280 df = DataFrame( np.arange(6, dtype="int64").reshape(2, 3), index=list("ab"), columns=["foo", "bar", "baz"], ) for val in [3.14, "wxyz"]: left = df.copy() left.loc["a", "bar"] = val right = DataFrame( [[0, val, 2], [3, 4, 5]], index=list("ab"), columns=["foo", "bar", "baz"], ) tm.assert_frame_equal(left, right) assert is_integer_dtype(left["foo"]) assert is_integer_dtype(left["baz"]) left = DataFrame( np.arange(6, dtype="int64").reshape(2, 3) / 10.0, index=list("ab"), columns=["foo", "bar", "baz"], ) left.loc["a", "bar"] = "wxyz" right = DataFrame( [[0, "wxyz", 0.2], [0.3, 0.4, 0.5]], index=list("ab"), columns=["foo", "bar", "baz"], ) tm.assert_frame_equal(left, right) assert is_float_dtype(left["foo"]) assert is_float_dtype(left["baz"]) def test_dups_fancy_indexing(self): # GH 3455 from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(10, 3) df.columns = ["a", "a", "b"] result = df[["b", "a"]].columns expected = Index(["b", "a", "a"]) tm.assert_index_equal(result, expected) # across dtypes df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")) df.head() str(df) result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]]) result.columns = list("aaaaaaa") # TODO(wesm): unused? 
df_v = df.iloc[:, 4] # noqa res_v = result.iloc[:, 4] # noqa tm.assert_frame_equal(df, result) # GH 3561, dups not in selected order df = DataFrame( {"test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd")}, index=["A", "A", "B", "C"], ) rows = ["C", "B"] expected = DataFrame( {"test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"]}, index=rows ) result = df.loc[rows] tm.assert_frame_equal(result, expected) result = df.loc[Index(rows)] tm.assert_frame_equal(result, expected) rows = ["C", "B", "E"] expected = DataFrame( { "test": [11, 9, np.nan], "test1": [7.0, 6, np.nan], "other": ["d", "c", np.nan], }, index=rows, ) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[rows] tm.assert_frame_equal(result, expected) # see GH5553, make sure we use the right indexer rows = ["F", "G", "H", "C", "B", "E"] expected = DataFrame( { "test": [np.nan, np.nan, np.nan, 11, 9, np.nan], "test1": [np.nan, np.nan, np.nan, 7.0, 6, np.nan], "other": [np.nan, np.nan, np.nan, "d", "c", np.nan], }, index=rows, ) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[rows] tm.assert_frame_equal(result, expected) # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) with pytest.raises( KeyError, match=re.escape( "\"None of [Index(['E'], dtype='object')] are in the [index]\"" ), ): dfnu.loc[["E"]] # ToDo: check_index_type can be True after GH 11497 # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[[0, 8, 0]] expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list("abc")}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[[0, 8, 0]] expected = DataFrame({"A": ["a", np.nan, "a"]}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) # non unique with non unique selector df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) expected = DataFrame( {"test": [5, 7, 5, 7, np.nan]}, index=["A", "A", "A", "A", "E"] ) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[["A", "A", "E"]] tm.assert_frame_equal(result, expected) def test_dups_fancy_indexing2(self): # GH 5835 # dups on index and missing values df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"]) expected = pd.concat( [df.loc[:, ["A", "B"]], DataFrame(np.nan, columns=["C"], index=df.index)], axis=1, ) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[:, ["A", "B", "C"]] tm.assert_frame_equal(result, expected) # GH 6504, multi-axis indexing df = DataFrame( np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"] ) expected = df.iloc[0:6] result = df.loc[[1, 2]] tm.assert_frame_equal(result, expected) expected = df result = df.loc[:, ["a", "b"]] tm.assert_frame_equal(result, expected) expected = df.iloc[0:6, :] result = df.loc[[1, 2], ["a", "b"]] tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("case", [lambda s: s, lambda s: s.loc]) def test_duplicate_int_indexing(self, case): # GH 17347 s = pd.Series(range(3), index=[1, 1, 3]) expected = s[1] result = case(s)[[1]] tm.assert_series_equal(result, expected) def test_indexing_mixed_frame_bug(self): # GH3492 df = DataFrame( {"a": {1: "aaa", 2: "bbb", 3: "ccc"}, "b": {1: 111, 2: 
222, 3: 333}} ) # this works, new column is created correctly df["test"] = df["a"].apply(lambda x: "_" if x == "aaa" else x) # this does not work, ie column test is not changed idx = df["test"] == "_" temp = df.loc[idx, "a"].apply(lambda x: "-----" if x == "aaa" else x) df.loc[idx, "test"] = temp assert df.iloc[0, 2] == "-----" # if I look at df, then element [0,2] equals '_'. If instead I type # df.ix[idx,'test'], I get '-----', finally by typing df.iloc[0,2] I # get '_'. def test_multitype_list_index_access(self): # GH 10610 df = DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) with pytest.raises(KeyError, match=re.escape("'[-8, 26] not in index'")): df[[22, 26, -8]] assert df[21].shape[0] == df.shape[0] def test_set_index_nan(self): # GH 3586 df = DataFrame( { "PRuid": { 17: "nonQC", 18: "nonQC", 19: "nonQC", 20: "10", 21: "11", 22: "12", 23: "13", 24: "24", 25: "35", 26: "46", 27: "47", 28: "48", 29: "59", 30: "10", }, "QC": { 17: 0.0, 18: 0.0, 19: 0.0, 20: np.nan, 21: np.nan, 22: np.nan, 23: np.nan, 24: 1.0, 25: np.nan, 26: np.nan, 27: np.nan, 28: np.nan, 29: np.nan, 30: np.nan, }, "data": { 17: 7.9544899999999998, 18: 8.0142609999999994, 19: 7.8591520000000008, 20: 0.86140349999999999, 21: 0.87853110000000001, 22: 0.8427041999999999, 23: 0.78587700000000005, 24: 0.73062459999999996, 25: 0.81668560000000001, 26: 0.81927080000000008, 27: 0.80705009999999999, 28: 0.81440240000000008, 29: 0.80140849999999997, 30: 0.81307740000000006, }, "year": { 17: 2006, 18: 2007, 19: 2008, 20: 1985, 21: 1985, 22: 1985, 23: 1985, 24: 1985, 25: 1985, 26: 1985, 27: 1985, 28: 1985, 29: 1985, 30: 1986, }, } ).reset_index() result = ( df.set_index(["year", "PRuid", "QC"]) .reset_index() .reindex(columns=df.columns) ) tm.assert_frame_equal(result, df) def test_multi_assign(self): # GH 3626, an assignment of a sub-df to a df df = DataFrame( { "FC": ["a", "b", "a", "b", "a", "b"], "PF": [0, 0, 0, 0, 1, 1], "col1": list(range(6)), "col2": list(range(6, 12)), } ) df.iloc[1, 0] = np.nan df2 = df.copy() mask = ~df2.FC.isna() cols = ["col1", "col2"] dft = df2 * 2 dft.iloc[3, 3] = np.nan expected = DataFrame( { "FC": ["a", np.nan, "a", "b", "a", "b"], "PF": [0, 0, 0, 0, 1, 1], "col1": Series([0, 1, 4, 6, 8, 10]), "col2": [12, 7, 16, np.nan, 20, 22], } ) # frame on rhs df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) # with an ndarray on rhs # coerces to float64 because values has float64 dtype # GH 14001 expected = DataFrame( { "FC": ["a", np.nan, "a", "b", "a", "b"], "PF": [0, 0, 0, 0, 1, 1], "col1": [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], "col2": [12, 7, 16, np.nan, 20, 22], } ) df2 = df.copy() df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) # broadcasting on the rhs is required df = DataFrame( dict( A=[1, 2, 0, 0, 0], B=[0, 0, 0, 10, 11], C=[0, 0, 0, 10, 11], D=[3, 4, 5, 6, 7], ) ) expected = df.copy() mask = expected["A"] == 0 for col in ["A", "B"]: expected.loc[mask, col] = df["D"] df.loc[df["A"] == 0, ["A", "B"]] = df["D"] tm.assert_frame_equal(df, expected) def test_setitem_list(self): # GH 6043 # ix with a list df = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): simplefilter("ignore") df.ix[1, 0] = [1, 2, 3] df.ix[1, 0] = [1, 2] result = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): simplefilter("ignore") 
result.ix[1, 0] = [1, 2] tm.assert_frame_equal(result, df) # ix with an object class TO: def __init__(self, value): self.value = value def __str__(self): return "[{0}]".format(self.value) __repr__ = __str__ def __eq__(self, other): return self.value == other.value def view(self): return self df = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): simplefilter("ignore") df.ix[1, 0] = TO(1) df.ix[1, 0] = TO(2) result = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): simplefilter("ignore") result.ix[1, 0] = TO(2) tm.assert_frame_equal(result, df) # remains object dtype even after setting it back df = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): simplefilter("ignore") df.ix[1, 0] = TO(1) df.ix[1, 0] = np.nan result = DataFrame(index=[0, 1], columns=[0]) tm.assert_frame_equal(result, df) def test_string_slice(self): # GH 14424 # string indexing against datetimelike with object # dtype should properly raises KeyError df = DataFrame([1], Index([pd.Timestamp("2011-01-01")], dtype=object)) assert df.index.is_all_dates with pytest.raises(KeyError, match="'2011'"): df["2011"] with pytest.raises(KeyError, match="'2011'"): df.loc["2011", 0] df = DataFrame() assert not df.index.is_all_dates with pytest.raises(KeyError, match="'2011'"): df["2011"] with pytest.raises(KeyError, match="'2011'"): df.loc["2011", 0] def test_astype_assignment(self): # GH4312 (iloc) df_orig = DataFrame( [["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) df = df_orig.copy() df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) expected = DataFrame( [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) tm.assert_frame_equal(df, expected) df = df_orig.copy() df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) expected = DataFrame( [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) tm.assert_frame_equal(df, expected) # GH5702 (loc) df = df_orig.copy() df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) expected = DataFrame( [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) tm.assert_frame_equal(df, expected) df = df_orig.copy() df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64) expected = DataFrame( [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) tm.assert_frame_equal(df, expected) # full replacements / no nans df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) df.iloc[:, 0] = df["A"].astype(np.int64) expected = DataFrame({"A": [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) df.loc[:, "A"] = df["A"].astype(np.int64) expected = DataFrame({"A": [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( "index,val", [ (Index([0, 1, 2]), 2), (Index([0, 1, "2"]), "2"), (Index([0, 1, 2, np.inf, 4]), 4), (Index([0, 1, 2, np.nan, 4]), 4), (Index([0, 1, 2, np.inf]), np.inf), (Index([0, 1, 2, np.nan]), np.nan), ], ) def test_index_contains(self, index, val): assert val in index @pytest.mark.parametrize( "index,val", [ (Index([0, 1, 2]), "2"), (Index([0, 1, "2"]), 2), (Index([0, 1, 2, np.inf]), 4), (Index([0, 1, 2, np.nan]), 4), (Index([0, 1, 2, np.inf]), np.nan), (Index([0, 1, 2, np.nan]), np.inf), # Checking if np.inf in Int64Index should not cause an OverflowError # Related to GH 16957 (pd.Int64Index([0, 1, 2]), np.inf), (pd.Int64Index([0, 1, 2]), np.nan), (pd.UInt64Index([0, 1, 2]), np.inf), (pd.UInt64Index([0, 1, 2]), np.nan), ], ) def test_index_not_contains(self, index, val): assert val not in index @pytest.mark.parametrize( 
"index,val", [(Index([0, 1, "2"]), 0), (Index([0, 1, "2"]), "2")] ) def test_mixed_index_contains(self, index, val): # GH 19860 assert val in index @pytest.mark.parametrize( "index,val", [(Index([0, 1, "2"]), "1"), (Index([0, 1, "2"]), 2)] ) def test_mixed_index_not_contains(self, index, val): # GH 19860 assert val not in index def test_contains_with_float_index(self): # GH#22085 integer_index = pd.Int64Index([0, 1, 2, 3]) uinteger_index = pd.UInt64Index([0, 1, 2, 3]) float_index = pd.Float64Index([0.1, 1.1, 2.2, 3.3]) for index in (integer_index, uinteger_index): assert 1.1 not in index assert 1.0 in index assert 1 in index assert 1.1 in float_index assert 1.0 not in float_index assert 1 not in float_index def test_index_type_coercion(self): with catch_warnings(record=True): simplefilter("ignore") # GH 11836 # if we have an index type and set it with something that looks # to numpy like the same, but is actually, not # (e.g. setting with a float or string '0') # then we need to coerce to object # integer indexes for s in [Series(range(5)), Series(range(5), index=range(1, 6))]: assert s.index.is_integer() for indexer in [lambda x: x.ix, lambda x: x.loc, lambda x: x]: s2 = s.copy() indexer(s2)[0.1] = 0 assert s2.index.is_floating() assert indexer(s2)[0.1] == 0 s2 = s.copy() indexer(s2)[0.0] = 0 exp = s.index if 0 not in s: exp = Index(s.index.tolist() + [0]) tm.assert_index_equal(s2.index, exp) s2 = s.copy() indexer(s2)["0"] = 0 assert s2.index.is_object() for s in [Series(range(5), index=np.arange(5.0))]: assert s.index.is_floating() for idxr in [lambda x: x.ix, lambda x: x.loc, lambda x: x]: s2 = s.copy() idxr(s2)[0.1] = 0 assert s2.index.is_floating() assert idxr(s2)[0.1] == 0 s2 = s.copy() idxr(s2)[0.0] = 0 tm.assert_index_equal(s2.index, s.index) s2 = s.copy() idxr(s2)["0"] = 0 assert s2.index.is_object()
@pytest.fixture(params=zeros)
def zero(request):
    # For testing division by (or of) zero for Index with length 5, this
    # gives several scalar-zeros and length-5 vector-zeros
    return request.param


# ------------------------------------------------------------------
# Vector Fixtures


@pytest.fixture(
    params=[
        pd.Float64Index(np.arange(5, dtype="float64")),
        pd.Int64Index(np.arange(5, dtype="int64")),
        pd.UInt64Index(np.arange(5, dtype="uint64")),
        pd.RangeIndex(5),
    ],
    ids=lambda x: type(x).__name__,
)
def numeric_idx(request):
    """
    Several types of numeric-dtypes Index objects
    """
    return request.param


# ------------------------------------------------------------------
# Scalar Fixtures
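# For illustration only (not part of the original fixtures): a hypothetical
# test consuming the numeric_idx fixture above. pytest runs it once per
# parametrized index class, so the same invariants are checked uniformly for
# Float64Index, Int64Index, UInt64Index and RangeIndex.
def test_numeric_idx_smoke(numeric_idx):
    assert len(numeric_idx) == 5
    assert numeric_idx.is_monotonic_increasing
    assert numeric_idx.is_unique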
def test_astype_uint(self):
    arr = period_range("2000", periods=2)
    expected = pd.UInt64Index(np.array([10957, 10958], dtype="uint64"))

    tm.assert_index_equal(arr.astype("uint64"), expected)
    tm.assert_index_equal(arr.astype("uint32"), expected)