def test_pi_sub_pdnat(self): # GH#13071 idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx') exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx') tm.assert_index_equal(pd.NaT - idx, exp) tm.assert_index_equal(idx - pd.NaT, exp)
def test_inf_upcast(self): # GH 16957 # We should be able to use np.inf as a key # np.inf should cause an index to convert to float # Test with np.inf in rows df = DataFrame(columns=[0]) df.loc[1] = 1 df.loc[2] = 2 df.loc[np.inf] = 3 # make sure we can look up the value assert df.loc[np.inf, 0] == 3 result = df.index expected = pd.Float64Index([1, 2, np.inf]) tm.assert_index_equal(result, expected) # Test with np.inf in columns df = DataFrame() df.loc[0, 0] = 1 df.loc[1, 1] = 2 df.loc[0, np.inf] = 3 result = df.columns expected = pd.Float64Index([0, 1, np.inf]) tm.assert_index_equal(result, expected)
def test_roundtrip_pickle_with_tz(self): # GH 8367 # round-trip of timezone index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') unpickled = tm.round_trip_pickle(index) tm.assert_index_equal(index, unpickled)
def test_pi_add_offset_array(self, box): # GH#18849 pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')]) offs = box([pd.offsets.QuarterEnd(n=1, startingMonth=12), pd.offsets.QuarterEnd(n=-2, startingMonth=12)]) expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')]) with tm.assert_produces_warning(PerformanceWarning): res = pi + offs tm.assert_index_equal(res, expected) with tm.assert_produces_warning(PerformanceWarning): res2 = offs + pi tm.assert_index_equal(res2, expected) unanchored = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) # addition/subtraction ops with incompatible offsets should issue # a PerformanceWarning and _then_ raise a TypeError. with pytest.raises(IncompatibleFrequency): with tm.assert_produces_warning(PerformanceWarning): pi + unanchored with pytest.raises(IncompatibleFrequency): with tm.assert_produces_warning(PerformanceWarning): unanchored + pi
def test_map_bug_1677(self): index = DatetimeIndex(['2012-04-25 09:30:00.393000']) f = index.asof result = index.map(f) expected = Index([f(index[0])]) tm.assert_index_equal(result, expected)
def test_get_duplicates(self): idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02', '2000-01-03', '2000-01-03', '2000-01-04']) result = idx.get_duplicates() ex = DatetimeIndex(['2000-01-02', '2000-01-03']) tm.assert_index_equal(result, ex)
def test_map(self): rng = date_range('1/1/2000', periods=10) f = lambda x: x.strftime('%Y%m%d') result = rng.map(f) exp = Index([f(x) for x in rng], dtype='<U8') tm.assert_index_equal(result, exp)
def check_coerce(self, a, b, is_float_index=True): assert a.equals(b) tm.assert_index_equal(a, b, exact=False) if is_float_index: assert isinstance(b, Float64Index) else: self.check_is_index(b)
def test_take_fill_value(self): # see gh-12631 idx = self._holder([1, 2, 3], name='xxx') result = idx.take(np.array([1, 0, -1])) expected = self._holder([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) name = self._holder.__name__ msg = ("Unable to fill values because " "{name} cannot contain NA").format(name=name) # fill_value=True with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = self._holder([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): idx.take(np.array([1, -5]))
def test_begin_year_alias(self, freq): # see gh-9313 rng = date_range("1/1/2013", "7/1/2017", freq=freq) exp = pd.DatetimeIndex(["2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"], freq=freq) tm.assert_index_equal(rng, exp)
def _assert_insert_conversion(self, original, value, expected, expected_dtype): """ test coercion triggered by insert """ target = original.copy() res = target.insert(1, value) tm.assert_index_equal(res, expected) assert res.dtype == expected_dtype
def test_constructor_interval(self): result = Categorical([Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True) ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) exp = Categorical(ii, ordered=True) tm.assert_categorical_equal(result, exp) tm.assert_index_equal(result.categories, ii)
def test_update_dtype_string(self, ordered): dtype = CategoricalDtype(list('abc'), ordered) expected_categories = dtype.categories expected_ordered = dtype.ordered result = dtype.update_dtype('category') tm.assert_index_equal(result.categories, expected_categories) assert result.ordered is expected_ordered
def test_constructor_from_index_series_timedelta(self): idx = timedelta_range('1 days', freq='D', periods=3) result = Categorical(idx) tm.assert_index_equal(result.categories, idx) result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx)
def test_constructor_from_index_series_period(self): idx = period_range('2015-01-01', freq='D', periods=3) result = Categorical(idx) tm.assert_index_equal(result.categories, idx) result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx)
def test_combineFrame(self, float_frame, mixed_float_frame, mixed_int_frame): frame_copy = float_frame.reindex(float_frame.index[::2]) del frame_copy['D'] frame_copy['C'][:5] = np.nan added = float_frame + frame_copy indexer = added['A'].dropna().index exp = (float_frame['A'] * 2).copy() tm.assert_series_equal(added['A'].dropna(), exp.loc[indexer]) exp.loc[~exp.index.isin(indexer)] = np.nan tm.assert_series_equal(added['A'], exp.loc[added['A'].index]) assert np.isnan(added['C'].reindex(frame_copy.index)[:5]).all() # assert(False) assert np.isnan(added['D']).all() self_added = float_frame + float_frame tm.assert_index_equal(self_added.index, float_frame.index) added_rev = frame_copy + float_frame assert np.isnan(added['D']).all() assert np.isnan(added_rev['D']).all() # corner cases # empty plus_empty = float_frame + DataFrame() assert np.isnan(plus_empty.values).all() empty_plus = DataFrame() + float_frame assert np.isnan(empty_plus.values).all() empty_empty = DataFrame() + DataFrame() assert empty_empty.empty # out of order reverse = float_frame.reindex(columns=float_frame.columns[::-1]) assert_frame_equal(reverse + float_frame, float_frame * 2) # mix vs float64, upcast added = float_frame + mixed_float_frame _check_mixed_float(added, dtype='float64') added = mixed_float_frame + float_frame _check_mixed_float(added, dtype='float64') # mix vs mix added = mixed_float_frame + mixed_float_frame _check_mixed_float(added, dtype=dict(C=None)) # with int added = float_frame + mixed_int_frame _check_mixed_float(added, dtype='float64')
def test_constructor_with_datetimelike(self): # 12077 # constructor wwth a datetimelike and NaT for dtl in [date_range('1995-01-01 00:00:00', periods=5, freq='s'), date_range('1995-01-01 00:00:00', periods=5, freq='s', tz='US/Eastern'), timedelta_range('1 day', periods=5, freq='s')]: s = Series(dtl) c = Categorical(s) expected = type(dtl)(s) expected.freq = None tm.assert_index_equal(c.categories, expected) tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype='int8')) # with NaT s2 = s.copy() s2.iloc[-1] = NaT c = Categorical(s2) expected = type(dtl)(s2.dropna()) expected.freq = None tm.assert_index_equal(c.categories, expected) exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) tm.assert_numpy_array_equal(c.codes, exp) result = repr(c) assert 'NaT' in result
def test_frame_to_period(self): K = 5 from pandas.tseries.period import period_range dr = date_range('1/1/2000', '1/1/2001') pr = period_range('1/1/2000', '1/1/2001') df = DataFrame(randn(len(dr), K), index=dr) df['mix'] = 'a' pts = df.to_period() exp = df.copy() exp.index = pr assert_frame_equal(pts, exp) pts = df.to_period('M') tm.assert_index_equal(pts.index, exp.index.asfreq('M')) df = df.T pts = df.to_period(axis=1) exp = df.copy() exp.columns = pr assert_frame_equal(pts, exp) pts = df.to_period('M', axis=1) tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) self.assertRaises(ValueError, df.to_period, axis=2)
def test_value_counts_inferred(self): klasses = [Index, Series] for klass in klasses: s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a'] s = klass(s_values) expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c']) tm.assert_series_equal(s.value_counts(), expected) if isinstance(s, Index): exp = Index(np.unique(np.array(s_values, dtype=np.object_))) tm.assert_index_equal(s.unique(), exp) else: exp = np.unique(np.array(s_values, dtype=np.object_)) tm.assert_numpy_array_equal(s.unique(), exp) assert s.nunique() == 4 # don't sort, have to sort after the fact as not sorting is # platform-dep hist = s.value_counts(sort=False).sort_values() expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values() tm.assert_series_equal(hist, expected) # sort ascending hist = s.value_counts(ascending=True) expected = Series([1, 2, 3, 4], index=list('cdab')) tm.assert_series_equal(hist, expected) # relative histogram. hist = s.value_counts(normalize=True) expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c']) tm.assert_series_equal(hist, expected)
def test_delete(self): idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx') # prserve freq expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', name='idx') expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', name='idx') # reset freq to None expected_1 = TimedeltaIndex( ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') cases = {0: expected_0, -5: expected_0, -1: expected_4, 4: expected_4, 1: expected_1} for n, expected in compat.iteritems(cases): result = idx.delete(n) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq with pytest.raises((IndexError, ValueError)): # either depeidnig on numpy version result = idx.delete(5)
def test_delete_slice(self): idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') # prserve freq expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', name='idx') expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', name='idx') # reset freq to None expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', '7 d', '8 d', '9 d', '10d'], freq=None, name='idx') cases = {(0, 1, 2): expected_0_2, (7, 8, 9): expected_7_9, (3, 4, 5): expected_3_5} for n, expected in compat.iteritems(cases): result = idx.delete(n) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq result = idx.delete(slice(n[0], n[-1] + 1)) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq
def test_mul_index(self): idx = self.create_index() # in general not true for RangeIndex if not isinstance(idx, RangeIndex): result = idx * idx tm.assert_index_equal(result, idx ** 2)
def test_CalendarDay_range_with_dst_crossing(self): # GH 20596 result = date_range('2018-10-23', '2018-11-06', freq='7CD', tz='Europe/Paris') expected = date_range('2018-10-23', '2018-11-06', freq=pd.DateOffset(days=7), tz='Europe/Paris') tm.assert_index_equal(result, expected)
def test_union_store_indexes(populate_store): store = pandas.HDFStore(populate_store['name'], mode = 'r+') index = populate_store['index'] union = utils.union_store_indexes(store) assert_index_equal(index, union) store.close() os.remove(populate_store['name'])
def test_concat_unions_categoricals(): # Categorical DataFrame, regular index tm.assert_frame_equal(_concat(frames), pd.concat(frames2)) # Categorical Series, regular index tm.assert_series_equal(_concat([i.y for i in frames]), pd.concat([i.y for i in frames2])) # Categorical Index tm.assert_index_equal(_concat([i.index for i in frames3]), pd.concat([i for i in frames4]).index) # Categorical DataFrame, Categorical Index tm.assert_frame_equal(_concat(frames3), pd.concat(frames4)) # Non-categorical DataFrame, Categorical Index tm.assert_frame_equal(_concat([i[['x', 'z']] for i in frames3]), pd.concat([i[['x', 'z']] for i in frames4])) # Categorical Series, Categorical Index tm.assert_series_equal(_concat([i.z for i in frames3]), pd.concat([i.z for i in frames4])) # Non-categorical Series, Categorical Index tm.assert_series_equal(_concat([i.x for i in frames3]), pd.concat([i.x for i in frames4])) # MultiIndex with Categorical Index tm.assert_index_equal(_concat([i.index for i in frames5]), pd.concat([i for i in frames6]).index) # DataFrame, MultiIndex with CategoricalIndex tm.assert_frame_equal(_concat(frames5), pd.concat(frames6))
def test_constructor_single_level(): result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], labels=[[0, 1, 2, 3]], names=['first']) assert isinstance(result, MultiIndex) expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') tm.assert_index_equal(result.levels[0], expected) assert result.names == ['first']
def test_astype_object2(self): idx = pd.period_range(start='2013-01-01', periods=4, freq='M', name='idx') expected_list = [pd.Period('2013-01-31', freq='M'), pd.Period('2013-02-28', freq='M'), pd.Period('2013-03-31', freq='M'), pd.Period('2013-04-30', freq='M')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) assert isinstance(result, Index) assert result.dtype == object tm.assert_index_equal(result, expected) assert result.name == expected.name assert idx.tolist() == expected_list idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', '2013-01-04'], freq='D', name='idx') expected_list = [pd.Period('2013-01-01', freq='D'), pd.Period('2013-01-02', freq='D'), pd.Period('NaT', freq='D'), pd.Period('2013-01-04', freq='D')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) assert isinstance(result, Index) assert result.dtype == object tm.assert_index_equal(result, expected) for i in [0, 1, 3]: assert result[i] == expected[i] assert result[2] is pd.NaT assert result.name == expected.name result_list = idx.tolist() for i in [0, 1, 3]: assert result_list[i] == expected_list[i] assert result_list[2] is pd.NaT
def test_categorical_order(self): # Directly construct using expected codes # Format is is_cat, col_name, labels (in order), underlying data expected = [(True, 'ordered', ['a', 'b', 'c', 'd', 'e'], np.arange(5)), (True, 'reverse', ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]), (True, 'noorder', ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])), (True, 'floating', ['a', 'b', 'c', 'd', 'e'], np.arange(0, 5)), (True, 'float_missing', ['a', 'd', 'e'], np.array([0, 1, 2, -1, -1])), (False, 'nolabel', [1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)), (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'], np.arange(5))] cols = [] for is_cat, col, labels, codes in expected: if is_cat: cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) expected = DataFrame.from_items(cols) # Read with and with out categoricals, ensure order is identical parsed_115 = read_stata(self.dta19_115) parsed_117 = read_stata(self.dta19_117) tm.assert_frame_equal(expected, parsed_115) tm.assert_frame_equal(expected, parsed_117) # Check identity of codes for col in expected: if is_categorical_dtype(expected[col]): tm.assert_series_equal(expected[col].cat.codes, parsed_115[col].cat.codes) tm.assert_index_equal(expected[col].cat.categories, parsed_115[col].cat.categories)
def test_ensure_copied_data(self): # Check the "copy" argument of each Index.__new__ is honoured # GH12309 for name, index in compat.iteritems(self.indices): init_kwargs = {} if isinstance(index, PeriodIndex): # Needs "freq" specification: init_kwargs['freq'] = index.freq elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)): # RangeIndex cannot be initialized from data # MultiIndex and CategoricalIndex are tested separately continue index_type = index.__class__ result = index_type(index.values, copy=True, **init_kwargs) tm.assert_index_equal(index, result) tm.assert_numpy_array_equal(index.values, result.values, check_same='copy') if not isinstance(index, PeriodIndex): result = index_type(index.values, copy=False, **init_kwargs) tm.assert_numpy_array_equal(index.values, result.values, check_same='same') tm.assert_numpy_array_equal(index._values, result._values, check_same='same') else: # .values an object array of Period, thus copied result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) tm.assert_numpy_array_equal(index._values, result._values, check_same='same')
def test_rpow_float(self): # test power calculations both ways, GH 14973 idx = self.create_index() expected = pd.Float64Index(2.0**idx.values) result = 2.0**idx tm.assert_index_equal(result, expected)
def test_datetime_name_accessors(self, time_locale): # Test Monday -> Sunday and January -> December, in that sequence if time_locale is None: # If the time_locale is None, day-name and month_name should # return the english attributes expected_days = [ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", ] expected_months = [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", ] else: with tm.set_locale(time_locale, locale.LC_TIME): expected_days = calendar.day_name[:] expected_months = calendar.month_name[1:] # GH#11128 dti = pd.date_range(freq="D", start=datetime(1998, 1, 1), periods=365) english_days = [ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", ] for day, name, eng_name in zip(range(4, 11), expected_days, english_days): name = name.capitalize() assert dti.day_name(locale=time_locale)[day] == name ts = Timestamp(datetime(2016, 4, day)) assert ts.day_name(locale=time_locale) == name dti = dti.append(DatetimeIndex([pd.NaT])) assert np.isnan(dti.day_name(locale=time_locale)[-1]) ts = Timestamp(pd.NaT) assert np.isnan(ts.day_name(locale=time_locale)) # GH#12805 dti = pd.date_range(freq="M", start="2012", end="2013") result = dti.month_name(locale=time_locale) expected = Index([month.capitalize() for month in expected_months]) # work around different normalization schemes # https://github.com/pandas-dev/pandas/issues/22342 result = result.str.normalize("NFD") expected = expected.str.normalize("NFD") tm.assert_index_equal(result, expected) for date, expected in zip(dti, expected_months): result = date.month_name(locale=time_locale) expected = expected.capitalize() result = unicodedata.normalize("NFD", result) expected = unicodedata.normalize("NFD", result) assert result == expected dti = dti.append(DatetimeIndex([pd.NaT])) assert np.isnan(dti.month_name(locale=time_locale)[-1])
def test_from_product_empty_one_level(): result = MultiIndex.from_product([[]], names=['A']) expected = pd.Index([], name='A') tm.assert_index_equal(result.levels[0], expected)
def test_from_tuples_empty(): # GH 16777 result = MultiIndex.from_tuples([], names=['a', 'b']) expected = MultiIndex.from_arrays(arrays=[[], []], names=['a', 'b']) tm.assert_index_equal(result, expected)
def test_from_arrays_index_series_categorical(): # GH13743 idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False) idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True) result = pd.MultiIndex.from_arrays([idx1, idx2]) tm.assert_index_equal(result.get_level_values(0), idx1) tm.assert_index_equal(result.get_level_values(1), idx2) result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) tm.assert_index_equal(result2.get_level_values(0), idx1) tm.assert_index_equal(result2.get_level_values(1), idx2) result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values]) tm.assert_index_equal(result3.get_level_values(0), idx1) tm.assert_index_equal(result3.get_level_values(1), idx2)
def test_from_arrays_index_datetimelike_mixed(): idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, tz='US/Eastern') idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3) idx3 = pd.timedelta_range('1 days', freq='D', periods=3) idx4 = pd.period_range('2011-01-01', freq='D', periods=3) result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4]) tm.assert_index_equal(result.get_level_values(0), idx1) tm.assert_index_equal(result.get_level_values(1), idx2) tm.assert_index_equal(result.get_level_values(2), idx3) tm.assert_index_equal(result.get_level_values(3), idx4) result2 = pd.MultiIndex.from_arrays( [pd.Series(idx1), pd.Series(idx2), pd.Series(idx3), pd.Series(idx4)]) tm.assert_index_equal(result2.get_level_values(0), idx1) tm.assert_index_equal(result2.get_level_values(1), idx2) tm.assert_index_equal(result2.get_level_values(2), idx3) tm.assert_index_equal(result2.get_level_values(3), idx4) tm.assert_index_equal(result, result2)
def test_dups_fancy_indexing(self): # GH 3455 df = tm.makeCustomDataframe(10, 3) df.columns = ["a", "a", "b"] result = df[["b", "a"]].columns expected = Index(["b", "a", "a"]) tm.assert_index_equal(result, expected) # across dtypes df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")) df.head() str(df) result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]]) result.columns = list("aaaaaaa") # TODO(wesm): unused? df_v = df.iloc[:, 4] # noqa res_v = result.iloc[:, 4] # noqa tm.assert_frame_equal(df, result) # GH 3561, dups not in selected order df = DataFrame( { "test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd") }, index=["A", "A", "B", "C"], ) rows = ["C", "B"] expected = DataFrame( { "test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"] }, index=rows) result = df.loc[rows] tm.assert_frame_equal(result, expected) result = df.loc[Index(rows)] tm.assert_frame_equal(result, expected) rows = ["C", "B", "E"] with pytest.raises(KeyError, match="with any missing labels"): df.loc[rows] # see GH5553, make sure we use the right indexer rows = ["F", "G", "H", "C", "B", "E"] with pytest.raises(KeyError, match="with any missing labels"): df.loc[rows] # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) with pytest.raises( KeyError, match=re.escape( "\"None of [Index(['E'], dtype='object')] are in the [index]\"" ), ): dfnu.loc[["E"]] # ToDo: check_index_type can be True after GH 11497 # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) with pytest.raises(KeyError, match="with any missing labels"): df.loc[[0, 8, 0]] df = DataFrame({"A": list("abc")}) with pytest.raises(KeyError, match="with any missing labels"): df.loc[[0, 8, 0]] # non unique with non unique selector df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) with pytest.raises(KeyError, match="with any missing labels"): df.loc[["A", "A", "E"]]
def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) tm.assert_index_equal(dti.nanosecond, pd.Index(np.arange(10, dtype=np.int64)))
def test_datetimeindex_accessors(self): dti_naive = pd.date_range(freq="D", start=datetime(1998, 1, 1), periods=365) # GH#13303 dti_tz = pd.date_range( freq="D", start=datetime(1998, 1, 1), periods=365, tz="US/Eastern" ) for dti in [dti_naive, dti_tz]: assert dti.year[0] == 1998 assert dti.month[0] == 1 assert dti.day[0] == 1 assert dti.hour[0] == 0 assert dti.minute[0] == 0 assert dti.second[0] == 0 assert dti.microsecond[0] == 0 assert dti.dayofweek[0] == 3 assert dti.dayofyear[0] == 1 assert dti.dayofyear[120] == 121 assert dti.weekofyear[0] == 1 assert dti.weekofyear[120] == 18 assert dti.quarter[0] == 1 assert dti.quarter[120] == 2 assert dti.days_in_month[0] == 31 assert dti.days_in_month[90] == 30 assert dti.is_month_start[0] assert not dti.is_month_start[1] assert dti.is_month_start[31] assert dti.is_quarter_start[0] assert dti.is_quarter_start[90] assert dti.is_year_start[0] assert not dti.is_year_start[364] assert not dti.is_month_end[0] assert dti.is_month_end[30] assert not dti.is_month_end[31] assert dti.is_month_end[364] assert not dti.is_quarter_end[0] assert not dti.is_quarter_end[30] assert dti.is_quarter_end[89] assert dti.is_quarter_end[364] assert not dti.is_year_end[0] assert dti.is_year_end[364] assert len(dti.year) == 365 assert len(dti.month) == 365 assert len(dti.day) == 365 assert len(dti.hour) == 365 assert len(dti.minute) == 365 assert len(dti.second) == 365 assert len(dti.microsecond) == 365 assert len(dti.dayofweek) == 365 assert len(dti.dayofyear) == 365 assert len(dti.weekofyear) == 365 assert len(dti.quarter) == 365 assert len(dti.is_month_start) == 365 assert len(dti.is_month_end) == 365 assert len(dti.is_quarter_start) == 365 assert len(dti.is_quarter_end) == 365 assert len(dti.is_year_start) == 365 assert len(dti.is_year_end) == 365 dti.name = "name" # non boolean accessors -> return Index for accessor in DatetimeIndex._field_ops: res = getattr(dti, accessor) assert len(res) == 365 assert isinstance(res, Index) assert res.name == "name" # boolean accessors -> return array for accessor in DatetimeIndex._bool_ops: res = getattr(dti, accessor) assert len(res) == 365 assert isinstance(res, np.ndarray) # test boolean indexing res = dti[dti.is_quarter_start] exp = dti[[0, 90, 181, 273]] tm.assert_index_equal(res, exp) res = dti[dti.is_leap_year] exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name") tm.assert_index_equal(res, exp) dti = pd.date_range(freq="BQ-FEB", start=datetime(1998, 1, 1), periods=4) assert sum(dti.is_quarter_start) == 0 assert sum(dti.is_quarter_end) == 4 assert sum(dti.is_year_start) == 0 assert sum(dti.is_year_end) == 1 # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu") dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) msg = "Custom business days is not supported by is_month_start" with pytest.raises(ValueError, match=msg): dti.is_month_start dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"]) assert dti.is_month_start[0] == 1 tests = [ (Timestamp("2013-06-01", freq="M").is_month_start, 1), (Timestamp("2013-06-01", freq="BM").is_month_start, 0), (Timestamp("2013-06-03", freq="M").is_month_start, 0), (Timestamp("2013-06-03", freq="BM").is_month_start, 1), (Timestamp("2013-02-28", freq="Q-FEB").is_month_end, 1), (Timestamp("2013-02-28", freq="Q-FEB").is_quarter_end, 1), (Timestamp("2013-02-28", freq="Q-FEB").is_year_end, 1), (Timestamp("2013-03-01", freq="Q-FEB").is_month_start, 1), (Timestamp("2013-03-01", freq="Q-FEB").is_quarter_start, 1), (Timestamp("2013-03-01", freq="Q-FEB").is_year_start, 1), (Timestamp("2013-03-31", freq="QS-FEB").is_month_end, 1), (Timestamp("2013-03-31", freq="QS-FEB").is_quarter_end, 0), (Timestamp("2013-03-31", freq="QS-FEB").is_year_end, 0), (Timestamp("2013-02-01", freq="QS-FEB").is_month_start, 1), (Timestamp("2013-02-01", freq="QS-FEB").is_quarter_start, 1), (Timestamp("2013-02-01", freq="QS-FEB").is_year_start, 1), (Timestamp("2013-06-30", freq="BQ").is_month_end, 0), (Timestamp("2013-06-30", freq="BQ").is_quarter_end, 0), (Timestamp("2013-06-30", freq="BQ").is_year_end, 0), (Timestamp("2013-06-28", freq="BQ").is_month_end, 1), (Timestamp("2013-06-28", freq="BQ").is_quarter_end, 1), (Timestamp("2013-06-28", freq="BQ").is_year_end, 0), (Timestamp("2013-06-30", freq="BQS-APR").is_month_end, 0), (Timestamp("2013-06-30", freq="BQS-APR").is_quarter_end, 0), (Timestamp("2013-06-30", freq="BQS-APR").is_year_end, 0), (Timestamp("2013-06-28", freq="BQS-APR").is_month_end, 1), (Timestamp("2013-06-28", freq="BQS-APR").is_quarter_end, 1), (Timestamp("2013-03-29", freq="BQS-APR").is_year_end, 1), (Timestamp("2013-11-01", freq="AS-NOV").is_year_start, 1), (Timestamp("2013-10-31", freq="AS-NOV").is_year_end, 1), (Timestamp("2012-02-01").days_in_month, 29), (Timestamp("2013-02-01").days_in_month, 28), ] for ts, value in tests: assert ts == value # GH 6538: Check that DatetimeIndex and its TimeStamp elements # return the same weekofyear accessor close to new year w/ tz dates = ["2013/12/29", "2013/12/30", "2013/12/31"] dates = DatetimeIndex(dates, tz="Europe/Brussels") expected = [52, 1, 1] assert dates.weekofyear.tolist() == expected assert [d.weekofyear for d in dates] == expected
def test_duplicated_drop_duplicates_index(self): # GH 4060 for original in self.objs: if isinstance(original, Index): # special case if original.is_boolean(): result = original.drop_duplicates() expected = Index([False, True], name='a') tm.assert_index_equal(result, expected) continue # original doesn't have duplicates expected = np.array([False] * len(original), dtype=bool) duplicated = original.duplicated() tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) result = original.drop_duplicates() tm.assert_index_equal(result, original) self.assertFalse(result is original) # has_duplicates self.assertFalse(original.has_duplicates) # create repeated values, 3rd and 5th values are duplicated idx = original[list(range(len(original))) + [5, 3]] expected = np.array([False] * len(original) + [True, True], dtype=bool) duplicated = idx.duplicated() tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) tm.assert_index_equal(idx.drop_duplicates(), original) base = [False] * len(idx) base[3] = True base[5] = True expected = np.array(base) duplicated = idx.duplicated(keep='last') tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) result = idx.drop_duplicates(keep='last') tm.assert_index_equal(result, idx[~expected]) # deprecate take_last with tm.assert_produces_warning(FutureWarning): duplicated = idx.duplicated(take_last=True) tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) with tm.assert_produces_warning(FutureWarning): result = idx.drop_duplicates(take_last=True) tm.assert_index_equal(result, idx[~expected]) base = [False] * len(original) + [True, True] base[3] = True base[5] = True expected = np.array(base) duplicated = idx.duplicated(keep=False) tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) result = idx.drop_duplicates(keep=False) tm.assert_index_equal(result, idx[~expected]) with tm.assertRaisesRegexp( TypeError, r"drop_duplicates\(\) got an unexpected " "keyword argument"): idx.drop_duplicates(inplace=True) else: expected = Series([False] * len(original), index=original.index, name='a') tm.assert_series_equal(original.duplicated(), expected) result = original.drop_duplicates() tm.assert_series_equal(result, original) self.assertFalse(result is original) idx = original.index[list(range(len(original))) + [5, 3]] values = original._values[list(range(len(original))) + [5, 3]] s = Series(values, index=idx, name='a') expected = Series([False] * len(original) + [True, True], index=idx, name='a') tm.assert_series_equal(s.duplicated(), expected) tm.assert_series_equal(s.drop_duplicates(), original) base = [False] * len(idx) base[3] = True base[5] = True expected = Series(base, index=idx, name='a') tm.assert_series_equal(s.duplicated(keep='last'), expected) tm.assert_series_equal(s.drop_duplicates(keep='last'), s[~np.array(base)]) # deprecate take_last with tm.assert_produces_warning(FutureWarning): tm.assert_series_equal(s.duplicated(take_last=True), expected) with tm.assert_produces_warning(FutureWarning): tm.assert_series_equal(s.drop_duplicates(take_last=True), s[~np.array(base)]) base = [False] * len(original) + [True, True] base[3] = True base[5] = True expected = Series(base, index=idx, name='a') tm.assert_series_equal(s.duplicated(keep=False), expected) tm.assert_series_equal(s.drop_duplicates(keep=False), s[~np.array(base)]) s.drop_duplicates(inplace=True) tm.assert_series_equal(s, original)
def test_range_edges(self): # GH#13672 idx = pd.date_range( start=Timestamp("1970-01-01 00:00:00.000000001"), end=Timestamp("1970-01-01 00:00:00.000000004"), freq="N", ) exp = DatetimeIndex( [ "1970-01-01 00:00:00.000000001", "1970-01-01 00:00:00.000000002", "1970-01-01 00:00:00.000000003", "1970-01-01 00:00:00.000000004", ] ) tm.assert_index_equal(idx, exp) idx = pd.date_range( start=Timestamp("1970-01-01 00:00:00.000000004"), end=Timestamp("1970-01-01 00:00:00.000000001"), freq="N", ) exp = DatetimeIndex([]) tm.assert_index_equal(idx, exp) idx = pd.date_range( start=Timestamp("1970-01-01 00:00:00.000000001"), end=Timestamp("1970-01-01 00:00:00.000000001"), freq="N", ) exp = DatetimeIndex(["1970-01-01 00:00:00.000000001"]) tm.assert_index_equal(idx, exp) idx = pd.date_range( start=Timestamp("1970-01-01 00:00:00.000001"), end=Timestamp("1970-01-01 00:00:00.000004"), freq="U", ) exp = DatetimeIndex( [ "1970-01-01 00:00:00.000001", "1970-01-01 00:00:00.000002", "1970-01-01 00:00:00.000003", "1970-01-01 00:00:00.000004", ] ) tm.assert_index_equal(idx, exp) idx = pd.date_range( start=Timestamp("1970-01-01 00:00:00.001"), end=Timestamp("1970-01-01 00:00:00.004"), freq="L", ) exp = DatetimeIndex( [ "1970-01-01 00:00:00.001", "1970-01-01 00:00:00.002", "1970-01-01 00:00:00.003", "1970-01-01 00:00:00.004", ] ) tm.assert_index_equal(idx, exp) idx = pd.date_range( start=Timestamp("1970-01-01 00:00:01"), end=Timestamp("1970-01-01 00:00:04"), freq="S", ) exp = DatetimeIndex( [ "1970-01-01 00:00:01", "1970-01-01 00:00:02", "1970-01-01 00:00:03", "1970-01-01 00:00:04", ] ) tm.assert_index_equal(idx, exp) idx = pd.date_range( start=Timestamp("1970-01-01 00:01"), end=Timestamp("1970-01-01 00:04"), freq="T", ) exp = DatetimeIndex( [ "1970-01-01 00:01", "1970-01-01 00:02", "1970-01-01 00:03", "1970-01-01 00:04", ] ) tm.assert_index_equal(idx, exp) idx = pd.date_range( start=Timestamp("1970-01-01 01:00"), end=Timestamp("1970-01-01 04:00"), freq="H", ) exp = DatetimeIndex( [ "1970-01-01 01:00", "1970-01-01 02:00", "1970-01-01 03:00", "1970-01-01 04:00", ] ) tm.assert_index_equal(idx, exp) idx = pd.date_range( start=Timestamp("1970-01-01"), end=Timestamp("1970-01-04"), freq="D" ) exp = DatetimeIndex(["1970-01-01", "1970-01-02", "1970-01-03", "1970-01-04"]) tm.assert_index_equal(idx, exp)
def test_value_counts_bins(self): klasses = [Index, Series] for klass in klasses: s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a'] s = klass(s_values) # bins self.assertRaises(TypeError, lambda bins: s.value_counts(bins=bins), 1) s1 = Series([1, 1, 2, 3]) res1 = s1.value_counts(bins=1) exp1 = Series({0.998: 4}) tm.assert_series_equal(res1, exp1) res1n = s1.value_counts(bins=1, normalize=True) exp1n = Series({0.998: 1.0}) tm.assert_series_equal(res1n, exp1n) if isinstance(s1, Index): tm.assert_index_equal(s1.unique(), Index([1, 2, 3])) else: exp = np.array([1, 2, 3], dtype=np.int64) tm.assert_numpy_array_equal(s1.unique(), exp) self.assertEqual(s1.nunique(), 3) res4 = s1.value_counts(bins=4) exp4 = Series({ 0.998: 2, 1.5: 1, 2.0: 0, 2.5: 1 }, index=[0.998, 2.5, 1.5, 2.0]) tm.assert_series_equal(res4, exp4) res4n = s1.value_counts(bins=4, normalize=True) exp4n = Series({ 0.998: 0.5, 1.5: 0.25, 2.0: 0.0, 2.5: 0.25 }, index=[0.998, 2.5, 1.5, 2.0]) tm.assert_series_equal(res4n, exp4n) # handle NA's properly s_values = [ 'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b' ] s = klass(s_values) expected = Series([4, 3, 2], index=['b', 'a', 'd']) tm.assert_series_equal(s.value_counts(), expected) if isinstance(s, Index): exp = Index(['a', 'b', np.nan, 'd']) tm.assert_index_equal(s.unique(), exp) else: exp = np.array(['a', 'b', np.nan, 'd'], dtype=object) tm.assert_numpy_array_equal(s.unique(), exp) self.assertEqual(s.nunique(), 3) s = klass({}) expected = Series([], dtype=np.int64) tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) # returned dtype differs depending on original if isinstance(s, Index): self.assert_index_equal(s.unique(), Index([]), exact=False) else: self.assert_numpy_array_equal(s.unique(), np.array([]), check_dtype=False) self.assertEqual(s.nunique(), 0)
def test_calculated_against_csv(self): assert_index_equal(self.calendar.schedule.index, self.answers.index)
def test_transpose(self): for obj in self.objs: if isinstance(obj, Index): tm.assert_index_equal(obj.transpose(), obj) else: tm.assert_series_equal(obj.transpose(), obj)
def test_value_counts_datetime64(self): klasses = [Index, Series] for klass in klasses: # GH 3002, datetime64[ns] # don't test names though txt = "\n".join([ 'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG', 'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM' ]) f = StringIO(txt) df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"]) s = klass(df['dt'].copy()) s.name = None idx = pd.to_datetime([ '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z', '2009-01-01 00:00:00X' ]) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) expected = np_array_datetime64_compat([ '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z', '2008-09-09 00:00:00Z' ], dtype='datetime64[ns]') if isinstance(s, Index): tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) else: tm.assert_numpy_array_equal(s.unique(), expected) self.assertEqual(s.nunique(), 3) # with NaT s = df['dt'].copy() s = klass([v for v in s.values] + [pd.NaT]) result = s.value_counts() self.assertEqual(result.index.dtype, 'datetime64[ns]') tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) unique = s.unique() self.assertEqual(unique.dtype, 'datetime64[ns]') # numpy_array_equal cannot compare pd.NaT if isinstance(s, Index): exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT]) tm.assert_index_equal(unique, exp_idx) else: tm.assert_numpy_array_equal(unique[:3], expected) self.assertTrue(pd.isnull(unique[3])) self.assertEqual(s.nunique(), 3) self.assertEqual(s.nunique(dropna=False), 4) # timedelta64[ns] td = df.dt - df.dt + timedelta(1) td = klass(td, name='dt') result = td.value_counts() expected_s = Series([6], index=[Timedelta('1day')], name='dt') tm.assert_series_equal(result, expected_s) expected = TimedeltaIndex(['1 days'], name='dt') if isinstance(td, Index): tm.assert_index_equal(td.unique(), expected) else: tm.assert_numpy_array_equal(td.unique(), expected.values) td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name='dt') result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s)
def test_intersection_bug(self): # GH #771 a = bdate_range('11/30/2011', '12/31/2011', freq='C') b = bdate_range('12/10/2011', '12/20/2011', freq='C') result = a.intersection(b) tm.assert_index_equal(result, b)
def test_value_counts_unique_nunique_null(self): for null_obj in [np.nan, None]: for orig in self.objs: o = orig.copy() klass = type(o) values = o._values if not self._allow_na_ops(o): continue # special assign to the numpy array if is_datetimetz(o): if isinstance(o, DatetimeIndex): v = o.asi8 v[0:2] = pd.tslib.iNaT values = o._shallow_copy(v) else: o = o.copy() o[0:2] = pd.tslib.iNaT values = o._values elif needs_i8_conversion(o): values[0:2] = pd.tslib.iNaT values = o._shallow_copy(values) else: values[0:2] = null_obj # check values has the same dtype as the original self.assertEqual(values.dtype, o.dtype) # create repeated values, 'n'th element is repeated by n+1 # times if isinstance(o, (DatetimeIndex, PeriodIndex)): expected_index = o.copy() expected_index.name = None # attach name to klass o = klass(values.repeat(range(1, len(o) + 1))) o.name = 'a' else: if is_datetimetz(o): expected_index = orig._values._shallow_copy(values) else: expected_index = pd.Index(values) expected_index.name = None o = o.repeat(range(1, len(o) + 1)) o.name = 'a' # check values has the same dtype as the original self.assertEqual(o.dtype, orig.dtype) # check values correctly have NaN nanloc = np.zeros(len(o), dtype=np.bool) nanloc[:3] = True if isinstance(o, Index): self.assert_numpy_array_equal(pd.isnull(o), nanloc) else: exp = pd.Series(nanloc, o.index, name='a') self.assert_series_equal(pd.isnull(o), exp) expected_s_na = Series(list(range(10, 2, -1)) + [3], index=expected_index[9:0:-1], dtype='int64', name='a') expected_s = Series(list(range(10, 2, -1)), index=expected_index[9:1:-1], dtype='int64', name='a') result_s_na = o.value_counts(dropna=False) tm.assert_series_equal(result_s_na, expected_s_na) self.assertTrue(result_s_na.index.name is None) self.assertEqual(result_s_na.name, 'a') result_s = o.value_counts() tm.assert_series_equal(o.value_counts(), expected_s) self.assertTrue(result_s.index.name is None) self.assertEqual(result_s.name, 'a') result = o.unique() if isinstance(o, Index): tm.assert_index_equal(result, Index(values[1:], name='a')) elif is_datetimetz(o): # unable to compare NaT / nan tm.assert_numpy_array_equal(result[1:], values[2:].asobject.values) self.assertIs(result[0], pd.NaT) else: tm.assert_numpy_array_equal(result[1:], values[2:]) self.assertTrue(pd.isnull(result[0])) self.assertEqual(result.dtype, orig.dtype) self.assertEqual(o.nunique(), 8) self.assertEqual(o.nunique(dropna=False), 9)
def test_slice_specialised(self): # scalar indexing res = self.index[1] expected = 2 assert res == expected res = self.index[-1] expected = 18 assert res == expected # slicing # slice value completion index = self.index[:] expected = self.index tm.assert_index_equal(index, expected) # positive slice values index = self.index[7:10:2] expected = Index(np.array([14, 18]), name='foo') tm.assert_index_equal(index, expected) # negative slice values index = self.index[-1:-5:-2] expected = Index(np.array([18, 14]), name='foo') tm.assert_index_equal(index, expected) # stop overshoot index = self.index[2:100:4] expected = Index(np.array([4, 12]), name='foo') tm.assert_index_equal(index, expected) # reverse index = self.index[::-1] expected = Index(self.index.values[::-1], name='foo') tm.assert_index_equal(index, expected) index = self.index[-8::-1] expected = Index(np.array([4, 2, 0]), name='foo') tm.assert_index_equal(index, expected) index = self.index[-40::-1] expected = Index(np.array([], dtype=np.int64), name='foo') tm.assert_index_equal(index, expected) index = self.index[40::-1] expected = Index(self.index.values[40::-1], name='foo') tm.assert_index_equal(index, expected) index = self.index[10::-1] expected = Index(self.index.values[::-1], name='foo') tm.assert_index_equal(index, expected)
def test_intersection(self): # GH 4690 (with tz) for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx') # if target has the same name, it is preserved rng2 = date_range('5/15/2000', '6/20/2000', freq='D', name='idx') expected2 = date_range('6/1/2000', '6/20/2000', freq='D', name='idx') # if target name is different, it will be reset rng3 = date_range('5/15/2000', '6/20/2000', freq='D', name='other') expected3 = date_range('6/1/2000', '6/20/2000', freq='D', name=None) rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx') expected4 = DatetimeIndex([], name='idx') for (rng, expected) in [(rng2, expected2), (rng3, expected3), (rng4, expected4)]: result = base.intersection(rng) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq assert result.tz == expected.tz # non-monotonic base = DatetimeIndex( ['2011-01-05', '2011-01-04', '2011-01-02', '2011-01-03'], tz=tz, name='idx') rng2 = DatetimeIndex( ['2011-01-04', '2011-01-02', '2011-02-02', '2011-02-03'], tz=tz, name='idx') expected2 = DatetimeIndex(['2011-01-04', '2011-01-02'], tz=tz, name='idx') rng3 = DatetimeIndex( ['2011-01-04', '2011-01-02', '2011-02-02', '2011-02-03'], tz=tz, name='other') expected3 = DatetimeIndex(['2011-01-04', '2011-01-02'], tz=tz, name=None) # GH 7880 rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz, name='idx') expected4 = DatetimeIndex([], tz=tz, name='idx') for (rng, expected) in [(rng2, expected2), (rng3, expected3), (rng4, expected4)]: result = base.intersection(rng) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq is None assert result.tz == expected.tz # empty same freq GH2129 rng = date_range('6/1/2000', '6/15/2000', freq='T') result = rng[0:0].intersection(rng) assert len(result) == 0 result = rng.intersection(rng[0:0]) assert len(result) == 0
def test_repr_roundtrip(self): tm.assert_index_equal(eval(repr(self.index)), self.index)
def test_union(self): i1 = Int64Index(np.arange(0, 20, 2)) i2 = Int64Index(np.arange(10, 30, 2)) result = i1.union(i2) expected = Int64Index(np.arange(0, 30, 2)) tm.assert_index_equal(result, expected)
def test_intersection(self): # intersect with Int64Index other = Index(np.arange(1, 6)) result = self.index.intersection(other) expected = Index( np.sort(np.intersect1d(self.index.values, other.values))) tm.assert_index_equal(result, expected) result = other.intersection(self.index) expected = Index( np.sort(np.asarray(np.intersect1d(self.index.values, other.values)))) tm.assert_index_equal(result, expected) # intersect with increasing RangeIndex other = RangeIndex(1, 6) result = self.index.intersection(other) expected = Index( np.sort(np.intersect1d(self.index.values, other.values))) tm.assert_index_equal(result, expected) # intersect with decreasing RangeIndex other = RangeIndex(5, 0, -1) result = self.index.intersection(other) expected = Index( np.sort(np.intersect1d(self.index.values, other.values))) tm.assert_index_equal(result, expected) # reversed (GH 17296) result = other.intersection(self.index) tm.assert_index_equal(result, expected) # GH 17296: intersect two decreasing RangeIndexes first = RangeIndex(10, -2, -2) other = RangeIndex(5, -4, -1) expected = first.astype(int).intersection(other.astype(int)) result = first.intersection(other).astype(int) tm.assert_index_equal(result, expected) # reversed result = other.intersection(first).astype(int) tm.assert_index_equal(result, expected) index = RangeIndex(5) # intersect of non-overlapping indices other = RangeIndex(5, 10, 1) result = index.intersection(other) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) other = RangeIndex(-1, -5, -1) result = index.intersection(other) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) # intersection of empty indices other = RangeIndex(0, 0, 1) result = index.intersection(other) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) result = other.intersection(index) tm.assert_index_equal(result, expected) # intersection of non-overlapping values based on start value and gcd index = RangeIndex(1, 10, 2) other = RangeIndex(0, 10, 4) result = index.intersection(other) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected)
def test_ufunc_compat(self): idx = RangeIndex(5) result = np.sin(idx) expected = Float64Index(np.sin(np.arange(5, dtype='int64'))) tm.assert_index_equal(result, expected)
def test_numeric_compat2(self): # validate that we are handling the RangeIndex overrides to numeric ops # and returning RangeIndex where possible idx = RangeIndex(0, 10, 2) result = idx * 2 expected = RangeIndex(0, 20, 4) tm.assert_index_equal(result, expected, exact=True) result = idx + 2 expected = RangeIndex(2, 12, 2) tm.assert_index_equal(result, expected, exact=True) result = idx - 2 expected = RangeIndex(-2, 8, 2) tm.assert_index_equal(result, expected, exact=True) # truediv under PY3 result = idx / 2 if PY3: expected = RangeIndex(0, 5, 1).astype('float64') else: expected = RangeIndex(0, 5, 1) tm.assert_index_equal(result, expected, exact=True) result = idx / 4 expected = RangeIndex(0, 10, 2) / 4 tm.assert_index_equal(result, expected, exact=True) result = idx // 1 expected = idx tm.assert_index_equal(result, expected, exact=True) # __mul__ result = idx * idx expected = Index(idx.values * idx.values) tm.assert_index_equal(result, expected, exact=True) # __pow__ idx = RangeIndex(0, 1000, 2) result = idx**2 expected = idx._int64index**2 tm.assert_index_equal(Index(result.values), expected, exact=True) # __floordiv__ cases_exact = [ (RangeIndex(0, 1000, 2), 2, RangeIndex(0, 500, 1)), (RangeIndex(-99, -201, -3), -3, RangeIndex(33, 67, 1)), (RangeIndex(0, 1000, 1), 2, RangeIndex(0, 1000, 1)._int64index // 2), (RangeIndex(0, 100, 1), 2.0, RangeIndex(0, 100, 1)._int64index // 2.0), (RangeIndex(0), 50, RangeIndex(0)), (RangeIndex(2, 4, 2), 3, RangeIndex(0, 1, 1)), (RangeIndex(-5, -10, -6), 4, RangeIndex(-2, -1, 1)), (RangeIndex(-100, -200, 3), 2, RangeIndex(0)) ] for idx, div, expected in cases_exact: tm.assert_index_equal(idx // div, expected, exact=True)
def test_constructor(self): index = RangeIndex(5) expected = np.arange(5, dtype=np.int64) assert isinstance(index, RangeIndex) assert index._start == 0 assert index._stop == 5 assert index._step == 1 assert index.name is None tm.assert_index_equal(Index(expected), index) index = RangeIndex(1, 5) expected = np.arange(1, 5, dtype=np.int64) assert isinstance(index, RangeIndex) assert index._start == 1 tm.assert_index_equal(Index(expected), index) index = RangeIndex(1, 5, 2) expected = np.arange(1, 5, 2, dtype=np.int64) assert isinstance(index, RangeIndex) assert index._step == 2 tm.assert_index_equal(Index(expected), index) msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers" with tm.assert_raises_regex(TypeError, msg): RangeIndex() for index in [ RangeIndex(0), RangeIndex(start=0), RangeIndex(stop=0), RangeIndex(0, 0) ]: expected = np.empty(0, dtype=np.int64) assert isinstance(index, RangeIndex) assert index._start == 0 assert index._stop == 0 assert index._step == 1 tm.assert_index_equal(Index(expected), index) with tm.assert_raises_regex(TypeError, msg): RangeIndex(name='Foo') for index in [ RangeIndex(0, name='Foo'), RangeIndex(start=0, name='Foo'), RangeIndex(stop=0, name='Foo'), RangeIndex(0, 0, name='Foo') ]: assert isinstance(index, RangeIndex) assert index.name == 'Foo' # we don't allow on a bare Index pytest.raises(TypeError, lambda: Index(0, 1000)) # invalid args for i in [ Index(['a', 'b']), Series(['a', 'b']), np.array(['a', 'b']), [], 'foo', datetime(2000, 1, 1, 0, 0), np.arange(0, 10), np.array([1]), [1] ]: pytest.raises(TypeError, lambda: RangeIndex(i))
def test_combineSeries(self): # Series series = self.frame.xs(self.frame.index[0]) added = self.frame + series for key, s in compat.iteritems(added): assert_series_equal(s, self.frame[key] + series[key]) larger_series = series.to_dict() larger_series['E'] = 1 larger_series = Series(larger_series) larger_added = self.frame + larger_series for key, s in compat.iteritems(self.frame): assert_series_equal(larger_added[key], s + series[key]) assert 'E' in larger_added assert np.isnan(larger_added['E']).all() # no upcast needed added = self.mixed_float + series _check_mixed_float(added) # vs mix (upcast) as needed added = self.mixed_float + series.astype('float32') _check_mixed_float(added, dtype=dict(C=None)) added = self.mixed_float + series.astype('float16') _check_mixed_float(added, dtype=dict(C=None)) # these raise with numexpr.....as we are adding an int64 to an # uint64....weird vs int # added = self.mixed_int + (100*series).astype('int64') # _check_mixed_int(added, dtype = dict(A = 'int64', B = 'float64', C = # 'int64', D = 'int64')) # added = self.mixed_int + (100*series).astype('int32') # _check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C = # 'int32', D = 'int64')) # TimeSeries ts = self.tsframe['A'] # 10890 # we no longer allow auto timeseries broadcasting # and require explicit broadcasting added = self.tsframe.add(ts, axis='index') for key, col in compat.iteritems(self.tsframe): result = col + ts assert_series_equal(added[key], result, check_names=False) assert added[key].name == key if col.name == ts.name: assert result.name == 'A' else: assert result.name is None smaller_frame = self.tsframe[:-5] smaller_added = smaller_frame.add(ts, axis='index') tm.assert_index_equal(smaller_added.index, self.tsframe.index) smaller_ts = ts[:-5] smaller_added2 = self.tsframe.add(smaller_ts, axis='index') assert_frame_equal(smaller_added, smaller_added2) # length 0, result is all-nan result = self.tsframe.add(ts[:0], axis='index') expected = DataFrame(np.nan, index=self.tsframe.index, columns=self.tsframe.columns) assert_frame_equal(result, expected) # Frame is all-nan result = self.tsframe[:0].add(ts, axis='index') expected = DataFrame(np.nan, index=self.tsframe.index, columns=self.tsframe.columns) assert_frame_equal(result, expected) # empty but with non-empty index frame = self.tsframe[:1].reindex(columns=[]) result = frame.mul(ts, axis='index') assert len(result) == len(ts)
def test_join_non_int_index(self): other = Index([3, 6, 7, 8, 10], dtype=object) outer = self.index.join(other, how='outer') outer2 = other.join(self.index, how='outer') expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) tm.assert_index_equal(outer, outer2) tm.assert_index_equal(outer, expected) inner = self.index.join(other, how='inner') inner2 = other.join(self.index, how='inner') expected = Index([6, 8, 10]) tm.assert_index_equal(inner, inner2) tm.assert_index_equal(inner, expected) left = self.index.join(other, how='left') tm.assert_index_equal(left, self.index.astype(object)) left2 = other.join(self.index, how='left') tm.assert_index_equal(left2, other) right = self.index.join(other, how='right') tm.assert_index_equal(right, other) right2 = other.join(self.index, how='right') tm.assert_index_equal(right2, self.index.astype(object))
def test_integer_index_astype_datetime(self, tz, dtype): # GH 20997, 20964 val = [pd.Timestamp('2018-01-01', tz=tz).value] result = pd.Index(val).astype(dtype) expected = pd.DatetimeIndex(['2018-01-01'], tz=tz) tm.assert_index_equal(result, expected)
def test_to_csv_multiindex(self): frame = self.frame old_index = frame.index arrays = np.arange(len(old_index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) frame.index = new_index with ensure_clean("__tmp_to_csv_multiindex__") as path: frame.to_csv(path, header=False) frame.to_csv(path, columns=["A", "B"]) # round trip frame.to_csv(path) df = self.read_csv(path, index_col=[0, 1], parse_dates=False) # TODO to_csv drops column name assert_frame_equal(frame, df, check_names=False) assert frame.index.names == df.index.names # needed if setUp becomes a class method self.frame.index = old_index # try multiindex with dates tsframe = self.tsframe old_index = tsframe.index new_index = [old_index, np.arange(len(old_index))] tsframe.index = MultiIndex.from_arrays(new_index) tsframe.to_csv(path, index_label=["time", "foo"]) recons = self.read_csv(path, index_col=[0, 1]) # TODO to_csv drops column name assert_frame_equal(tsframe, recons, check_names=False) # do not load index tsframe.to_csv(path) recons = self.read_csv(path, index_col=None) assert len(recons.columns) == len(tsframe.columns) + 2 # no index tsframe.to_csv(path, index=False) recons = self.read_csv(path, index_col=None) assert_almost_equal(recons.values, self.tsframe.values) # needed if setUp becomes class method self.tsframe.index = old_index with ensure_clean("__tmp_to_csv_multiindex__") as path: # GH3571, GH1651, GH3141 def _make_frame(names=None): if names is True: names = ["first", "second"] return DataFrame( np.random.randint(0, 10, size=(3, 3)), columns=MultiIndex.from_tuples([("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names), dtype="int64", ) # column & index are multi-index df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) assert_frame_equal(df, result) # column is mi df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=0) assert_frame_equal(df, result) # dup column names? df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) assert_frame_equal(df, result) # writing with no index df = _make_frame() df.to_csv(path, index=False) result = read_csv(path, header=[0, 1]) assert_frame_equal(df, result) # we lose the names here df = _make_frame(True) df.to_csv(path, index=False) result = read_csv(path, header=[0, 1]) assert com.all_none(*result.columns.names) result.columns.names = df.columns.names assert_frame_equal(df, result) # whatsnew example df = _make_frame() df.to_csv(path) result = read_csv(path, header=[0, 1], index_col=[0]) assert_frame_equal(df, result) df = _make_frame(True) df.to_csv(path) result = read_csv(path, header=[0, 1], index_col=[0]) assert_frame_equal(df, result) # invalid options df = _make_frame(True) df.to_csv(path) for i in [6, 7]: msg = "len of {i}, but only 5 lines in file".format(i=i) with pytest.raises(ParserError, match=msg): read_csv(path, header=list(range(i)), index_col=0) # write with cols msg = "cannot specify cols with a MultiIndex" with pytest.raises(TypeError, match=msg): df.to_csv(path, columns=["foo", "bar"]) with ensure_clean("__tmp_to_csv_multiindex__") as path: # empty tsframe[:0].to_csv(path) recons = self.read_csv(path) exp = tsframe[:0] exp.index = [] tm.assert_index_equal(recons.columns, exp.columns) assert len(recons) == 0
def test_to_datetime_unit(self): epoch = 1370745748 s = Series([epoch + t for t in range(20)]) result = to_datetime(s, unit='s') expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ]) assert_series_equal(result, expected) s = Series([epoch + t for t in range(20)]).astype(float) result = to_datetime(s, unit='s') expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ]) assert_series_equal(result, expected) s = Series([epoch + t for t in range(20)] + [iNaT]) result = to_datetime(s, unit='s') expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT]) assert_series_equal(result, expected) s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) result = to_datetime(s, unit='s') expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT]) assert_series_equal(result, expected) # GH13834 s = Series([epoch + t for t in np.arange(0, 2, .25)] + [iNaT]).astype(float) result = to_datetime(s, unit='s') expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in np.arange(0, 2, .25) ] + [NaT]) assert_series_equal(result, expected) s = concat([ Series([epoch + t for t in range(20)]).astype(float), Series([np.nan]) ], ignore_index=True) result = to_datetime(s, unit='s') expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT]) assert_series_equal(result, expected) result = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D') expected = DatetimeIndex( [Timestamp('1970-01-02'), Timestamp('1970-01-03')] + ['NaT'] * 3) tm.assert_index_equal(result, expected) with pytest.raises(ValueError): to_datetime([1, 2, 'foo'], unit='D') with pytest.raises(ValueError): to_datetime([1, 2, 111111111], unit='D') # coerce we can process expected = DatetimeIndex( [Timestamp('1970-01-02'), Timestamp('1970-01-03')] + ['NaT'] * 1) result = to_datetime([1, 2, 'foo'], unit='D', errors='coerce') tm.assert_index_equal(result, expected) result = to_datetime([1, 2, 111111111], unit='D', errors='coerce') tm.assert_index_equal(result, expected)
def test_combineFrame(self): frame_copy = self.frame.reindex(self.frame.index[::2]) del frame_copy['D'] frame_copy['C'][:5] = nan added = self.frame + frame_copy indexer = added['A'].dropna().index exp = (self.frame['A'] * 2).copy() tm.assert_series_equal(added['A'].dropna(), exp.loc[indexer]) exp.loc[~exp.index.isin(indexer)] = np.nan tm.assert_series_equal(added['A'], exp.loc[added['A'].index]) assert np.isnan(added['C'].reindex(frame_copy.index)[:5]).all() # assert(False) assert np.isnan(added['D']).all() self_added = self.frame + self.frame tm.assert_index_equal(self_added.index, self.frame.index) added_rev = frame_copy + self.frame assert np.isnan(added['D']).all() assert np.isnan(added_rev['D']).all() # corner cases # empty plus_empty = self.frame + self.empty assert np.isnan(plus_empty.values).all() empty_plus = self.empty + self.frame assert np.isnan(empty_plus.values).all() empty_empty = self.empty + self.empty assert empty_empty.empty # out of order reverse = self.frame.reindex(columns=self.frame.columns[::-1]) assert_frame_equal(reverse + self.frame, self.frame * 2) # mix vs float64, upcast added = self.frame + self.mixed_float _check_mixed_float(added, dtype='float64') added = self.mixed_float + self.frame _check_mixed_float(added, dtype='float64') # mix vs mix added = self.mixed_float + self.mixed_float2 _check_mixed_float(added, dtype=dict(C=None)) added = self.mixed_float2 + self.mixed_float _check_mixed_float(added, dtype=dict(C=None)) # with int added = self.frame + self.mixed_int _check_mixed_float(added, dtype='float64')