def test_date_parser_resolution_if_not_ns(all_parsers):
    """read_csv with a ``date_parser`` returning ``datetime64[s]`` values.

    The combined "date" + "time" column is used, together with "prn", as a
    MultiIndex; the resulting frame is compared against a hand-built one.
    See gh-10245.
    """
    data = """\
date,time,prn,rxstatus
2013-11-03,19:00:00,126,00E80000
2013-11-03,19:00:00,23,00E80000
2013-11-03,19:00:00,13,00E80000
"""

    def date_parser(dt, time):
        # Join both columns into one ISO-style string, parsed at
        # second resolution.
        stamp = dt + "T" + time + "Z"
        return np_array_datetime64_compat(stamp, dtype="datetime64[s]")

    result = all_parsers.read_csv(
        StringIO(data),
        date_parser=date_parser,
        parse_dates={"datetime": ["date", "time"]},
        index_col=["datetime", "prn"],
    )

    stamps = np_array_datetime64_compat(
        ["2013-11-03T19:00:00Z"] * 3, dtype="datetime64[s]"
    )
    index_tuples = [(stamps[0], 126), (stamps[1], 23), (stamps[2], 13)]
    expected_index = MultiIndex.from_tuples(
        index_tuples, names=["datetime", "prn"]
    )
    expected = DataFrame(
        data={"rxstatus": ["00E80000"] * 3}, index=expected_index
    )

    tm.assert_frame_equal(result, expected)
def test_dateparser_resolution_if_not_ns(self):
    """read_csv with a ``date_parser`` that yields ``datetime64[s]`` values.

    The parsed "datetime" column and "prn" form the MultiIndex of the
    result, which is compared against a hand-built frame.  GH 10245.
    """
    data = """\
date,time,prn,rxstatus
2013-11-03,19:00:00,126,00E80000
2013-11-03,19:00:00,23,00E80000
2013-11-03,19:00:00,13,00E80000
"""

    def date_parser(date, time):
        # Concatenate the two columns and parse at second resolution.
        return np_array_datetime64_compat(
            date + "T" + time + "Z", dtype="datetime64[s]"
        )

    result = self.read_csv(
        StringIO(data),
        date_parser=date_parser,
        parse_dates={"datetime": ["date", "time"]},
        index_col=["datetime", "prn"],
    )

    stamps = np_array_datetime64_compat(
        ["2013-11-03T19:00:00Z"] * 3, dtype="datetime64[s]"
    )
    expected_index = MultiIndex.from_tuples(
        [(stamps[0], 126), (stamps[1], 23), (stamps[2], 13)],
        names=["datetime", "prn"],
    )
    expected = DataFrame(
        data={"rxstatus": ["00E80000"] * 3}, index=expected_index
    )

    tm.assert_frame_equal(result, expected)
def test_parsing_valid_dates(self):
    """array_to_datetime converts well-formed date strings to ``M8[ns]``."""
    # (input strings, expected ISO timestamps) pairs.
    cases = [
        (
            ["01-01-2013", "01-02-2013"],
            [
                "2013-01-01T00:00:00.000000000-0000",
                "2013-01-02T00:00:00.000000000-0000",
            ],
        ),
        (
            ["Mon Sep 16 2013", "Tue Sep 17 2013"],
            [
                "2013-09-16T00:00:00.000000000-0000",
                "2013-09-17T00:00:00.000000000-0000",
            ],
        ),
    ]

    for raw, iso in cases:
        values = np.array(raw, dtype=object)
        parsed, _ = tslib.array_to_datetime(values)
        wanted = np_array_datetime64_compat(iso, dtype="M8[ns]")
        tm.assert_numpy_array_equal(parsed, wanted)
def test_coerce_outside_ns_bounds_one_valid():
    """With ``errors="coerce"`` the year-1000 entry becomes iNaT.

    The second, year-2000 entry is expected to be converted normally.
    """
    values = np.array(["1/1/1000", "1/1/2000"], dtype=object)
    parsed, _ = tslib.array_to_datetime(values, errors="coerce")

    wanted = np_array_datetime64_compat(
        [iNaT, "2000-01-01T00:00:00.000000000-0000"], dtype="M8[ns]"
    )
    tm.assert_numpy_array_equal(parsed, wanted)
def test_coerce_outside_ns_bounds_one_valid(self):
    """With ``errors='coerce'`` the year-1000 entry becomes iNaT.

    The year-2000 entry is expected to be converted normally.
    """
    values = np.array(["1/1/1000", "1/1/2000"], dtype=object)
    parsed, _ = tslib.array_to_datetime(values, errors="coerce")

    wanted = np_array_datetime64_compat(
        [iNaT, "2000-01-01T00:00:00.000000000-0000"], dtype="M8[ns]"
    )
    tm.assert_numpy_array_equal(parsed, wanted)
def test_datetime_subclass(data, expected):
    """array_to_datetime must accept the ``data`` objects it is given.

    GH 25851: ensure that subclassed datetime works with
    array_to_datetime.
    """
    values = np.array(data, dtype=object)
    parsed, _ = tslib.array_to_datetime(values)

    wanted = np_array_datetime64_compat(expected, dtype="M8[ns]")
    tm.assert_numpy_array_equal(parsed, wanted)
def test_coerce_of_invalid_datetimes(errors):
    """Check array_to_datetime on input containing unparseable strings.

    ``errors == "ignore"`` must hand the input back untouched; any other
    value of ``errors`` runs the "coerce" check, where the bad entries
    become iNaT.
    """
    arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)

    if errors == "ignore":
        # Without coercing, the presence of any invalid
        # dates prevents any values from being converted.
        result, _ = tslib.array_to_datetime(values=arr, errors=errors)
        tm.assert_numpy_array_equal(result, arr)
        return

    # coerce.
    # With coercing, the invalid dates becomes iNaT
    result, _ = tslib.array_to_datetime(arr, errors="coerce")
    wanted = np_array_datetime64_compat(
        ["2013-01-01T00:00:00.000000000-0000", iNaT, iNaT], dtype="M8[ns]"
    )
    tm.assert_numpy_array_equal(result, wanted)
def test_coerce_of_invalid_datetimes(self):
    """Compare ``errors='ignore'`` and ``errors='coerce'`` on bad input."""
    values = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)

    # Without coercing, the presence of any invalid dates prevents
    # any values from being converted
    untouched, _ = tslib.array_to_datetime(values, errors="ignore")
    tm.assert_numpy_array_equal(untouched, values)

    # With coercing, the invalid dates becomes iNaT
    coerced, _ = tslib.array_to_datetime(values, errors="coerce")
    wanted = np_array_datetime64_compat(
        ["2013-01-01T00:00:00.000000000-0000", tslib.iNaT, tslib.iNaT],
        dtype="M8[ns]",
    )
    tm.assert_numpy_array_equal(coerced, wanted)
def test_coerce_of_invalid_datetimes(self):
    """Compare ``errors='ignore'`` and ``errors='coerce'`` on bad input."""
    values = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)

    # Without coercing, the presence of any invalid dates prevents
    # any values from being converted
    untouched, _ = tslib.array_to_datetime(values, errors="ignore")
    tm.assert_numpy_array_equal(untouched, values)

    # With coercing, the invalid dates becomes iNaT
    coerced, _ = tslib.array_to_datetime(values, errors="coerce")
    wanted = np_array_datetime64_compat(
        ["2013-01-01T00:00:00.000000000-0000", iNaT, iNaT], dtype="M8[ns]"
    )
    tm.assert_numpy_array_equal(coerced, wanted)
def test_coerce_of_invalid_datetimes(errors):
    """Check array_to_datetime on input containing unparseable strings.

    For ``errors == "ignore"`` the input array is expected back unchanged;
    otherwise the "coerce" behaviour is checked, where invalid entries
    become iNaT.
    """
    arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)
    ignoring = errors == "ignore"

    if not ignoring:
        # coerce.
        # With coercing, the invalid dates becomes iNaT
        result, _ = tslib.array_to_datetime(arr, errors="coerce")
        iso = ["2013-01-01T00:00:00.000000000-0000", iNaT, iNaT]
        tm.assert_numpy_array_equal(
            result, np_array_datetime64_compat(iso, dtype="M8[ns]")
        )
    else:
        # Without coercing, the presence of any invalid
        # dates prevents any values from being converted.
        result, _ = tslib.array_to_datetime(values=arr, errors=errors)
        tm.assert_numpy_array_equal(result, arr)
def test_datetime64_dtype_array_returned(self):
    """algos.unique returns an ``M8[ns]`` ndarray for datetime input.

    The same check is applied to a DatetimeIndex, a Series built from it
    and that Series' underlying values.  GH 9431.
    """
    first = "2015-01-03T00:00:00.000000000+0000"
    second = "2015-01-01T00:00:00.000000000+0000"

    expected = np_array_datetime64_compat([first, second], dtype="M8[ns]")

    dt_index = pd.to_datetime([first, second, second])
    s = pd.Series(dt_index)

    for container in (dt_index, s, s.values):
        result = algos.unique(container)
        tm.assert_numpy_array_equal(result, expected)
        self.assertEqual(result.dtype, expected.dtype)
def test_datetime64_dtype_array_returned(self):
    """algos.unique returns an ``M8[ns]`` ndarray for datetime input.

    Checked for a DatetimeIndex, for a Series wrapping it and for the
    Series' raw values.  GH 9431.
    """
    stamps = [
        "2015-01-03T00:00:00.000000000+0000",
        "2015-01-01T00:00:00.000000000+0000",
        "2015-01-01T00:00:00.000000000+0000",
    ]

    # The last timestamp is a duplicate, so only the first two are expected.
    expected = np_array_datetime64_compat(stamps[:2], dtype="M8[ns]")

    dt_index = pd.to_datetime(stamps)
    s = pd.Series(dt_index)
    arr = s.values

    for container in (dt_index, s, arr):
        result = algos.unique(container)
        tm.assert_numpy_array_equal(result, expected)
        self.assertEqual(result.dtype, expected.dtype)
def test_value_counts_datetime64(self, klass):
    """value_counts / unique / nunique on datetime64 and timedelta64 data.

    GH 3002, datetime64[ns].  Names are not tested for the datetime part.
    """
    raw = "\n".join(
        [
            'xxyyzz20100101PIE',
            'xxyyzz20100101GUM',
            'xxyyzz20100101EGG',
            'xxyyww20090101EGG',
            'foofoo20080909PIE',
            'foofoo20080909GUM',
        ]
    )
    df = pd.read_fwf(
        StringIO(raw),
        widths=[6, 8, 3],
        names=["person_id", "dt", "food"],
        parse_dates=["dt"],
    )

    # ---- datetime64[ns] -------------------------------------------------
    s = klass(df['dt'].copy())
    s.name = None

    counts_index = pd.to_datetime(
        ['2010-01-01 00:00:00', '2008-09-09 00:00:00', '2009-01-01 00:00:00']
    )
    expected_counts = Series([3, 2, 1], index=counts_index)
    tm.assert_series_equal(s.value_counts(), expected_counts)

    expected_unique = np_array_datetime64_compat(
        ['2010-01-01 00:00:00', '2009-01-01 00:00:00', '2008-09-09 00:00:00'],
        dtype='datetime64[ns]',
    )
    if isinstance(s, Index):
        tm.assert_index_equal(s.unique(), DatetimeIndex(expected_unique))
    else:
        tm.assert_numpy_array_equal(s.unique(), expected_unique)

    assert s.nunique() == 3

    # ---- with NaT -------------------------------------------------------
    s = df['dt'].copy()
    s = klass(list(s.values) + [pd.NaT])

    counts = s.value_counts()
    assert counts.index.dtype == 'datetime64[ns]'
    tm.assert_series_equal(counts, expected_counts)

    counts = s.value_counts(dropna=False)
    # The expectation is extended in place with the extra NaT bucket.
    expected_counts[pd.NaT] = 1
    tm.assert_series_equal(counts, expected_counts)

    unique = s.unique()
    assert unique.dtype == 'datetime64[ns]'

    # numpy_array_equal cannot compare pd.NaT
    if isinstance(s, Index):
        with_nat = DatetimeIndex(expected_unique.tolist() + [pd.NaT])
        tm.assert_index_equal(unique, with_nat)
    else:
        tm.assert_numpy_array_equal(unique[:3], expected_unique)
        assert pd.isna(unique[3])

    assert s.nunique() == 3
    assert s.nunique(dropna=False) == 4

    # ---- timedelta64[ns] ------------------------------------------------
    td = klass(df.dt - df.dt + timedelta(1), name='dt')

    expected_counts = Series([6], index=[Timedelta('1day')], name='dt')
    tm.assert_series_equal(td.value_counts(), expected_counts)

    expected_td = TimedeltaIndex(['1 days'], name='dt')
    if isinstance(td, Index):
        tm.assert_index_equal(td.unique(), expected_td)
    else:
        tm.assert_numpy_array_equal(td.unique(), expected_td.values)

    # Same data with the operands of the addition swapped.
    td2 = klass(timedelta(1) + (df.dt - df.dt), name='dt')
    tm.assert_series_equal(td2.value_counts(), expected_counts)
def test_value_counts_datetime64(self):
    """value_counts / unique / nunique on datetime64 and timedelta64 data.

    Runs the same checks for both ``Index`` and ``Series``.
    GH 3002, datetime64[ns].  Names are not tested for the datetime part.
    """
    klasses = [Index, Series]
    for klass in klasses:
        txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM',
                         'xxyyzz20100101EGG', 'xxyyww20090101EGG',
                         'foofoo20080909PIE', 'foofoo20080909GUM'])
        f = StringIO(txt)
        df = pd.read_fwf(f, widths=[6, 8, 3],
                         names=["person_id", "dt", "food"],
                         parse_dates=["dt"])

        s = klass(df['dt'].copy())
        s.name = None

        # All three timestamps carry the UTC designator "Z"; the last one
        # previously ended in a stray "X", which is not a valid designator.
        idx = pd.to_datetime(['2010-01-01 00:00:00Z',
                              '2008-09-09 00:00:00Z',
                              '2009-01-01 00:00:00Z'])
        expected_s = Series([3, 2, 1], index=idx)
        tm.assert_series_equal(s.value_counts(), expected_s)

        expected = np_array_datetime64_compat(['2010-01-01 00:00:00Z',
                                               '2009-01-01 00:00:00Z',
                                               '2008-09-09 00:00:00Z'],
                                              dtype='datetime64[ns]')
        if isinstance(s, DatetimeIndex):
            self.assert_index_equal(s.unique(), DatetimeIndex(expected))
        else:
            self.assert_numpy_array_equal(s.unique(), expected)

        self.assertEqual(s.nunique(), 3)

        # with NaT
        s = df['dt'].copy()
        s = klass(list(s.values) + [pd.NaT])

        result = s.value_counts()
        self.assertEqual(result.index.dtype, 'datetime64[ns]')
        tm.assert_series_equal(result, expected_s)

        result = s.value_counts(dropna=False)
        # Extend the expectation in place with the NaT bucket.
        expected_s[pd.NaT] = 1
        tm.assert_series_equal(result, expected_s)

        unique = s.unique()
        self.assertEqual(unique.dtype, 'datetime64[ns]')

        # numpy_array_equal cannot compare pd.NaT
        if isinstance(s, DatetimeIndex):
            self.assert_index_equal(unique[:3], DatetimeIndex(expected))
        else:
            self.assert_numpy_array_equal(unique[:3], expected)
        self.assertTrue(unique[3] is pd.NaT or
                        unique[3].astype('int64') == pd.tslib.iNaT)

        self.assertEqual(s.nunique(), 3)
        self.assertEqual(s.nunique(dropna=False), 4)

        # timedelta64[ns]
        td = df.dt - df.dt + timedelta(1)
        td = klass(td, name='dt')

        result = td.value_counts()
        expected_s = Series([6], index=[Timedelta('1day')], name='dt')
        tm.assert_series_equal(result, expected_s)

        expected = TimedeltaIndex(['1 days'], name='dt')
        if isinstance(td, TimedeltaIndex):
            self.assert_index_equal(td.unique(), expected)
        else:
            self.assert_numpy_array_equal(td.unique(), expected.values)

        # Same data with the operands of the addition swapped.
        td2 = timedelta(1) + (df.dt - df.dt)
        td2 = klass(td2, name='dt')
        result2 = td2.value_counts()
        tm.assert_series_equal(result2, expected_s)
def test_value_counts_datetime64(self, index_or_series):
    """value_counts / unique / nunique on datetime64 and timedelta64 data.

    GH 3002, datetime64[ns].  Names are not tested for the datetime part.
    """
    klass = index_or_series

    rows = [
        "xxyyzz20100101PIE",
        "xxyyzz20100101GUM",
        "xxyyzz20100101EGG",
        "xxyyww20090101EGG",
        "foofoo20080909PIE",
        "foofoo20080909GUM",
    ]
    df = pd.read_fwf(
        StringIO("\n".join(rows)),
        widths=[6, 8, 3],
        names=["person_id", "dt", "food"],
        parse_dates=["dt"],
    )

    # ---- datetime64[ns] -------------------------------------------------
    s = klass(df["dt"].copy())
    s.name = None

    counts_index = pd.to_datetime(
        ["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"]
    )
    expected_counts = Series([3, 2, 1], index=counts_index)
    tm.assert_series_equal(s.value_counts(), expected_counts)

    expected_unique = np_array_datetime64_compat(
        ["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"],
        dtype="datetime64[ns]",
    )
    if not isinstance(s, Index):
        tm.assert_numpy_array_equal(s.unique(), expected_unique)
    else:
        tm.assert_index_equal(s.unique(), DatetimeIndex(expected_unique))

    assert s.nunique() == 3

    # ---- with NaT -------------------------------------------------------
    s = df["dt"].copy()
    s = klass(list(s.values) + [pd.NaT])

    counts = s.value_counts()
    assert counts.index.dtype == "datetime64[ns]"
    tm.assert_series_equal(counts, expected_counts)

    counts = s.value_counts(dropna=False)
    # The expectation is extended in place with the extra NaT bucket.
    expected_counts[pd.NaT] = 1
    tm.assert_series_equal(counts, expected_counts)

    unique = s.unique()
    assert unique.dtype == "datetime64[ns]"

    # numpy_array_equal cannot compare pd.NaT
    if not isinstance(s, Index):
        tm.assert_numpy_array_equal(unique[:3], expected_unique)
        assert pd.isna(unique[3])
    else:
        with_nat = DatetimeIndex(expected_unique.tolist() + [pd.NaT])
        tm.assert_index_equal(unique, with_nat)

    assert s.nunique() == 3
    assert s.nunique(dropna=False) == 4

    # ---- timedelta64[ns] ------------------------------------------------
    td = klass(df.dt - df.dt + timedelta(1), name="dt")

    expected_counts = Series([6], index=[Timedelta("1day")], name="dt")
    tm.assert_series_equal(td.value_counts(), expected_counts)

    expected_td = TimedeltaIndex(["1 days"], name="dt")
    if not isinstance(td, Index):
        tm.assert_numpy_array_equal(td.unique(), expected_td.values)
    else:
        tm.assert_index_equal(td.unique(), expected_td)

    # Same data with the operands of the addition swapped.
    td2 = klass(timedelta(1) + (df.dt - df.dt), name="dt")
    tm.assert_series_equal(td2.value_counts(), expected_counts)
def date_parser(date, time):
    """Join ``date`` and ``time`` as ``<date>T<time>Z`` and parse it.

    The joined value is handed to ``np_array_datetime64_compat`` with
    ``dtype='datetime64[s]'`` and its result is returned.
    """
    return np_array_datetime64_compat(date + 'T' + time + 'Z',
                                      dtype='datetime64[s]')
def date_parser(dt, time):
    """Join ``dt`` and ``time`` as ``<dt>T<time>Z`` and parse it.

    The joined value is handed to ``np_array_datetime64_compat`` with
    ``dtype="datetime64[s]"`` and its result is returned.
    """
    stamp = dt + "T" + time + "Z"
    return np_array_datetime64_compat(stamp, dtype="datetime64[s]")
def test_parsing_valid_dates(data, expected):
    """array_to_datetime on ``data`` must match ``expected`` as ``M8[ns]``."""
    values = np.array(data, dtype=object)
    parsed, _ = tslib.array_to_datetime(values)

    wanted = np_array_datetime64_compat(expected, dtype="M8[ns]")
    tm.assert_numpy_array_equal(parsed, wanted)
def test_coerce_outside_ns_bounds_one_valid(self):
    """With ``errors='coerce'`` the year-1000 entry becomes iNaT.

    The year-2000 entry is expected to be converted normally.
    """
    values = np.array(["1/1/1000", "1/1/2000"], dtype=object)
    parsed, _ = tslib.array_to_datetime(values, errors="coerce")

    wanted = np_array_datetime64_compat(
        [tslib.iNaT, "2000-01-01T00:00:00.000000000-0000"], dtype="M8[ns]"
    )
    tm.assert_numpy_array_equal(parsed, wanted)
def date_parser(date, time):
    """Join ``date`` and ``time`` as ``<date>T<time>Z`` and parse it.

    Returns whatever ``np_array_datetime64_compat`` produces for the joined
    value with ``dtype='datetime64[s]'``.
    """
    stamp = date + 'T' + time + 'Z'
    return np_array_datetime64_compat(stamp, dtype='datetime64[s]')
def test_value_counts_inferred(self):
    """value_counts / unique / nunique across several kinds of input.

    For both ``Index`` and ``Series`` this covers string data (sorting,
    normalisation, ``bins``), NaN handling, empty input, datetime64[ns]
    data (GH 3002) with and without NaT, and timedelta64[ns] data.
    """
    klasses = [Index, Series]
    for klass in klasses:
        s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
        s = klass(s_values)
        expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
        tm.assert_series_equal(s.value_counts(), expected)

        self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
        self.assertEqual(s.nunique(), 4)

        # don't sort, have to sort after the fact as not sorting is
        # platform-dep
        hist = s.value_counts(sort=False).sort_values()
        expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
        tm.assert_series_equal(hist, expected)

        # sort ascending
        hist = s.value_counts(ascending=True)
        expected = Series([1, 2, 3, 4], index=list('cdab'))
        tm.assert_series_equal(hist, expected)

        # relative histogram.
        hist = s.value_counts(normalize=True)
        expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
        tm.assert_series_equal(hist, expected)

        # bins: the string data in ``s`` is expected to raise TypeError
        self.assertRaises(TypeError,
                          lambda bins: s.value_counts(bins=bins), 1)

        s1 = Series([1, 1, 2, 3])
        res1 = s1.value_counts(bins=1)
        exp1 = Series({0.998: 4})
        tm.assert_series_equal(res1, exp1)
        res1n = s1.value_counts(bins=1, normalize=True)
        exp1n = Series({0.998: 1.0})
        tm.assert_series_equal(res1n, exp1n)

        self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3]))
        self.assertEqual(s1.nunique(), 3)

        res4 = s1.value_counts(bins=4)
        exp4 = Series({
            0.998: 2,
            1.5: 1,
            2.0: 0,
            2.5: 1
        }, index=[0.998, 2.5, 1.5, 2.0])
        tm.assert_series_equal(res4, exp4)
        res4n = s1.value_counts(bins=4, normalize=True)
        exp4n = Series({
            0.998: 0.5,
            1.5: 0.25,
            2.0: 0.0,
            2.5: 0.25
        }, index=[0.998, 2.5, 1.5, 2.0])
        tm.assert_series_equal(res4n, exp4n)

        # handle NA's properly
        s_values = [
            'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
        ]
        s = klass(s_values)
        expected = Series([4, 3, 2], index=['b', 'a', 'd'])
        tm.assert_series_equal(s.value_counts(), expected)

        self.assert_numpy_array_equal(
            s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
        self.assertEqual(s.nunique(), 3)

        # empty input
        s = klass({})
        expected = Series([], dtype=np.int64)
        tm.assert_series_equal(s.value_counts(), expected,
                               check_index_type=False)
        self.assert_numpy_array_equal(s.unique(), np.array([]))
        self.assertEqual(s.nunique(), 0)

        # GH 3002, datetime64[ns]
        # don't test names though
        txt = "\n".join([
            'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
            'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
        ])
        f = StringIO(txt)
        df = pd.read_fwf(f, widths=[6, 8, 3],
                         names=["person_id", "dt", "food"],
                         parse_dates=["dt"])

        s = klass(df['dt'].copy())
        s.name = None

        # All three timestamps carry the UTC designator "Z"; the last one
        # previously ended in a stray "X", which is not a valid designator.
        idx = pd.to_datetime([
            '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
            '2009-01-01 00:00:00Z'
        ])
        expected_s = Series([3, 2, 1], index=idx)
        tm.assert_series_equal(s.value_counts(), expected_s)

        expected = np_array_datetime64_compat([
            '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
            '2008-09-09 00:00:00Z'
        ], dtype='datetime64[ns]')
        if isinstance(s, DatetimeIndex):
            expected = DatetimeIndex(expected)
            self.assertTrue(s.unique().equals(expected))
        else:
            self.assert_numpy_array_equal(s.unique(), expected)

        self.assertEqual(s.nunique(), 3)

        # with NaT
        s = df['dt'].copy()
        s = klass(list(s.values) + [pd.NaT])

        result = s.value_counts()
        self.assertEqual(result.index.dtype, 'datetime64[ns]')
        tm.assert_series_equal(result, expected_s)

        result = s.value_counts(dropna=False)
        # Extend the expectation in place with the NaT bucket.
        expected_s[pd.NaT] = 1
        tm.assert_series_equal(result, expected_s)

        unique = s.unique()
        self.assertEqual(unique.dtype, 'datetime64[ns]')

        # numpy_array_equal cannot compare pd.NaT
        self.assert_numpy_array_equal(unique[:3], expected)
        self.assertTrue(unique[3] is pd.NaT or
                        unique[3].astype('int64') == pd.tslib.iNaT)

        self.assertEqual(s.nunique(), 3)
        self.assertEqual(s.nunique(dropna=False), 4)

        # timedelta64[ns]
        td = df.dt - df.dt + timedelta(1)
        td = klass(td, name='dt')

        result = td.value_counts()
        expected_s = Series([6], index=[Timedelta('1day')], name='dt')
        tm.assert_series_equal(result, expected_s)

        expected = TimedeltaIndex(['1 days'])
        if isinstance(td, TimedeltaIndex):
            self.assertTrue(td.unique().equals(expected))
        else:
            self.assert_numpy_array_equal(td.unique(), expected.values)

        # Same data with the operands of the addition swapped.
        td2 = timedelta(1) + (df.dt - df.dt)
        td2 = klass(td2, name='dt')
        result2 = td2.value_counts()
        tm.assert_series_equal(result2, expected_s)
def test_parsing_valid_dates(data, expected):
    """array_to_datetime on ``data`` must match ``expected`` as ``M8[ns]``."""
    parsed, _ = tslib.array_to_datetime(np.array(data, dtype=object))
    tm.assert_numpy_array_equal(
        parsed, np_array_datetime64_compat(expected, dtype="M8[ns]")
    )
def date_parser(dt, time):
    """Join ``dt`` and ``time`` as ``<dt>T<time>Z`` and parse it.

    Returns whatever ``np_array_datetime64_compat`` produces for the joined
    value with ``dtype="datetime64[s]"``.
    """
    joined = dt + "T" + time + "Z"
    parsed = np_array_datetime64_compat(joined, dtype="datetime64[s]")
    return parsed
def test_value_counts_inferred(self):
    """value_counts / unique / nunique across several kinds of input.

    For both ``Index`` and ``Series`` this covers string data (sorting,
    normalisation, ``bins``), NaN handling, empty input, datetime64[ns]
    data (GH 3002) with and without NaT, and timedelta64[ns] data.
    """
    klasses = [Index, Series]
    for klass in klasses:
        s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
        s = klass(s_values)
        expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
        tm.assert_series_equal(s.value_counts(), expected)

        exp = np.unique(np.array(s_values, dtype=np.object_))
        self.assert_numpy_array_equal(s.unique(), exp)
        self.assertEqual(s.nunique(), 4)

        # don't sort, have to sort after the fact as not sorting is
        # platform-dep
        hist = s.value_counts(sort=False).sort_values()
        expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
        tm.assert_series_equal(hist, expected)

        # sort ascending
        hist = s.value_counts(ascending=True)
        expected = Series([1, 2, 3, 4], index=list('cdab'))
        tm.assert_series_equal(hist, expected)

        # relative histogram.
        hist = s.value_counts(normalize=True)
        expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
        tm.assert_series_equal(hist, expected)

        # bins: the string data in ``s`` is expected to raise TypeError
        self.assertRaises(TypeError,
                          lambda bins: s.value_counts(bins=bins), 1)

        s1 = Series([1, 1, 2, 3])
        res1 = s1.value_counts(bins=1)
        exp1 = Series({0.998: 4})
        tm.assert_series_equal(res1, exp1)
        res1n = s1.value_counts(bins=1, normalize=True)
        exp1n = Series({0.998: 1.0})
        tm.assert_series_equal(res1n, exp1n)

        self.assert_numpy_array_equal(s1.unique(),
                                      np.array([1, 2, 3], dtype=np.int64))
        self.assertEqual(s1.nunique(), 3)

        res4 = s1.value_counts(bins=4)
        exp4 = Series({0.998: 2,
                       1.5: 1,
                       2.0: 0,
                       2.5: 1}, index=[0.998, 2.5, 1.5, 2.0])
        tm.assert_series_equal(res4, exp4)
        res4n = s1.value_counts(bins=4, normalize=True)
        exp4n = Series(
            {0.998: 0.5,
             1.5: 0.25,
             2.0: 0.0,
             2.5: 0.25}, index=[0.998, 2.5, 1.5, 2.0])
        tm.assert_series_equal(res4n, exp4n)

        # handle NA's properly
        s_values = ['a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a',
                    'b']
        s = klass(s_values)
        expected = Series([4, 3, 2], index=['b', 'a', 'd'])
        tm.assert_series_equal(s.value_counts(), expected)

        exp = np.array(['a', 'b', np.nan, 'd'], dtype=np.object_)
        self.assert_numpy_array_equal(s.unique(), exp)
        self.assertEqual(s.nunique(), 3)

        # empty input
        s = klass({})
        expected = Series([], dtype=np.int64)
        tm.assert_series_equal(s.value_counts(), expected,
                               check_index_type=False)
        # returned dtype differs depending on original
        self.assert_numpy_array_equal(s.unique(), np.array([]),
                                      check_dtype=False)
        self.assertEqual(s.nunique(), 0)

        # GH 3002, datetime64[ns]
        # don't test names though
        txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM',
                         'xxyyzz20100101EGG', 'xxyyww20090101EGG',
                         'foofoo20080909PIE', 'foofoo20080909GUM'])
        f = StringIO(txt)
        df = pd.read_fwf(f, widths=[6, 8, 3],
                         names=["person_id", "dt", "food"],
                         parse_dates=["dt"])

        s = klass(df['dt'].copy())
        s.name = None

        # All three timestamps carry the UTC designator "Z"; the last one
        # previously ended in a stray "X", which is not a valid designator.
        idx = pd.to_datetime(
            ['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
             '2009-01-01 00:00:00Z'])
        expected_s = Series([3, 2, 1], index=idx)
        tm.assert_series_equal(s.value_counts(), expected_s)

        expected = np_array_datetime64_compat(['2010-01-01 00:00:00Z',
                                               '2009-01-01 00:00:00Z',
                                               '2008-09-09 00:00:00Z'],
                                              dtype='datetime64[ns]')
        if isinstance(s, DatetimeIndex):
            expected = DatetimeIndex(expected)
            self.assertTrue(s.unique().equals(expected))
        else:
            self.assert_numpy_array_equal(s.unique(), expected)

        self.assertEqual(s.nunique(), 3)

        # with NaT
        s = df['dt'].copy()
        s = klass(list(s.values) + [pd.NaT])

        result = s.value_counts()
        self.assertEqual(result.index.dtype, 'datetime64[ns]')
        tm.assert_series_equal(result, expected_s)

        result = s.value_counts(dropna=False)
        # Extend the expectation in place with the NaT bucket.
        expected_s[pd.NaT] = 1
        tm.assert_series_equal(result, expected_s)

        unique = s.unique()
        self.assertEqual(unique.dtype, 'datetime64[ns]')

        # numpy_array_equal cannot compare pd.NaT
        self.assert_numpy_array_equal(unique[:3], expected)
        self.assertTrue(unique[3] is pd.NaT or
                        unique[3].astype('int64') == pd.tslib.iNaT)

        self.assertEqual(s.nunique(), 3)
        self.assertEqual(s.nunique(dropna=False), 4)

        # timedelta64[ns]
        td = df.dt - df.dt + timedelta(1)
        td = klass(td, name='dt')

        result = td.value_counts()
        expected_s = Series([6], index=[Timedelta('1day')], name='dt')
        tm.assert_series_equal(result, expected_s)

        expected = TimedeltaIndex(['1 days'])
        if isinstance(td, TimedeltaIndex):
            self.assertTrue(td.unique().equals(expected))
        else:
            self.assert_numpy_array_equal(td.unique(), expected.values)

        # Same data with the operands of the addition swapped.
        td2 = timedelta(1) + (df.dt - df.dt)
        td2 = klass(td2, name='dt')
        result2 = td2.value_counts()
        tm.assert_series_equal(result2, expected_s)