def test_date_parser_resolution_if_not_ns(all_parsers):
    # see gh-10245
    #
    # A user-supplied date_parser returning second-resolution datetime64
    # values is used to combine the "date" and "time" columns; the parsed
    # frame is compared against a hand-built frame indexed by
    # ("datetime", "prn").
    csv_text = """\
date,time,prn,rxstatus
2013-11-03,19:00:00,126,00E80000
2013-11-03,19:00:00,23,00E80000
2013-11-03,19:00:00,13,00E80000
"""

    def date_parser(dt, time):
        # Join the two columns into ISO-style strings before converting.
        stamp = dt + "T" + time + "Z"
        return np_array_datetime64_compat(stamp, dtype="datetime64[s]")

    result = all_parsers.read_csv(
        StringIO(csv_text),
        date_parser=date_parser,
        parse_dates={"datetime": ["date", "time"]},
        index_col=["datetime", "prn"],
    )

    stamps = np_array_datetime64_compat(
        ["2013-11-03T19:00:00Z"] * 3, dtype="datetime64[s]"
    )
    index_tuples = [(stamps[0], 126), (stamps[1], 23), (stamps[2], 13)]
    expected_index = MultiIndex.from_tuples(index_tuples, names=["datetime", "prn"])
    expected = DataFrame(
        data={"rxstatus": ["00E80000"] * 3},
        index=expected_index,
    )

    tm.assert_frame_equal(result, expected)
def test_coerce_outside_ns_bounds_one_valid():
    # One entry (year 1000) and one ordinary entry (year 2000): with
    # errors="coerce" the result is expected to hold iNaT for the first
    # and the parsed timestamp for the second.
    values = np.array(["1/1/1000", "1/1/2000"], dtype=object)
    result, _ = tslib.array_to_datetime(values, errors="coerce")

    expected = np_array_datetime64_compat(
        [iNaT, "2000-01-01T00:00:00.000000000-0000"], dtype="M8[ns]"
    )
    tm.assert_numpy_array_equal(result, expected)
def test_datetime_subclass(data, expected):
    # GH 25851
    # ensure that subclassed datetime works with
    # array_to_datetime
    object_values = np.array(data, dtype=object)
    result, _ = tslib.array_to_datetime(object_values)

    expected_values = np_array_datetime64_compat(expected, dtype="M8[ns]")
    tm.assert_numpy_array_equal(result, expected_values)
def test_coerce_of_invalid_datetimes(errors):
    arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)

    if errors == "ignore":
        # Without coercing, the presence of any invalid
        # dates prevents any values from being converted.
        result, _ = tslib.array_to_datetime(values=arr, errors=errors)
        tm.assert_numpy_array_equal(result, arr)
        return

    # coerce.
    # With coercing, the invalid dates becomes iNaT
    result, _ = tslib.array_to_datetime(arr, errors="coerce")
    expected = np_array_datetime64_compat(
        ["2013-01-01T00:00:00.000000000-0000", iNaT, iNaT], dtype="M8[ns]"
    )
    tm.assert_numpy_array_equal(result, expected)
def test_value_counts_datetime64(index_or_series):
    klass = index_or_series

    # GH 3002, datetime64[ns]
    # don't test names though
    fixed_width_rows = [
        "xxyyzz20100101PIE",
        "xxyyzz20100101GUM",
        "xxyyzz20100101EGG",
        "xxyyww20090101EGG",
        "foofoo20080909PIE",
        "foofoo20080909GUM",
    ]
    buffer = StringIO("\n".join(fixed_width_rows))
    df = pd.read_fwf(
        buffer,
        widths=[6, 8, 3],
        names=["person_id", "dt", "food"],
        parse_dates=["dt"],
    )

    s = klass(df["dt"].copy())
    s.name = None

    # Expected counts, most frequent date first.
    counts_index = pd.to_datetime(
        ["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"]
    )
    expected_s = Series([3, 2, 1], index=counts_index)
    tm.assert_series_equal(s.value_counts(), expected_s)

    # Expected unique values, in order of first appearance.
    expected = np_array_datetime64_compat(
        ["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"],
        dtype="datetime64[ns]",
    )
    if isinstance(s, Index):
        tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
    else:
        tm.assert_numpy_array_equal(s.unique(), expected)

    assert s.nunique() == 3

    # with NaT
    s = df["dt"].copy()
    s = klass(list(s.values) + [pd.NaT] * 4)

    result = s.value_counts()
    assert result.index.dtype == "datetime64[ns]"
    tm.assert_series_equal(result, expected_s)

    result = s.value_counts(dropna=False)
    nat_counts = Series([4], index=DatetimeIndex([pd.NaT]))
    expected_s = pd.concat([nat_counts, expected_s])
    tm.assert_series_equal(result, expected_s)

    assert s.dtype == "datetime64[ns]"
    unique = s.unique()
    assert unique.dtype == "datetime64[ns]"

    # numpy_array_equal cannot compare pd.NaT
    if isinstance(s, Index):
        exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
        tm.assert_index_equal(unique, exp_idx)
    else:
        tm.assert_numpy_array_equal(unique[:3], expected)
        assert pd.isna(unique[3])

    assert s.nunique() == 3
    assert s.nunique(dropna=False) == 4

    # timedelta64[ns]
    td = df.dt - df.dt + timedelta(1)
    td = klass(td, name="dt")

    result = td.value_counts()
    expected_s = Series([6], index=[Timedelta("1day")], name="dt")
    tm.assert_series_equal(result, expected_s)

    expected = TimedeltaIndex(["1 days"], name="dt")
    if isinstance(td, Index):
        tm.assert_index_equal(td.unique(), expected)
    else:
        tm.assert_numpy_array_equal(td.unique(), expected.values)

    # Same construction with the operands in the other order.
    td2 = timedelta(1) + (df.dt - df.dt)
    td2 = klass(td2, name="dt")
    result2 = td2.value_counts()
    tm.assert_series_equal(result2, expected_s)
def date_parser(dt, time):
    # Concatenate the date and time parts into "<date>T<time>Z" and convert
    # the result with second resolution.
    stamp = dt + "T" + time + "Z"
    return np_array_datetime64_compat(stamp, dtype="datetime64[s]")
def test_parsing_valid_dates(data, expected):
    # Convert the inputs as an object array and compare against the
    # expected values expressed as nanosecond-resolution datetimes.
    object_values = np.array(data, dtype=object)
    result, _ = tslib.array_to_datetime(object_values)

    expected_values = np_array_datetime64_compat(expected, dtype="M8[ns]")
    tm.assert_numpy_array_equal(result, expected_values)