コード例 #1
0
ファイル: test_parse_dates.py プロジェクト: chrish42/pandas
def test_date_parser_resolution_if_not_ns(all_parsers):
    # see gh-10245
    # A custom date_parser returning datetime64[s] values is used to merge
    # the "date" and "time" columns; the merged column then becomes one
    # level of a two-level index together with "prn".
    csv_text = """\
date,time,prn,rxstatus
2013-11-03,19:00:00,126,00E80000
2013-11-03,19:00:00,23,00E80000
2013-11-03,19:00:00,13,00E80000
"""

    def date_parser(dt, time):
        # Glue both parts into "<date>T<time>Z" text before converting.
        iso_text = dt + "T" + time + "Z"
        return np_array_datetime64_compat(iso_text, dtype="datetime64[s]")

    result = all_parsers.read_csv(
        StringIO(csv_text),
        date_parser=date_parser,
        parse_dates={"datetime": ["date", "time"]},
        index_col=["datetime", "prn"],
    )

    stamps = np_array_datetime64_compat(["2013-11-03T19:00:00Z"] * 3,
                                        dtype="datetime64[s]")
    # Pair every timestamp with the "prn" value found on the same CSV row.
    index_tuples = list(zip(stamps, [126, 23, 13]))
    expected_index = MultiIndex.from_tuples(index_tuples,
                                            names=["datetime", "prn"])
    expected = DataFrame(data={"rxstatus": ["00E80000"] * 3},
                         index=expected_index)
    tm.assert_frame_equal(result, expected)
コード例 #2
0
ファイル: parse_dates.py プロジェクト: mwaskom/pandas
    def test_dateparser_resolution_if_not_ns(self):
        # GH 10245
        data = """\
date,time,prn,rxstatus
2013-11-03,19:00:00,126,00E80000
2013-11-03,19:00:00,23,00E80000
2013-11-03,19:00:00,13,00E80000
"""

        def date_parser(date, time):
            datetime = np_array_datetime64_compat(
                date + 'T' + time + 'Z', dtype='datetime64[s]')
            return datetime

        df = self.read_csv(StringIO(data), date_parser=date_parser,
                           parse_dates={'datetime': ['date', 'time']},
                           index_col=['datetime', 'prn'])

        datetimes = np_array_datetime64_compat(['2013-11-03T19:00:00Z'] * 3,
                                               dtype='datetime64[s]')
        df_correct = DataFrame(data={'rxstatus': ['00E80000'] * 3},
                               index=MultiIndex.from_tuples(
                                   [(datetimes[0], 126),
                                    (datetimes[1], 23),
                                    (datetimes[2], 13)],
                               names=['datetime', 'prn']))
        tm.assert_frame_equal(df, df_correct)
コード例 #3
0
    def test_parsing_valid_dates(self):
        # Each case pairs the raw date strings with the ISO strings that
        # the M8[ns] result of array_to_datetime is compared against.
        cases = [
            (['01-01-2013', '01-02-2013'],
             ['2013-01-01T00:00:00.000000000-0000',
              '2013-01-02T00:00:00.000000000-0000']),
            (['Mon Sep 16 2013', 'Tue Sep 17 2013'],
             ['2013-09-16T00:00:00.000000000-0000',
              '2013-09-17T00:00:00.000000000-0000']),
        ]
        for raw_dates, iso_dates in cases:
            values = np.array(raw_dates, dtype=object)
            converted, _ = tslib.array_to_datetime(values)
            wanted = np_array_datetime64_compat(iso_dates, dtype='M8[ns]')
            tm.assert_numpy_array_equal(converted, wanted)
コード例 #4
0
def test_coerce_outside_ns_bounds_one_valid():
    # With errors="coerce" the first entry is expected to come back as
    # iNaT while the second one is converted normally.
    values = np.array(["1/1/1000", "1/1/2000"], dtype=object)
    converted, _ = tslib.array_to_datetime(values, errors="coerce")

    wanted = np_array_datetime64_compat(
        [iNaT, "2000-01-01T00:00:00.000000000-0000"], dtype="M8[ns]")

    tm.assert_numpy_array_equal(converted, wanted)
コード例 #5
0
 def test_coerce_outside_ns_bounds_one_valid(self):
     # With errors='coerce' the first entry is expected to come back as
     # iNaT while the second one is converted normally.
     values = np.array(['1/1/1000', '1/1/2000'], dtype=object)
     converted, _ = tslib.array_to_datetime(values, errors='coerce')
     wanted = np_array_datetime64_compat(
         [iNaT, '2000-01-01T00:00:00.000000000-0000'], dtype='M8[ns]')
     tm.assert_numpy_array_equal(converted, wanted)
コード例 #6
0
def test_datetime_subclass(data, expected):
    # GH 25851
    # ensure that subclassed datetime works with
    # array_to_datetime
    # `data` is wrapped in an object array, converted, and the outcome is
    # compared with `expected` rendered as an M8[ns] array.
    values = np.array(data, dtype=object)
    converted, _ = tslib.array_to_datetime(values)

    wanted = np_array_datetime64_compat(expected, dtype="M8[ns]")
    tm.assert_numpy_array_equal(converted, wanted)
コード例 #7
0
def test_coerce_of_invalid_datetimes(errors):
    # Checks how array_to_datetime treats unparseable entries under the
    # `errors` mode handed to this test ("ignore" or "coerce").
    arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)

    # Pass `errors` through on every path so that the mode being asserted
    # on is always the mode that was actually exercised.
    result, _ = tslib.array_to_datetime(values=arr, errors=errors)

    if errors == "ignore":
        # Without coercing, the presence of any invalid
        # dates prevents any values from being converted.
        tm.assert_numpy_array_equal(result, arr)
    else:  # coerce.
        # With coercing, the invalid dates becomes iNaT
        expected = ["2013-01-01T00:00:00.000000000-0000", iNaT, iNaT]

        tm.assert_numpy_array_equal(
            result, np_array_datetime64_compat(expected, dtype="M8[ns]"))
コード例 #8
0
    def test_coerce_of_invalid_datetimes(self):
        values = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object)

        # errors='ignore': a single invalid date is enough for the input
        # to be handed back without any conversion.
        untouched, _ = tslib.array_to_datetime(values, errors='ignore')
        tm.assert_numpy_array_equal(untouched, values)

        # errors='coerce': the invalid entries are replaced by iNaT.
        coerced, _ = tslib.array_to_datetime(values, errors='coerce')
        wanted = np_array_datetime64_compat(
            ['2013-01-01T00:00:00.000000000-0000', tslib.iNaT, tslib.iNaT],
            dtype='M8[ns]')

        tm.assert_numpy_array_equal(coerced, wanted)
コード例 #9
0
    def test_coerce_of_invalid_datetimes(self):
        raw = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object)

        # errors='ignore': a single invalid date is enough for the input
        # to be handed back without any conversion.
        kept, _ = tslib.array_to_datetime(raw, errors='ignore')
        tm.assert_numpy_array_equal(kept, raw)

        # errors='coerce': the two invalid entries are replaced by iNaT.
        coerced, _ = tslib.array_to_datetime(raw, errors='coerce')
        wanted = ['2013-01-01T00:00:00.000000000-0000'] + [iNaT] * 2

        tm.assert_numpy_array_equal(
            coerced, np_array_datetime64_compat(wanted, dtype='M8[ns]'))
コード例 #10
0
def test_coerce_of_invalid_datetimes(errors):
    # Checks how array_to_datetime treats unparseable entries under the
    # `errors` mode handed to this test ("ignore" or "coerce").
    arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)

    # Pass `errors` through on every path so that the mode being asserted
    # on is always the mode that was actually exercised.
    result, _ = tslib.array_to_datetime(values=arr, errors=errors)

    if errors == "ignore":
        # Without coercing, the presence of any invalid
        # dates prevents any values from being converted.
        tm.assert_numpy_array_equal(result, arr)
    else:  # coerce.
        # With coercing, the invalid dates becomes iNaT
        expected = ["2013-01-01T00:00:00.000000000-0000",
                    iNaT,
                    iNaT]

        tm.assert_numpy_array_equal(
            result,
            np_array_datetime64_compat(expected, dtype="M8[ns]"))
コード例 #11
0
ファイル: test_algos.py プロジェクト: yizhiyong/pandas
    def test_datetime64_dtype_array_returned(self):
        # GH 9431
        # algos.unique is checked on a DatetimeIndex, a Series built from
        # it and that Series' raw values; each time the result must equal
        # the same M8[ns] array, dtype included.
        newer = '2015-01-03T00:00:00.000000000+0000'
        older = '2015-01-01T00:00:00.000000000+0000'
        expected = np_array_datetime64_compat([newer, older],
                                              dtype='M8[ns]')

        dt_index = pd.to_datetime([newer, older, older])
        series = pd.Series(dt_index)

        for values in (dt_index, series, series.values):
            result = algos.unique(values)
            tm.assert_numpy_array_equal(result, expected)
            self.assertEqual(result.dtype, expected.dtype)
コード例 #12
0
ファイル: test_algos.py プロジェクト: awolf78/pandas
    def test_datetime64_dtype_array_returned(self):
        # GH 9431
        wanted = np_array_datetime64_compat(
            ['2015-01-03T00:00:00.000000000+0000',
             '2015-01-01T00:00:00.000000000+0000'],
            dtype='M8[ns]')

        def check_unique(values):
            # The unique values must match `wanted`, dtype included.
            uniques = algos.unique(values)
            tm.assert_numpy_array_equal(uniques, wanted)
            self.assertEqual(uniques.dtype, wanted.dtype)

        # The same data is checked as an index, as a Series and as the
        # Series' underlying values.
        dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000+0000',
                                   '2015-01-01T00:00:00.000000000+0000',
                                   '2015-01-01T00:00:00.000000000+0000'])
        check_unique(dt_index)

        as_series = pd.Series(dt_index)
        check_unique(as_series)

        check_unique(as_series.values)
コード例 #13
0
ファイル: test_base.py プロジェクト: wuhaochen/pandas
    def test_value_counts_datetime64(self, klass):
        # Exercises value_counts / unique / nunique on datetime64[ns] and
        # timedelta64[ns] data. `klass` is called to build the container
        # under test; isinstance(..., Index) selects the index assertions.

        # GH 3002, datetime64[ns]
        # don't test names though
        txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM',
                         'xxyyzz20100101EGG', 'xxyyww20090101EGG',
                         'foofoo20080909PIE', 'foofoo20080909GUM'])
        f = StringIO(txt)
        # Fixed-width layout: 6 chars person_id, 8 chars date, 3 chars food.
        df = pd.read_fwf(f, widths=[6, 8, 3],
                         names=["person_id", "dt", "food"],
                         parse_dates=["dt"])

        s = klass(df['dt'].copy())
        s.name = None
        # Expected counts: 2010-01-01 three times, 2008-09-09 twice and
        # 2009-01-01 once.
        idx = pd.to_datetime(['2010-01-01 00:00:00',
                              '2008-09-09 00:00:00',
                              '2009-01-01 00:00:00'])
        expected_s = Series([3, 2, 1], index=idx)
        tm.assert_series_equal(s.value_counts(), expected_s)

        # Expected unique values, listed in the order the dates first
        # appear in `txt`.
        expected = np_array_datetime64_compat(['2010-01-01 00:00:00',
                                               '2009-01-01 00:00:00',
                                               '2008-09-09 00:00:00'],
                                              dtype='datetime64[ns]')
        if isinstance(s, Index):
            tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
        else:
            tm.assert_numpy_array_equal(s.unique(), expected)

        assert s.nunique() == 3

        # with NaT
        s = df['dt'].copy()
        s = klass([v for v in s.values] + [pd.NaT])

        # By default the NaT entry is not counted.
        result = s.value_counts()
        assert result.index.dtype == 'datetime64[ns]'
        tm.assert_series_equal(result, expected_s)

        # With dropna=False the NaT entry shows up with a count of one;
        # expected_s is extended in place to reflect that.
        result = s.value_counts(dropna=False)
        expected_s[pd.NaT] = 1
        tm.assert_series_equal(result, expected_s)

        unique = s.unique()
        assert unique.dtype == 'datetime64[ns]'

        # numpy_array_equal cannot compare pd.NaT
        if isinstance(s, Index):
            exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
            tm.assert_index_equal(unique, exp_idx)
        else:
            # Compare the three real values, then check the trailing one
            # separately for missingness.
            tm.assert_numpy_array_equal(unique[:3], expected)
            assert pd.isna(unique[3])

        assert s.nunique() == 3
        assert s.nunique(dropna=False) == 4

        # timedelta64[ns]
        # df.dt - df.dt is zero everywhere, so every row becomes one day.
        td = df.dt - df.dt + timedelta(1)
        td = klass(td, name='dt')

        result = td.value_counts()
        expected_s = Series([6], index=[Timedelta('1day')], name='dt')
        tm.assert_series_equal(result, expected_s)

        expected = TimedeltaIndex(['1 days'], name='dt')
        if isinstance(td, Index):
            tm.assert_index_equal(td.unique(), expected)
        else:
            tm.assert_numpy_array_equal(td.unique(), expected.values)

        # Same computation with the operands of the addition swapped.
        td2 = timedelta(1) + (df.dt - df.dt)
        td2 = klass(td2, name='dt')
        result2 = td2.value_counts()
        tm.assert_series_equal(result2, expected_s)
コード例 #14
0
ファイル: test_base.py プロジェクト: smoofra/pandas
    def test_value_counts_datetime64(self):
        # Exercises value_counts / unique / nunique on datetime64[ns] and
        # timedelta64[ns] data, once wrapped in an Index and once in a
        # Series.
        klasses = [Index, Series]
        for klass in klasses:
            # GH 3002, datetime64[ns]
            # don't test names though
            txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM',
                             'xxyyzz20100101EGG', 'xxyyww20090101EGG',
                             'foofoo20080909PIE', 'foofoo20080909GUM'])
            f = StringIO(txt)
            # Fixed-width layout: 6 chars person_id, 8 chars date,
            # 3 chars food.
            df = pd.read_fwf(f, widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())
            s.name = None

            # Every stamp carries the same 'Z' suffix so the expected
            # index does not depend on how a stray zone letter is parsed.
            idx = pd.to_datetime(['2010-01-01 00:00:00Z',
                                  '2008-09-09 00:00:00Z',
                                  '2009-01-01 00:00:00Z'])
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            # Expected unique values, in the order the dates first appear
            # in `txt`.
            expected = np_array_datetime64_compat(['2010-01-01 00:00:00Z',
                                                   '2009-01-01 00:00:00Z',
                                                   '2008-09-09 00:00:00Z'],
                                                  dtype='datetime64[ns]')
            if isinstance(s, DatetimeIndex):
                self.assert_index_equal(s.unique(), DatetimeIndex(expected))
            else:
                self.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass(list(s.values) + [pd.NaT])

            # By default the NaT entry is not counted.
            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            # With dropna=False the NaT entry shows up with a count of
            # one; expected_s is extended in place to reflect that.
            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')

            # numpy_array_equal cannot compare pd.NaT
            if isinstance(s, DatetimeIndex):
                self.assert_index_equal(unique[:3], DatetimeIndex(expected))
            else:
                self.assert_numpy_array_equal(unique[:3], expected)
            # The trailing element is either the NaT singleton or a raw
            # datetime64 whose integer view equals iNaT.
            self.assertTrue(unique[3] is pd.NaT or
                            unique[3].astype('int64') == pd.tslib.iNaT)

            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]
            # df.dt - df.dt is zero everywhere, so every row becomes one
            # day.
            td = df.dt - df.dt + timedelta(1)
            td = klass(td, name='dt')

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')], name='dt')
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'], name='dt')
            if isinstance(td, TimedeltaIndex):
                self.assert_index_equal(td.unique(), expected)
            else:
                self.assert_numpy_array_equal(td.unique(), expected.values)

            # Same computation with the operands of the addition swapped.
            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2, name='dt')
            result2 = td2.value_counts()
            tm.assert_series_equal(result2, expected_s)
コード例 #15
0
ファイル: test_ops.py プロジェクト: zaki-indra/pandas
    def test_value_counts_datetime64(self, index_or_series):
        # Exercises value_counts / unique / nunique on datetime64[ns] and
        # timedelta64[ns] data. `index_or_series` is called to build the
        # container under test; isinstance(..., Index) selects the index
        # assertions.
        klass = index_or_series

        # GH 3002, datetime64[ns]
        # don't test names though
        txt = "\n".join([
            "xxyyzz20100101PIE",
            "xxyyzz20100101GUM",
            "xxyyzz20100101EGG",
            "xxyyww20090101EGG",
            "foofoo20080909PIE",
            "foofoo20080909GUM",
        ])
        f = StringIO(txt)
        # Fixed-width layout: 6 chars person_id, 8 chars date, 3 chars food.
        df = pd.read_fwf(f,
                         widths=[6, 8, 3],
                         names=["person_id", "dt", "food"],
                         parse_dates=["dt"])

        s = klass(df["dt"].copy())
        s.name = None
        # Expected counts: 2010-01-01 three times, 2008-09-09 twice and
        # 2009-01-01 once.
        idx = pd.to_datetime([
            "2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"
        ])
        expected_s = Series([3, 2, 1], index=idx)
        tm.assert_series_equal(s.value_counts(), expected_s)

        # Expected unique values, listed in the order the dates first
        # appear in `txt`.
        expected = np_array_datetime64_compat(
            [
                "2010-01-01 00:00:00", "2009-01-01 00:00:00",
                "2008-09-09 00:00:00"
            ],
            dtype="datetime64[ns]",
        )
        if isinstance(s, Index):
            tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
        else:
            tm.assert_numpy_array_equal(s.unique(), expected)

        assert s.nunique() == 3

        # with NaT
        s = df["dt"].copy()
        s = klass(list(s.values) + [pd.NaT])

        # By default the NaT entry is not counted.
        result = s.value_counts()
        assert result.index.dtype == "datetime64[ns]"
        tm.assert_series_equal(result, expected_s)

        # With dropna=False the NaT entry shows up with a count of one;
        # expected_s is extended in place to reflect that.
        result = s.value_counts(dropna=False)
        expected_s[pd.NaT] = 1
        tm.assert_series_equal(result, expected_s)

        unique = s.unique()
        assert unique.dtype == "datetime64[ns]"

        # numpy_array_equal cannot compare pd.NaT
        if isinstance(s, Index):
            exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
            tm.assert_index_equal(unique, exp_idx)
        else:
            # Compare the three real values, then check the trailing one
            # separately for missingness.
            tm.assert_numpy_array_equal(unique[:3], expected)
            assert pd.isna(unique[3])

        assert s.nunique() == 3
        assert s.nunique(dropna=False) == 4

        # timedelta64[ns]
        # df.dt - df.dt is zero everywhere, so every row becomes one day.
        td = df.dt - df.dt + timedelta(1)
        td = klass(td, name="dt")

        result = td.value_counts()
        expected_s = Series([6], index=[Timedelta("1day")], name="dt")
        tm.assert_series_equal(result, expected_s)

        expected = TimedeltaIndex(["1 days"], name="dt")
        if isinstance(td, Index):
            tm.assert_index_equal(td.unique(), expected)
        else:
            tm.assert_numpy_array_equal(td.unique(), expected.values)

        # Same computation with the operands of the addition swapped.
        td2 = timedelta(1) + (df.dt - df.dt)
        td2 = klass(td2, name="dt")
        result2 = td2.value_counts()
        tm.assert_series_equal(result2, expected_s)
コード例 #16
0
 def date_parser(date, time):
     # Join both parts into "<date>T<time>Z" text and convert the result
     # with second resolution.
     return np_array_datetime64_compat(date + 'T' + time + 'Z',
                                       dtype='datetime64[s]')
コード例 #17
0
ファイル: test_parse_dates.py プロジェクト: chrish42/pandas
 def date_parser(dt, time):
     # Join both parts into "<dt>T<time>Z" text, then convert it with
     # second resolution.
     iso_text = dt + "T" + time + "Z"
     return np_array_datetime64_compat(iso_text, dtype="datetime64[s]")
コード例 #18
0
def test_parsing_valid_dates(data, expected):
    # `data` is converted through array_to_datetime as an object array and
    # compared with `expected` rendered as an M8[ns] array.
    values = np.array(data, dtype=object)
    converted, _ = tslib.array_to_datetime(values)

    wanted = np_array_datetime64_compat(expected, dtype="M8[ns]")
    tm.assert_numpy_array_equal(converted, wanted)
コード例 #19
0
 def test_coerce_outside_ns_bounds_one_valid(self):
     # With errors='coerce' the first entry is expected to come back as
     # iNaT while the second one is converted normally.
     values = np.array(['1/1/1000', '1/1/2000'], dtype=object)
     converted, _ = tslib.array_to_datetime(values, errors='coerce')
     wanted = np_array_datetime64_compat(
         [tslib.iNaT, '2000-01-01T00:00:00.000000000-0000'],
         dtype='M8[ns]')
     tm.assert_numpy_array_equal(converted, wanted)
コード例 #20
0
ファイル: parse_dates.py プロジェクト: mwaskom/pandas
 def date_parser(date, time):
     # Join both parts into "<date>T<time>Z" text, then convert it with
     # second resolution.
     iso_text = date + 'T' + time + 'Z'
     return np_array_datetime64_compat(iso_text, dtype='datetime64[s]')
コード例 #21
0
    def test_value_counts_inferred(self):
        # Exercises value_counts / unique / nunique for string, integer
        # (binned), NaN-containing, empty, datetime64[ns] and
        # timedelta64[ns] data, once per class in `klasses`.
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)
            expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
            self.assertEqual(s.nunique(), 4)
            # don't sort, have to sort after the fact as not sorting is
            # platform-dep
            hist = s.value_counts(sort=False).sort_values()
            expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
            tm.assert_series_equal(hist, expected)

            # sort ascending
            hist = s.value_counts(ascending=True)
            expected = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(hist, expected)

            # relative histogram.
            hist = s.value_counts(normalize=True)
            expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(hist, expected)

            # bins
            # Binning the string data is expected to raise TypeError.
            self.assertRaises(TypeError,
                              lambda bins: s.value_counts(bins=bins), 1)

            s1 = Series([1, 1, 2, 3])
            res1 = s1.value_counts(bins=1)
            exp1 = Series({0.998: 4})
            tm.assert_series_equal(res1, exp1)
            res1n = s1.value_counts(bins=1, normalize=True)
            exp1n = Series({0.998: 1.0})
            tm.assert_series_equal(res1n, exp1n)

            self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3]))
            self.assertEqual(s1.nunique(), 3)

            # The explicit index fixes the row order the binned result is
            # compared against.
            res4 = s1.value_counts(bins=4)
            exp4 = Series({0.998: 2, 1.5: 1, 2.0: 0, 2.5: 1},
                          index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4, exp4)
            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series({0.998: 0.5, 1.5: 0.25, 2.0: 0.0, 2.5: 0.25},
                           index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly
            s_values = [
                'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
            ]
            s = klass(s_values)
            expected = Series([4, 3, 2], index=['b', 'a', 'd'])
            tm.assert_series_equal(s.value_counts(), expected)

            # unique() keeps a NaN entry while nunique() does not count it.
            self.assert_numpy_array_equal(
                s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
            self.assertEqual(s.nunique(), 3)

            # Empty input.
            s = klass({})
            expected = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(),
                                   expected,
                                   check_index_type=False)
            self.assert_numpy_array_equal(s.unique(), np.array([]))
            self.assertEqual(s.nunique(), 0)

            # GH 3002, datetime64[ns]
            # don't test names though
            txt = "\n".join([
                'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
                'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
            ])
            f = StringIO(txt)
            # Fixed-width layout: 6 chars person_id, 8 chars date,
            # 3 chars food.
            df = pd.read_fwf(f,
                             widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())
            s.name = None

            # Every stamp carries the same 'Z' suffix so the expected
            # index does not depend on how a stray zone letter is parsed.
            idx = pd.to_datetime([
                '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
                '2009-01-01 00:00:00Z'
            ])
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            expected = np_array_datetime64_compat(
                ['2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
                 '2008-09-09 00:00:00Z'],
                dtype='datetime64[ns]')
            if isinstance(s, DatetimeIndex):
                expected = DatetimeIndex(expected)
                self.assertTrue(s.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass(list(s.values) + [pd.NaT])

            # By default the NaT entry is not counted.
            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            # With dropna=False the NaT entry shows up with a count of
            # one; expected_s is extended in place to reflect that.
            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')

            # numpy_array_equal cannot compare pd.NaT
            self.assert_numpy_array_equal(unique[:3], expected)
            # The trailing element is either the NaT singleton or a raw
            # datetime64 whose integer view equals iNaT.
            self.assertTrue(unique[3] is pd.NaT
                            or unique[3].astype('int64') == pd.tslib.iNaT)

            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]
            # df.dt - df.dt is zero everywhere, so every row becomes one
            # day.
            td = df.dt - df.dt + timedelta(1)
            td = klass(td, name='dt')

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')], name='dt')
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'])
            if isinstance(td, TimedeltaIndex):
                self.assertTrue(td.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(td.unique(), expected.values)

            # Same computation with the operands of the addition swapped.
            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2, name='dt')
            result2 = td2.value_counts()
            tm.assert_series_equal(result2, expected_s)
コード例 #22
0
def test_parsing_valid_dates(data, expected):
    # Convert `data` as an object array and check the outcome against
    # `expected` rendered as an M8[ns] array.
    converted, _ = tslib.array_to_datetime(np.array(data, dtype=object))

    tm.assert_numpy_array_equal(
        converted, np_array_datetime64_compat(expected, dtype="M8[ns]"))
コード例 #23
0
ファイル: test_parse_dates.py プロジェクト: zenquiorra/pandas
 def date_parser(dt, time):
     # Assemble "<dt>T<time>" first, then append the "Z" suffix and
     # convert the text with second resolution.
     date_and_time = dt + "T" + time
     return np_array_datetime64_compat(date_and_time + "Z",
                                       dtype="datetime64[s]")
コード例 #24
0
ファイル: test_base.py プロジェクト: Jeevsy/pandas
    def test_value_counts_inferred(self):
        # Exercises value_counts / unique / nunique for string, integer
        # (binned), NaN-containing, empty, datetime64[ns] and
        # timedelta64[ns] data, once per class in `klasses`.
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)
            expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(s.value_counts(), expected)

            exp = np.unique(np.array(s_values, dtype=np.object_))
            self.assert_numpy_array_equal(s.unique(), exp)
            self.assertEqual(s.nunique(), 4)
            # don't sort, have to sort after the fact as not sorting is
            # platform-dep
            hist = s.value_counts(sort=False).sort_values()
            expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
            tm.assert_series_equal(hist, expected)

            # sort ascending
            hist = s.value_counts(ascending=True)
            expected = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(hist, expected)

            # relative histogram.
            hist = s.value_counts(normalize=True)
            expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(hist, expected)

            # bins
            # Binning the string data is expected to raise TypeError.
            self.assertRaises(TypeError,
                              lambda bins: s.value_counts(bins=bins), 1)

            s1 = Series([1, 1, 2, 3])
            res1 = s1.value_counts(bins=1)
            exp1 = Series({0.998: 4})
            tm.assert_series_equal(res1, exp1)
            res1n = s1.value_counts(bins=1, normalize=True)
            exp1n = Series({0.998: 1.0})
            tm.assert_series_equal(res1n, exp1n)

            self.assert_numpy_array_equal(s1.unique(),
                                          np.array([1, 2, 3], dtype=np.int64))
            self.assertEqual(s1.nunique(), 3)

            # The explicit index fixes the row order the binned result is
            # compared against.
            res4 = s1.value_counts(bins=4)
            exp4 = Series({0.998: 2,
                           1.5: 1,
                           2.0: 0,
                           2.5: 1}, index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4, exp4)
            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series(
                {0.998: 0.5,
                 1.5: 0.25,
                 2.0: 0.0,
                 2.5: 0.25}, index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly
            s_values = ['a', 'b', 'b', 'b', np.nan, np.nan,
                        'd', 'd', 'a', 'a', 'b']
            s = klass(s_values)
            expected = Series([4, 3, 2], index=['b', 'a', 'd'])
            tm.assert_series_equal(s.value_counts(), expected)

            # unique() keeps a NaN entry while nunique() does not count it.
            exp = np.array(['a', 'b', np.nan, 'd'], dtype=np.object_)
            self.assert_numpy_array_equal(s.unique(), exp)
            self.assertEqual(s.nunique(), 3)

            # Empty input.
            s = klass({})
            expected = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(), expected,
                                   check_index_type=False)
            # returned dtype differs depending on original
            self.assert_numpy_array_equal(s.unique(), np.array([]),
                                          check_dtype=False)
            self.assertEqual(s.nunique(), 0)

            # GH 3002, datetime64[ns]
            # don't test names though
            txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM',
                             'xxyyzz20100101EGG', 'xxyyww20090101EGG',
                             'foofoo20080909PIE', 'foofoo20080909GUM'])
            f = StringIO(txt)
            # Fixed-width layout: 6 chars person_id, 8 chars date,
            # 3 chars food.
            df = pd.read_fwf(f, widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())
            s.name = None

            # Every stamp carries the same 'Z' suffix so the expected
            # index does not depend on how a stray zone letter is parsed.
            idx = pd.to_datetime(
                ['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
                 '2009-01-01 00:00:00Z'])
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            expected = np_array_datetime64_compat(['2010-01-01 00:00:00Z',
                                                   '2009-01-01 00:00:00Z',
                                                   '2008-09-09 00:00:00Z'],
                                                  dtype='datetime64[ns]')
            if isinstance(s, DatetimeIndex):
                expected = DatetimeIndex(expected)
                self.assertTrue(s.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass(list(s.values) + [pd.NaT])

            # By default the NaT entry is not counted.
            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            # With dropna=False the NaT entry shows up with a count of
            # one; expected_s is extended in place to reflect that.
            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')

            # numpy_array_equal cannot compare pd.NaT
            self.assert_numpy_array_equal(unique[:3], expected)
            # The trailing element is either the NaT singleton or a raw
            # datetime64 whose integer view equals iNaT.
            self.assertTrue(unique[3] is pd.NaT or unique[3].astype('int64') ==
                            pd.tslib.iNaT)

            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]
            # df.dt - df.dt is zero everywhere, so every row becomes one
            # day.
            td = df.dt - df.dt + timedelta(1)
            td = klass(td, name='dt')

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')], name='dt')
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'])
            if isinstance(td, TimedeltaIndex):
                self.assertTrue(td.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(td.unique(), expected.values)

            # Same computation with the operands of the addition swapped.
            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2, name='dt')
            result2 = td2.value_counts()
            tm.assert_series_equal(result2, expected_s)