Beispiel #1
0
    def test_endswith(self):
        # Covers ``str.endswith`` for plain strings, mixed-type input and
        # unicode strings, including the ``na`` fill argument.
        values = Series(["om", NA, "foo_nom", "nom", "bar_foo", NA, "foo"])

        result = values.str.endswith("foo")
        exp = Series([False, NA, False, False, True, NA, True])
        tm.assert_series_equal(result, exp)

        # mixed: every non-string entry is expected to map to NA
        mixed = ["a", NA, "b", True, datetime.today(), "foo", None, 1, 2.0]
        rs = strings.str_endswith(mixed, "f")
        xp = [False, NA, False, NA, NA, False, NA, NA, NA]
        tm.assert_almost_equal(rs, xp)

        rs = Series(mixed).str.endswith("f")
        # ``assert_`` is a deprecated unittest alias; assertIsInstance is the
        # supported spelling and reports the actual type on failure.
        self.assertIsInstance(rs, Series)
        tm.assert_almost_equal(rs, xp)

        # unicode
        values = Series([u"om", NA, u"foo_nom", u"nom", u"bar_foo", NA, u"foo"])

        result = values.str.endswith("foo")
        exp = Series([False, NA, False, False, True, NA, True])
        tm.assert_series_equal(result, exp)

        # with na=False the expectation is the same frame with missing
        # entries filled by False and cast to bool
        result = values.str.endswith("foo", na=False)
        tm.assert_series_equal(result, exp.fillna(False).astype(bool))
Beispiel #2
0
    def test_quarterly_resampling(self):
        # Annual resampling of a quarterly period series should match
        # resampling its timestamp form and converting back to periods.
        quarters = period_range("2000Q1", periods=10, freq="Q-DEC")
        quarterly = Series(np.arange(10), index=quarters)

        actual = quarterly.resample("A")
        stamped = quarterly.to_timestamp()
        expected = stamped.resample("A").to_period()
        assert_series_equal(actual, expected)
    def test_join_aware(self):
        # Adding a tz-naive and a tz-aware series must raise; joins and unions
        # of tz-aware indexes are checked for the resulting time zone.
        hourly = date_range("1/1/2011", periods=10, freq="H")
        naive = Series(np.random.randn(len(hourly)), index=hourly)
        aware = naive.tz_localize("utc")

        self.assertRaises(Exception, naive.__add__, aware)
        self.assertRaises(Exception, aware.__add__, naive)

        # both frames share US/Central, so the outer join should keep it
        left_index = date_range("2012-11-15 00:00:00", periods=6, freq="100L", tz="US/Central")
        right_index = date_range("2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central")
        left = DataFrame(np.zeros((6, 3)), index=left_index)
        right = DataFrame(np.zeros((3, 3)), index=right_index, columns=lrange(3, 6))

        joined = left.join(right, how="outer")
        expected_index = left.index.union(right.index)

        self.assertTrue(joined.index.equals(expected_index))
        self.assertTrue(joined.index.tz.zone == "US/Central")

        # non-overlapping ranges in two different zones: union is expected in UTC
        central = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central")
        eastern = date_range("2012-11-15 12:00:00", periods=6, freq="H", tz="US/Eastern")

        combined = central.union(eastern)
        self.assertTrue(combined.tz.zone == "UTC")
Beispiel #4
0
    def test_upsample_with_limit(self):
        # Forward-filling while upsampling 5-minute data to minutes should
        # honour ``limit`` the same way a limited ffill reindex does.
        index = date_range("1/1/2000", periods=3, freq="5t")
        series = Series(np.random.randn(len(index)), index)

        upsampled = series.resample("t", fill_method="ffill", limit=2)
        reindexed = series.reindex(upsampled.index, method="ffill", limit=2)
        assert_series_equal(upsampled, reindexed)
Beispiel #5
0
    def test_upsample_with_limit(self):
        # Upsampling annual periods to monthly with a bounded forward fill
        # should agree with asfreq followed by a limited ffill reindex.
        annual = period_range("1/1/2000", periods=5, freq="A")
        series = Series(np.random.randn(len(annual)), annual)

        upsampled = series.resample("M", fill_method="ffill", limit=2, convention="end")
        monthly = series.asfreq("M")
        expected = monthly.reindex(upsampled.index, method="ffill", limit=2)
        assert_series_equal(upsampled, expected)
Beispiel #6
0
    def test_partially_invalid_plot_data(self):
        # A series mixing strings with numbers must raise TypeError for
        # every plot kind listed below.
        mixed = Series(["a", "b", 1.0, 2])

        for kind in ("line", "bar", "barh", "kde", "density"):
            with tm.assertRaises(TypeError):
                mixed.plot(kind=kind)
Beispiel #7
0
 def test_zero_emsd(self):
     # ``tp.emsd`` on ``self.dead_still`` is expected to return an all-zero
     # series named "msd", indexed by "lagt" values 1.0 .. N-1.
     # NOTE(review): presumably ``dead_still`` holds motionless
     # trajectories -- confirm against the fixture setup.
     N = 10
     actual = tp.emsd(self.dead_still, 1, 1)
     # ``np.float`` was merely an alias of the builtin ``float`` and has been
     # removed from NumPy; the builtin produces the same float64 dtype.
     zeros = np.zeros(N, dtype=float)
     lags = np.arange(N, dtype=float)
     expected = Series(zeros, index=lags).iloc[1:]
     expected.index.name = "lagt"
     expected.name = "msd"
     assert_series_equal(actual, expected)
Beispiel #8
0
    def test_timedelta(self):
        # Timedelta data should survive a to_json/read_json round trip once
        # the values read back are converted to timedeltas again.
        def as_timedelta(values):
            return pd.to_timedelta(values, unit="ms")

        ser = Series([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(ser.dtype, "timedelta64[ns]")
        # index will be float dtype
        restored = pd.read_json(ser.to_json(), typ="series").apply(as_timedelta)
        assert_series_equal(ser, restored, check_index_type=False)

        # same data with an explicit float index: the index type is compared too
        float_index = pd.Index([0, 1], dtype=float)
        ser = Series([timedelta(23), timedelta(seconds=5)], index=float_index)
        self.assertEqual(ser.dtype, "timedelta64[ns]")
        restored = pd.read_json(ser.to_json(), typ="series").apply(as_timedelta)
        assert_series_equal(ser, restored)

        # single-column frame
        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(frame[0].dtype, "timedelta64[ns]")
        restored = pd.read_json(frame.to_json()).apply(as_timedelta)
        assert_frame_equal(frame, restored, check_index_type=False, check_column_type=False)

        # mixed frame written with nanosecond precision; the timedelta and
        # datetime columns are converted back by hand
        columns = {
            "a": [timedelta(days=23), timedelta(seconds=5)],
            "b": [1, 2],
            "c": pd.date_range(start="20130101", periods=2),
        }
        frame = DataFrame(columns)

        result = pd.read_json(frame.to_json(date_unit="ns"))
        result["a"] = pd.to_timedelta(result.a, unit="ns")
        result["c"] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result, check_index_type=False)
Beispiel #9
0
    def test_apply(self):
        # a ufunc passed to apply should match calling the ufunc directly
        via_apply = self.ts.apply(np.sqrt)
        assert_series_equal(via_apply, np.sqrt(self.ts))

        # elementwise-apply
        import math

        via_apply = self.ts.apply(math.exp)
        assert_series_equal(via_apply, np.exp(self.ts))

        # how to handle Series result, #2316
        expanded = self.ts.apply(lambda x: Series([x, x ** 2], index=["x", "x^2"]))
        wanted = DataFrame({"x": self.ts, "x^2": self.ts ** 2})
        tm.assert_frame_equal(expanded, wanted)

        # empty series
        empty = Series(dtype=object, name="foo", index=pd.Index([], name="bar"))
        applied = empty.apply(lambda x: x)
        tm.assert_series_equal(empty, applied)
        # check all metadata (GH 9322)
        self.assertIsNot(empty, applied)
        self.assertIs(empty.index, applied.index)
        self.assertEqual(empty.dtype, applied.dtype)
        self.assertEqual(empty.name, applied.name)

        # index but no data
        index_only = Series(index=[1, 2, 3])
        applied = index_only.apply(lambda x: x)
        tm.assert_series_equal(index_only, applied)
Beispiel #10
0
def plotAotiHour():
    """Plot the hourly station readings and annotate one sample.

    Reads ``urban-country/aoti_pm25.csv``, keeps the rows whose ``date`` lies
    between 20140514 and 20140527 (inclusive), plots the station column
    against timestamps built from the ``date`` and ``hour`` columns, prints
    and annotates the value found for 2014-05-17 10:00, then shows the figure.
    """
    fig = plt.figure(figsize=(18, 5))
    fig.patch.set_facecolor("white")

    df = pd.read_csv("urban-country/aoti_pm25.csv", sep=",", header=0)
    df = df[df["date"] >= 20140514]
    df = df[df["date"] <= 20140527]
    # Work on strings from here on so date and hour can be concatenated.
    df["date"] = df["date"].astype(str)
    df["hour"] = df["hour"].astype(str)
    dateAndTime = pd.to_datetime(df["date"] + df["hour"], format="%Y%m%d%H")
    aoti = df["奥体中心"].tolist()
    ts_aoti = Series(aoti, index=dateAndTime)
    ts_aoti.plot(linestyle="-", color="black", marker="8", markersize=4, label=u"奥体中心")

    # Plain ``5``: a zero-prefixed literal such as ``05`` is a syntax error
    # on Python 3 and means the same value on Python 2.
    time = dt.datetime(2014, 5, 17, 10)
    df = df[df["date"] == "20140517"]
    df = df[df["hour"] == "10"]
    # Fourth column of the first matching row.
    # NOTE(review): presumably the station reading -- confirm the CSV layout.
    value = df.iloc[0, 3]
    # Formatting into a single string prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s" % (mdates.date2num(time), value))
    plt.annotate(
        u"aoti24",
        xy=(mdates.date2num(time), value),
        xytext=(30, 20),
        textcoords="offset points",
        arrowprops=dict(arrowstyle="-|>"),
    )
    plt.show()
Beispiel #11
0
    def test_unstack(self):
        from numpy import nan
        from pandas.util.testing import assert_frame_equal

        # two-level index: unstacking the inner level gives a bar/foo frame
        two_level = MultiIndex(
            levels=[["bar", "foo"], ["one", "three", "two"]],
            labels=[[1, 1, 0, 0], [0, 1, 0, 2]],
        )
        series = Series(np.arange(4.0), index=two_level)

        wanted = DataFrame(
            [[2.0, nan, 3.0], [0.0, 1.0, nan]],
            index=["bar", "foo"],
            columns=["one", "three", "two"],
        )
        assert_frame_equal(series.unstack(), wanted)

        # unstacking the outer level instead gives the transpose
        assert_frame_equal(series.unstack(level=0), wanted.T)

        # three-level index with a single label on the outer level
        three_level = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        series = Series(np.random.randn(6), index=three_level)
        remaining = MultiIndex(
            levels=[["one", "two", "three"], [0, 1]],
            labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        wanted = DataFrame({"bar": series.values}, index=remaining).sortlevel(0)
        assert_frame_equal(series.unstack(0), wanted)
Beispiel #12
0
    def test_timedelta64_nan(self):

        from pandas import tslib

        td = Series([timedelta(days=i) for i in range(10)])

        # nan ops on timedeltas: each null marker written into the series must
        # read back as null with value iNaT, and restoring the original
        # element must clear it again
        td1 = td.copy()
        null_markers = [np.nan, tslib.iNaT, tslib.NaT]
        for pos, marker in enumerate(null_markers):
            td1[pos] = marker
            self.assertTrue(isnull(td1[pos]))
            self.assertEqual(td1[pos].value, tslib.iNaT)
            td1[pos] = td[pos]
            self.assertFalse(isnull(td1[pos]))
Beispiel #13
0
    def test_hourly(self):
        # Pivot an hourly series with ``pivot_annual`` and compare selected
        # columns against hour-of-year labels computed by hand.
        n_periods = 18 * 8760 + 4 * 24
        rng_hourly = date_range("1/1/1994", periods=n_periods, freq="H")
        data_hourly = np.random.randint(100, 350, rng_hourly.size)
        ts_hourly = Series(data_hourly, index=rng_hourly)

        # position of every observation inside its own year (0-based)
        by_year = ts_hourly.groupby(ts_hourly.index.year)
        positions = by_year.apply(lambda x: x.reset_index(drop=True))
        hoy = positions.index.droplevel(0).values
        # in non-leap years everything from position 1416 onwards is pushed
        # 24 hours later
        not_leap = ~isleapyear(ts_hourly.index.year)
        hoy[not_leap & (hoy >= 1416)] += 24
        hoy += 1  # switch to 1-based labels

        annual = pivot_annual(ts_hourly)

        ts_hourly = ts_hourly.astype(float)
        for label in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]:
            wanted = ts_hourly[hoy == label]
            wanted.index = [stamp.year for stamp in wanted.index]

            column = annual[label].dropna()
            tm.assert_series_equal(column, wanted, check_names=False)
            self.assertEqual(column.name, label)

        # the midnight observation of every Feb 29
        stamps = ts_hourly.index
        feb29_midnight = (stamps.month == 2) & (stamps.day == 29) & (stamps.hour == 0)
        leaps = ts_hourly[feb29_midnight]
        hour = leaps.index.dayofyear[0] * 24 - 23
        leaps.index = leaps.index.year
        leaps.name = 1417
        tm.assert_series_equal(annual[hour].dropna(), leaps)
Beispiel #14
0
 def test_to_csv_path_is_none(self):
     # GH 8215
     # With ``path=None`` Series.to_csv() used to return None, which was
     # inconsistent with DataFrame.to_csv() returning a string.
     csv_text = Series([1, 2, 3]).to_csv(path=None)
     self.assertIsInstance(csv_text, str)
Beispiel #15
0
def _wrap_results(result, dtype):
    """Re-wrap a computed result according to a datetime-like ``dtype``.

    * datetime64 dtype: arrays are viewed as ``dtype``; any other result is
      passed to ``lib.Timestamp``.
    * timedelta64 dtype: arrays are viewed as ``dtype``; any other result is
      placed in a one-element ``timedelta64[ns]`` Series (floats are
      truncated to int first).
    * any other dtype: ``result`` is returned unchanged.
    """
    kind = dtype.type
    is_array = isinstance(result, np.ndarray)

    if issubclass(kind, np.datetime64):
        if is_array:
            return result.view(dtype)
        return lib.Timestamp(result)

    if issubclass(kind, np.timedelta64):
        if is_array:
            return result.view(dtype)

        # Scalar timedelta result: let Series do the conversion, since it
        # does the right thing in py3 and sidesteps the numpy 1.6.2 bug that
        # yields a timedelta64[us] dtype.
        from pandas import Series

        # coerce float to results
        if is_float(result):
            result = int(result)
        return Series([result], dtype="timedelta64[ns]")

    return result
Beispiel #16
0
    def test_ts_plot_format_coord(self):
        def check_format_of_first_point(ax, expected_string):
            # Compare the axes' coordinate text for the first point of the
            # first line; a ValueError while comparing skips the test.
            line = ax.get_lines()[0]
            x0 = line.get_xdata()[0].ordinal
            y0 = line.get_ydata()[0]
            try:
                self.assertEqual(expected_string, ax.format_coord(x0, y0))
            except ValueError:
                raise nose.SkipTest("skipping test because issue forming test comparison GH7664")

        annual = Series(1, index=date_range("2014-01-01", periods=3, freq="A-DEC"))
        daily = Series(1, index=date_range("2014-01-01", periods=3, freq="D"))
        # note the daily plot is added to the annual plot already in
        # existence, and changes its freq field
        cases = [
            (annual, "t = 2014  y = 1.000000"),
            (daily, "t = 2014-01-01  y = 1.000000"),
        ]

        for series, text in cases:
            check_format_of_first_point(series.plot(), text)
        tm.close()

        # tsplot
        import matplotlib.pyplot as plt
        from pandas.tseries.plotting import tsplot

        for series, text in cases:
            tsplot(series, plt.Axes.plot)
            check_format_of_first_point(plt.gca(), text)
Beispiel #17
0
    def test_invalid_plot_data(self):
        # A series holding only strings must raise TypeError for every plot
        # kind listed below.
        letters = Series(list("abcd"))

        for kind in ("line", "bar", "barh", "kde", "density"):
            with tm.assertRaises(TypeError):
                letters.plot(kind=kind)
Beispiel #18
0
    def test_from_weekly_resampling(self):
        # Plot a monthly series and then a weekly one on top of it; every
        # resulting line is checked for the weekly frequency and for the
        # expected x data.
        idxh = date_range("1/1/1999", periods=52, freq="W")
        idxl = date_range("1/1/1999", periods=12, freq="M")
        high = Series(np.random.randn(len(idxh)), idxh)
        low = Series(np.random.randn(len(idxl)), idxl)

        expected_h = idxh.to_period().asi8
        expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553, 1558, 1562])

        def check_lines(lines):
            for line in lines:
                # assertTrue(a, b) treats ``b`` as the failure message, so the
                # frequencies were never compared; assertEqual compares them.
                self.assertEqual(PeriodIndex(data=line.get_xdata()).freq, idxh.freq)
                xdata = line.get_xdata(orig=False)
                if len(xdata) == 12:  # idxl lines
                    self.assert_numpy_array_equal(xdata, expected_l)
                else:
                    self.assert_numpy_array_equal(xdata, expected_h)

        low.plot()
        ax = high.plot()
        check_lines(ax.get_lines())
        tm.close()

        # tsplot
        from pandas.tseries.plotting import tsplot
        import matplotlib.pyplot as plt

        tsplot(low, plt.Axes.plot)
        check_lines(tsplot(high, plt.Axes.plot))
Beispiel #19
0
def quarter_plot(x, dates=None, ylabel=None, ax=None):
    """
    Seasonal plot of quarterly data

    Parameters
    ----------
    x : array-like
        Seasonal data to plot. If dates is None, x must be a pandas object
        with a PeriodIndex or DatetimeIndex with a quarterly frequency.
    dates : array-like, optional
        If `x` is not a pandas object, then dates must be supplied. They are
        used to build a quarterly PeriodIndex for `x`.
    ylabel : str, optional
        The label for the y-axis. Will attempt to use the `name` attribute
        of the Series.
    ax : matplotlib.axes, optional
        Existing axes instance.

    Returns
    -------
    matplotlib.Figure
    """
    if dates is None:
        from statsmodels.tools.data import _check_period_index

        # validate the index of ``x`` against the "Q" frequency
        _check_period_index(x, freq="Q")
    else:
        from pandas import Series, PeriodIndex

        x = Series(x, index=PeriodIndex(dates, freq="Q"))

    # one group per quarter number taken from each index label
    xticklabels = ["q1", "q2", "q3", "q4"]
    return seasonal_plot(x.groupby(lambda y: y.quarter), xticklabels, ylabel=ylabel, ax=ax)
Beispiel #20
0
    def test_errorbar_plot(self):

        values = Series(np.arange(10))
        errors = np.random.randn(10)

        # test line and bar plots with the errors given as a Series, an
        # ndarray and a plain list
        for kind in ("line", "bar"):
            for yerr in (Series(errors), errors, errors.tolist()):
                _check_plot_works(values.plot, yerr=yerr, kind=kind)

        _check_plot_works(values.plot, xerr=errors)

        # test time series plotting
        month_ends = date_range("1/1/2000", "1/1/2001", freq="M")
        ts = Series(np.arange(12), index=month_ends)
        ts_errors = Series(np.random.randn(12), index=month_ends)

        _check_plot_works(ts.plot, yerr=ts_errors)

        # check incorrect lengths and types
        with tm.assertRaises(ValueError):
            values.plot(yerr=np.arange(11))

        text_errors = ["zzz"] * 10
        with tm.assertRaises(TypeError):
            values.plot(yerr=text_errors)
Beispiel #21
0
    def test_all_values_single_bin(self):
        # 2070
        # All twelve monthly periods fall into one annual bin, so the first
        # resampled value should equal the plain mean.
        months = period_range(start="2012-01-01", end="2012-12-31", freq="M")
        monthly = Series(np.random.randn(len(months)), index=months)

        yearly = monthly.resample("A", how="mean")
        tm.assert_almost_equal(yearly[0], monthly.mean())
Beispiel #22
0
    def test_to_string_float_na_spacing(self):
        # NaN entries should be right-aligned with the formatted floats.
        series = Series([0.0, 1.5678, 2.0, -3.0, 4.0])
        series[::2] = np.nan

        rendered = series.to_string()
        wanted_lines = [
            "0       NaN",
            "1    1.5678",
            "2       NaN",
            "3   -3.0000",
            "4       NaN",
        ]
        self.assertEqual(rendered, "\n".join(wanted_lines))
Beispiel #23
0
    def test_resample_base(self):
        # Resampling to 5-minute bins with ``base=2`` is expected to produce
        # bin labels starting at 23:57 of the previous day.
        rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s")
        ts = Series(np.random.randn(len(rng)), index=rng)

        resampled = ts.resample("5min", base=2)
        exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min")
        # ``assert_`` is a deprecated unittest alias; assertTrue is the
        # supported spelling of the same check.
        self.assertTrue(resampled.index.equals(exp_rng))
Beispiel #24
0
    def test_constructor_series(self):
        # Building a Series from another Series with a sorted copy of its
        # index should give the same result as sort_index().
        labels = ["d", "b", "a", "c"]
        sorted_labels = sorted(labels)
        source = Series([4, 7, -5, 3], index=labels)
        realigned = Series(source, index=sorted_labels)

        assert_series_equal(realigned, source.sort_index())
Beispiel #25
0
    def test_resample_tz_localized(self):
        # Weekly resampling of a tz-aware series should match resampling the
        # wall-clock (naive) values and localizing the result afterwards.
        days = date_range(start="2012-4-13", end="2012-5-1")
        ts = Series(lrange(len(days)), days)

        ts_local = ts.tz_localize("UTC").tz_convert("America/Los_Angeles")

        weekly = ts_local.resample("W")

        # strip the tz information but keep the local wall-clock times
        naive = ts_local.copy()
        naive.index = [stamp.replace(tzinfo=None) for stamp in naive.index.to_pydatetime()]

        wanted = naive.resample("W").tz_localize("America/Los_Angeles")

        assert_series_equal(weekly, wanted)

        # it works
        ts_local.resample("D")

        # #2245
        minutes = date_range("2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney")
        pair = Series([1, 2], index=minutes)

        daily = pair.resample("D", closed="right", label="right")
        day_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney")
        assert_series_equal(daily, Series([1.5], index=day_index))

        # for good measure
        as_periods = pair.resample("D", kind="period")
        period_index = period_range("2001-09-20", periods=1, freq="D")
        assert_series_equal(as_periods, Series([1.5], index=period_index))
Beispiel #26
0
 def test_no_order(self):
     # Calling these interpolation methods without an ``order`` argument
     # must raise ValueError.
     _skip_if_no_scipy()
     series = Series([0, 1, np.nan, 3])
     for method in ("polynomial", "spline"):
         with tm.assertRaises(ValueError):
             series.interpolate(method=method)
Beispiel #27
0
 def test_mixed_freq_irreg_period(self):
     # Smoke test: an irregularly spaced time series and a business-day
     # period series are plotted one after the other without raising.
     ts = tm.makeTimeSeries()
     positions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 29]
     irregular = ts[positions]
     periods = period_range("1/3/2000", periods=30, freq="B")
     period_series = Series(np.random.randn(len(periods)), periods)
     irregular.plot()
     period_series.plot()
Beispiel #28
0
def ensure1d(x, name, series=False):
    """Coerce ``x`` to a one-dimensional array or Series.

    Parameters
    ----------
    x : Series, DataFrame or array-like
        Input data. A DataFrame must have exactly one column. Other input is
        converted with ``np.asarray``; 0-d input becomes a length-1 array and
        higher-dimensional input is squeezed.
    name : str
        Used in error messages and as the name of a Series built from
        array-like input.
    series : bool, optional
        If True return a Series, otherwise a NumPy array.

    Returns
    -------
    Series or ndarray

    Raises
    ------
    ValueError
        If ``x`` cannot be reduced to exactly one dimension.

    Notes
    -----
    A Series passed in (or extracted from a DataFrame) has a non-string
    ``name`` replaced in place by ``str(name)``.
    """
    if isinstance(x, DataFrame):
        if x.shape[1] != 1:
            raise ValueError(name + " must be squeezable to 1 dimension")
        # single column: continue with it as a Series on the frame's index
        x = Series(x[x.columns[0]], x.index)

    if isinstance(x, Series):
        if not isinstance(x.name, str):
            x.name = str(x.name)
        return x if series else np.asarray(x)

    # generic array-like input
    arr = x if isinstance(x, np.ndarray) else np.asarray(x)
    if arr.ndim == 0:
        arr = arr[None]
    elif arr.ndim != 1:
        arr = np.squeeze(arr)
        if arr.ndim != 1:
            raise ValueError(name + " must be squeezable to 1 dimension")

    return Series(arr, name=name) if series else np.asarray(arr)
    def test_tz_aware_asfreq(self):
        # Smoke test: converting a tz-aware daily series to minute frequency
        # must not raise.
        zone = self.tzstr("US/Eastern")
        daily = date_range("2011-12-01", "2012-07-20", freq="D", tz=zone)

        series = Series(np.random.randn(len(daily)), index=daily)

        # it works!
        series.asfreq("T")
Beispiel #30
0
 def test_series_density(self):
     # GH2803
     dense = Series(np.random.randn(10))
     # blank out everything except the first two and last two values
     dense[2:-2] = nan
     sparse = dense.to_sparse()
     observed = sparse.density  # don't die
     self.assertEqual(observed, 4 / 10.0)