Beispiel #1
0
    def test_resample_upsampling_picked_but_not_correct(self):
        # Resampling at the index's own frequency must keep the first label
        # and must aggregate rather than upsample.
        import datetime

        # Test for issue #3020
        daily_index = date_range("01-Jan-2014", "05-Jan-2014", freq="D")
        ones = Series(1, index=daily_index)

        same_freq = ones.resample("D")
        self.assertEqual(same_freq.index[0], daily_index[0])

        # GH 5955
        # incorrectly deciding to upsample when the axis frequency matches
        # the resample frequency
        noon_stamps = [datetime.datetime(1975, 1, day, 12, 0) for day in range(1, 6)]
        s = Series(np.arange(1.0, 6), index=noon_stamps)
        midnight_index = date_range("19750101", periods=5, freq="D")
        expected = Series(np.arange(1.0, 6), index=midnight_index)

        counted = s.resample("D", how="count")
        assert_series_equal(counted, Series(1, index=expected.index))

        # One observation per day, so sum, mean and the default aggregation
        # are all expected to give back the original values.
        aggregated = [s.resample("D", **options) for options in ({"how": "sum"}, {"how": "mean"}, {})]
        for outcome in aggregated:
            assert_series_equal(outcome, expected)
Beispiel #2
0
    def test_resample_tz_localized(self):
        # Resampling a tz-aware series must match resampling its wall-clock
        # (naive) values and localizing the result afterwards.
        days = date_range(start="2012-4-13", end="2012-5-1")
        ts = Series(lrange(len(days)), days)

        ts_local = ts.tz_localize("UTC").tz_convert("America/Los_Angeles")

        weekly = ts_local.resample("W")

        wall_clock = ts_local.copy()
        wall_clock.index = [stamp.replace(tzinfo=None) for stamp in wall_clock.index.to_pydatetime()]

        weekly_expected = wall_clock.resample("W").tz_localize("America/Los_Angeles")
        assert_series_equal(weekly, weekly_expected)

        # it works (smoke test only; the result is not inspected)
        ts_local.resample("D")

        # #2245
        sydney_minutes = date_range("2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney")
        s = Series([1, 2], index=sydney_minutes)

        right_closed = s.resample("D", closed="right", label="right")
        next_day = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney")
        assert_series_equal(right_closed, Series([1.5], index=next_day))

        # for good measure
        as_period = s.resample("D", kind="period")
        same_day = period_range("2001-09-20", periods=1, freq="D")
        assert_series_equal(as_period, Series([1.5], index=same_day))
Beispiel #3
0
    def test_resample_loffset(self):
        # loffset shifts the result labels; the timedelta, string and offset
        # spellings of the same shift must give identical output.
        minutes = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
        s = Series(np.random.randn(14), index=minutes)
        right_mean = dict(how="mean", closed="right", label="right")

        result = s.resample("5min", loffset=timedelta(minutes=1), **right_mean)
        bin_means = [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()]
        shifted_labels = date_range("1/1/2000", periods=4, freq="5min") + timedelta(minutes=1)
        assert_series_equal(result, Series(bin_means, index=shifted_labels))

        same_shift_as_string = s.resample("5min", loffset="1min", **right_mean)
        assert_series_equal(result, same_shift_as_string)

        same_shift_as_offset = s.resample("5min", loffset=Minute(1), **right_mean)
        assert_series_equal(result, same_shift_as_offset)

        self.assertEqual(result.index.freq, Minute(5))

        # from daily
        dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D")
        ser = Series(np.random.rand(len(dti)), dti)

        # to weekly
        unshifted = ser.resample("w-sun", how="last")
        shifted = ser.resample("w-sun", how="last", loffset=-bday)
        self.assertEqual(unshifted.index[0] - bday, shifted.index[0])
Beispiel #4
0
    def test_weekly_resample_buglet(self):
        # #1327: the bare "W" rule must give the same result as the explicit
        # "W-SUN" rule.
        business_days = date_range("1/1/2000", freq="B", periods=20)
        ts = Series(np.random.randn(len(business_days)), index=business_days)

        bare_weekly = ts.resample("W")
        sunday_weekly = ts.resample("W-SUN")
        assert_series_equal(bare_weekly, sunday_weekly)
Beispiel #5
0
    def test_resample_weekly_all_na(self):
        # Wednesday-anchored weekly data resampled onto Thursday anchors: the
        # plain resample is expected to be entirely NaN, while a forward fill
        # must agree with asfreq(..., method="ffill").
        rng = date_range("1/1/2000", periods=10, freq="W-WED")
        ts = Series(np.random.randn(len(rng)), index=rng)

        result = ts.resample("W-THU")

        self.assertTrue(result.isnull().all())

        # The resampled series carries one more trailing label than the
        # asfreq result, hence the [:-1] before comparing.
        result = ts.resample("W-THU", fill_method="ffill")[:-1]
        expected = ts.asfreq("W-THU", method="ffill")
        assert_series_equal(result, expected)
Beispiel #6
0
class TestTimeGrouper(unittest.TestCase):
    # Checks TimeGrouper / resample results against an ordinary groupby on
    # calendar keys.

    def setUp(self):
        # 1000 random observations on a date_range starting 2000-01-01.
        self.ts = Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000))

    def test_apply(self):
        # Applying a function per annual TimeGrouper bin must match applying
        # it per calendar year.
        grouper = TimeGrouper("A", label="right", closed="right")

        grouped = self.ts.groupby(grouper)

        # three largest values of each group
        f = lambda x: x.order()[-3:]

        applied = grouped.apply(f)
        expected = self.ts.groupby(lambda x: x.year).apply(f)

        # The outer level holds the group key (bin label vs. year number) and
        # differs between the two results, so drop it before comparing.
        applied.index = applied.index.droplevel(0)
        expected.index = expected.index.droplevel(0)
        assert_series_equal(applied, expected)

    def test_count(self):
        # Annual "count" must match the per-year groupby count once every
        # third value has been blanked out.
        self.ts[::3] = np.nan

        result = self.ts.resample("A", how="count")

        expected = self.ts.groupby(lambda x: x.year).count()
        # Align labels: resample labels bins with timestamps, groupby with years.
        expected.index = result.index

        assert_series_equal(result, expected)

    def test_numpy_reduction(self):
        # Annual "prod" must match np.prod applied per calendar year.
        result = self.ts.resample("A", how="prod", closed="right")

        expected = self.ts.groupby(lambda x: x.year).agg(np.prod)
        expected.index = result.index

        assert_series_equal(result, expected)

    def test_apply_iteration(self):
        # #2300
        N = 1000
        ind = pd.date_range(start="2000-01-01", freq="D", periods=N)
        df = DataFrame({"open": 1, "close": 2}, index=ind)
        tg = TimeGrouper("M")

        grouper = tg.get_grouper(df)

        # Errors

        grouped = df.groupby(grouper, group_keys=False)
        f = lambda df: df["close"] / df["open"]

        # it works!
        result = grouped.apply(f)
        # With group_keys=False the applied result keeps the original index.
        self.assertTrue(result.index.equals(df.index))
Beispiel #7
0
    def test_closed_left_corner(self):
        # #1465: a leading NaN must not change left-closed bin means, whichever
        # edge is used as the label.
        minutes = date_range(start="1/1/2012 9:30", freq="1min", periods=21)
        s = Series(np.random.randn(21), index=minutes)
        s[0] = np.nan

        for label in ("right", "left"):
            with_leading_nan = s.resample("10min", how="mean", closed="left", label=label)
            without_leading_nan = s[1:].resample("10min", how="mean", closed="left", label=label)
            assert_series_equal(with_leading_nan, without_leading_nan)
Beispiel #8
0
    def test_secondary_y_mixed_freq_ts_xlim(self):
        # GH 3490 - mixed frequency timeseries with secondary y
        minutely = date_range("2000-01-01", periods=10000, freq="min")
        ts = Series(1, index=minutely)

        ax = ts.plot()
        xmin_before, xmax_before = ax.get_xlim()

        # Overlay the daily downsample on a secondary y axis of the same plot.
        daily = ts.resample("D")
        daily.plot(secondary_y=True, ax=ax)
        xmin_after, xmax_after = ax.get_xlim()

        # a downsample should not have changed either limit
        self.assertEqual(xmin_before, xmin_after)
        self.assertEqual(xmax_before, xmax_after)
Beispiel #9
0
    def test_resample_tz_localized(self):
        # Resampling a tz-aware series must match resampling its wall-clock
        # (naive) values and localizing the result afterwards.
        days = date_range(start="2012-4-13", end="2012-5-1")
        ts = Series(lrange(len(days)), days)

        ts_local = ts.tz_localize("UTC").tz_convert("America/Los_Angeles")

        weekly = ts_local.resample("W")

        wall_clock = ts_local.copy()
        wall_clock.index = [stamp.replace(tzinfo=None) for stamp in wall_clock.index.to_pydatetime()]

        weekly_expected = wall_clock.resample("W").tz_localize("America/Los_Angeles")
        assert_series_equal(weekly, weekly_expected)

        # it works (smoke test only; the result is not inspected)
        ts_local.resample("D")

        # #2245
        sydney_minutes = date_range("2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney")
        s = Series([1, 2], index=sydney_minutes)

        right_closed = s.resample("D", closed="right", label="right")
        next_day = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney")
        assert_series_equal(right_closed, Series([1.5], index=next_day))

        # for good measure
        as_period = s.resample("D", kind="period")
        same_day = period_range("2001-09-20", periods=1, freq="D")
        assert_series_equal(as_period, Series([1.5], index=same_day))

        # GH 6397
        # comparing an offset that doesn't propagate tz's
        hours = date_range("1/1/2011", periods=20000, freq="H").tz_localize("EST")
        frame = DataFrame(index=hours)
        frame["first"] = np.random.randn(len(hours))
        frame["second"] = np.cumsum(np.random.randn(len(hours)))

        # Per-column aggregation via a dict must equal aggregating the whole
        # frame once per function and picking the matching column.
        summed_first = frame.resample("A", how=np.sum)["first"]
        averaged_second = frame.resample("A", how=np.mean)["second"]
        expected = DataFrame(
            {"first": summed_first, "second": averaged_second},
            columns=["first", "second"],
        )
        per_column = frame.resample("A", how={"first": np.sum, "second": np.mean})
        result = per_column.reindex(columns=["first", "second"])
        assert_frame_equal(result, expected)
Beispiel #10
0
    def test_all_values_single_bin(self):
        # 2070: twelve monthly periods of one year fall into a single annual
        # bin, whose mean must equal the mean of the whole series.
        months = period_range(start="2012-01-01", end="2012-12-31", freq="M")
        s = Series(np.random.randn(len(months)), index=months)

        annual = s.resample("A", how="mean")
        tm.assert_almost_equal(annual[0], s.mean())
Beispiel #11
0
    def test_annual_upsample(self):
        # Upsample annual period series (one per anchor month in MONTHS) to
        # finer frequencies for every convention / fill method combination.
        targets = ["D", "B", "M"]
        conventions = ["start", "end"]
        fill_methods = ["ffill", "bfill"]

        for month in MONTHS:
            ts = _simple_pts("1/1/1990", "12/31/1995", freq="A-%s" % month)

            for targ, conv, meth in product(targets, conventions, fill_methods):
                result = ts.resample(targ, fill_method=meth, convention=conv)
                stamped = result.to_timestamp(targ, how=conv)
                expected = stamped.asfreq(targ, meth).to_period()
                assert_series_equal(result, expected)

        # DataFrame path must agree with the Series path; `ts` here is the
        # series left over from the last loop iteration.
        df = DataFrame({"a": ts})
        frame_result = df.resample("D", fill_method="ffill")
        series_result = df["a"].resample("D", fill_method="ffill")
        assert_series_equal(frame_result["a"], series_result)

        years = period_range("2000", "2003", freq="A-DEC")
        ts = Series([1, 2, 3, 4], index=years)

        result = ts.resample("M", fill_method="ffill")
        all_months = period_range("2000-01", "2003-12", freq="M")

        expected = ts.asfreq("M", how="start").reindex(all_months, method="ffill")
        assert_series_equal(result, expected)
Beispiel #12
0
    def test_quarterly_resampling(self):
        # Resampling quarterly periods to annual must equal the round trip
        # through timestamps: to_timestamp -> resample -> to_period.
        quarters = period_range("2000Q1", periods=10, freq="Q-DEC")
        ts = Series(np.arange(10), index=quarters)

        via_periods = ts.resample("A")
        via_timestamps = ts.to_timestamp().resample("A").to_period()
        assert_series_equal(via_periods, via_timestamps)
Beispiel #13
0
    def test_with_nan(self):
        # basic asof test
        rng = date_range("1/1/2000", "1/2/2000", freq="4h")
        s = Series(np.arange(len(rng)), index=rng)
        r = s.resample("2h").mean()

        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0], index=date_range("1/1/2000", "1/2/2000", freq="2h")
        )
        tm.assert_series_equal(result, expected)

        r.iloc[3:5] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0], index=date_range("1/1/2000", "1/2/2000", freq="2h")
        )
        tm.assert_series_equal(result, expected)

        r.iloc[-3:] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0], index=date_range("1/1/2000", "1/2/2000", freq="2h")
        )
        tm.assert_series_equal(result, expected)
Beispiel #14
0
    def test_resample_base(self):
        # With base=2 the 5-minute bin edges are expected at minutes
        # ...:57, ...:02, ...:07 instead of on multiples of five.
        rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s")
        ts = Series(np.random.randn(len(rng)), index=rng)

        resampled = ts.resample("5min", base=2)
        exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min")
        self.assertTrue(resampled.index.equals(exp_rng))
Beispiel #15
0
    def test_resample_single_group(self):
        # A user-supplied aggregator must match the built-in "sum", both when
        # the data spans two monthly bins and when it fits in a single one.
        def mysum(x):
            return x.sum()

        for last_day in ("2000-2-10", "2000-1-10"):
            rng = date_range("2000-1-1", last_day, freq="D")
            ts = Series(np.random.randn(len(rng)), index=rng)
            assert_series_equal(ts.resample("M", how="sum"), ts.resample("M", how=mysum))

        # GH 3849
        stamps = [Timestamp("20070915 15:30:00"), Timestamp("20070915 15:40:00")]
        s = Series([30.1, 31.6], index=stamps)
        expected = Series([0.75], index=[Timestamp("20070915")])
        # NOTE(review): keep the lambda wrapper -- passing np.std directly may
        # be dispatched differently by pandas; confirm before simplifying.
        result = s.resample("D", how=lambda x: np.std(x))
        assert_series_equal(result, expected)
Beispiel #16
0
    def test_upsample_with_limit(self):
        # Annual -> monthly period upsample with a capped forward fill must
        # match asfreq followed by a reindex with the same fill limit.
        years = period_range("1/1/2000", periods=5, freq="A")
        ts = Series(np.random.randn(len(years)), years)

        upsampled = ts.resample("M", fill_method="ffill", limit=2, convention="end")
        expected = ts.asfreq("M").reindex(upsampled.index, method="ffill", limit=2)
        assert_series_equal(upsampled, expected)
def calculate_frequencies(comments, aggregation_interval=DEFAULT_AGGREGATION_INTERVAL):
    """Count entries of *comments* per resampling interval.

    A series of ones indexed by *comments* is summed per
    *aggregation_interval* bin, with each bin labelled by its right edge.

    NOTE(review): *comments* is presumably a sequence of timestamps, since it
    is used directly as the index being resampled -- confirm against callers.

    Returns the per-bin sums with NaN entries dropped (not zero-filled).
    """
    ones = Series([1] * len(comments), comments)
    per_interval = ones.resample(aggregation_interval, how="sum", label="right")
    return per_interval.dropna()
Beispiel #18
0
    def test_upsample_with_limit(self):
        # 5-minute -> 1-minute upsample with a capped forward fill must match
        # a reindex onto the same labels with the same fill limit.
        five_minutely = date_range("1/1/2000", periods=3, freq="5t")
        ts = Series(np.random.randn(len(five_minutely)), five_minutely)

        upsampled = ts.resample("t", fill_method="ffill", limit=2)
        expected = ts.reindex(upsampled.index, method="ffill", limit=2)
        assert_series_equal(upsampled, expected)
Beispiel #19
0
    def test_metadata_propagation_indiv(self):
        # check that the metadata matches up on the resulting ops

        o = Series(range(3), range(3))
        o.name = "foo"
        o2 = Series(range(3), range(3))
        o2.name = "bar"

        result = o.T
        self.check_metadata(o, result)

        # resample
        ts = Series(np.random.rand(1000), index=date_range("20130101", periods=1000, freq="s"), name="foo")
        result = ts.resample("1T")
        self.check_metadata(ts, result)

        result = ts.resample("1T", how="min")
        self.check_metadata(ts, result)

        result = ts.resample("1T", how=lambda x: x.sum())
        self.check_metadata(ts, result)

        # Temporarily patch Series at class level to register an extra
        # "filename" metadata field and a custom __finalize__. The originals
        # are restored in the finally block so a failing assertion cannot
        # leak the patch into other tests.
        _metadata = Series._metadata
        _finalize = Series.__finalize__
        try:
            Series._metadata = ["name", "filename"]
            o.filename = "foo"
            o2.filename = "bar"

            def finalize(self, other, method=None, **kwargs):
                for name in self._metadata:
                    if method == "concat" and name == "filename":
                        # On concat, join the filenames of all inputs that have one.
                        value = "+".join([getattr(obj, name) for obj in other.objs if getattr(obj, name, None)])
                        object.__setattr__(self, name, value)
                    else:
                        object.__setattr__(self, name, getattr(other, name, None))

                return self

            Series.__finalize__ = finalize

            result = pd.concat([o, o2])
            self.assertEqual(result.filename, "foo+bar")
            self.assertIsNone(result.name)
        finally:
            # reset
            Series._metadata = _metadata
            Series.__finalize__ = _finalize
Beispiel #20
0
    def test_resample_daily_anchored(self):
        # Daily bins must start at the same anchor whether or not the series
        # itself begins exactly on that anchor.
        minutes = date_range("1/1/2000 0:00:00", periods=10000, freq="T")
        ts = Series(np.random.randn(len(minutes)), index=minutes)
        ts[:2] = np.nan  # so results are the same

        left_anchored = dict(closed="left", label="left")
        trimmed = ts[2:].resample("D", **left_anchored)
        full = ts.resample("D", **left_anchored)
        assert_series_equal(trimmed, full)
Beispiel #21
0
 def test_resample_reresample(self):
     # Resample daily data to business days, then resample that result again
     # to 8-hourly, and check the length and frequency of the final index.
     dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D")
     s = Series(np.random.rand(len(dti)), dti)
     bs = s.resample("B", closed="right", label="right")
     result = bs.resample("8H")
     self.assertEqual(len(result), 22)
     tm.assert_isinstance(result.index.freq, offsets.DateOffset)
     self.assertEqual(result.index.freq, offsets.Hour(8))
Beispiel #22
0
    def test_upsample_apply_functions(self):
        # #1596: a list of aggregators on an upsample must give a DataFrame.
        hourly = pd.date_range("2012-06-12", periods=4, freq="h")
        ts = Series(np.random.randn(len(hourly)), index=hourly)

        multi_agg = ts.resample("20min", how=["mean", "sum"])
        tm.assert_isinstance(multi_agg, DataFrame)
Beispiel #23
0
    def test_resample_doesnt_truncate(self):
        # Test for issue #3020: resampling at the index's own frequency must
        # keep the first label.
        import pandas as pd

        dates = pd.date_range("01-Jan-2014", "05-Jan-2014", freq="D")
        series = Series(1, index=dates)

        result = series.resample("D")
        self.assertEqual(result.index[0], dates[0])
Beispiel #24
0
def find_photos_given_region(lat, lng, resample_freq, db_name, count_people=True):
    """Build a resampled photo-count time series for one region.

    Photos are read from the ``photos`` collection of database *db_name* on
    the MongoDB server ``grande:27017``, matching documents whose ``mid_lat``
    and ``mid_lng`` equal ``str(lat)`` and ``str(lng)``. Duplicate photo ids
    are ignored and the remainder is processed in ``created_time`` order.

    When *count_people* is true, a photo is skipped unless it was created
    more than 600 seconds after the same user's last *counted* photo, so a
    burst of pictures from one user counts once.

    Returns a tuple ``(ts, photos_return)``: ``ts`` holds the counts summed
    per *resample_freq* bin (labelled by the right edge), and
    ``photos_return`` lists the photo documents that were counted. For the
    ``"citybeat"`` database only the last 1000 bins are kept.
    """
    mongo = pymongo.Connection("grande", 27017)
    mongo_collection = mongo[db_name].photos

    # Keep only the first document seen for each photo id.
    seen = set()
    photos = []
    for p in mongo_collection.find({"mid_lat": str(lat), "mid_lng": str(lng)}):
        if p["id"] in seen:
            continue
        seen.add(p["id"])
        photos.append(p)

    photos.sort(key=lambda x: x["created_time"])

    # for a single user, when is his last counted photo; this is used to
    # avoid counting a user's taking many pics in a row
    pre_date = {}
    photos_return = []
    dates = []
    for p in photos:
        if count_people:
            user = p["user"]["username"]
            if user in pre_date:
                elapsed = float(p["created_time"]) - float(pre_date[user])
                if not elapsed > 600:
                    # within 10 minutes count it as a single user
                    continue
            pre_date[user] = p["created_time"]
        photos_return.append(p)
        dates.append(datetime.utcfromtimestamp(float(p["created_time"])))

    # Every counted photo contributes exactly one to its bin.
    counts = [1] * len(dates)
    ts = Series(counts, index=dates)

    ts = ts.resample(resample_freq, how="sum", label="right")
    if db_name == "citybeat":
        # keep only the most recent 1000 bins (1000 hours of data when
        # resample_freq is hourly)
        ts = ts[-1000:]
    return ts, photos_return
Beispiel #25
0
    def test_resample_irregular_sparse(self):
        # Bin sizes computed on a leading slice must equal the matching bins
        # computed on the full series.
        five_minutely = date_range(start="1/1/2012", freq="5min", periods=1000)
        s = Series(np.array(100), index=five_minutely)
        # subset the data.
        head = s[:"2012-01-04 06:55"]

        result = head.resample("10min", how=len)
        full_sizes = s.resample("10min", how=len)
        expected = full_sizes.ix[result.index]
        assert_series_equal(result, expected)
Beispiel #26
0
    def test_resample_not_monotonic(self):
        # Resampling a shuffled series must give the same result as
        # resampling it after sorting by index.
        hourly = pd.date_range("2012-06-12", periods=200, freq="h")
        ordered = Series(np.random.randn(len(hourly)), index=hourly)

        shuffled = ordered.take(np.random.permutation(len(ordered)))

        result = shuffled.resample("D", how="sum")
        exp = shuffled.sort_index().resample("D", how="sum")
        assert_series_equal(result, exp)
Beispiel #27
0
    def test_resample_to_period_monthly_buglet(self):
        # GH #1259: a full year of daily data resampled to monthly periods
        # must be indexed by exactly Jan-2000 .. Dec-2000.

        rng = date_range("1/1/2000", "12/31/2000")
        ts = Series(np.random.randn(len(rng)), index=rng)

        result = ts.resample("M", kind="period")
        exp_index = period_range("Jan-2000", "Dec-2000", freq="M")
        self.assertTrue(result.index.equals(exp_index))
Beispiel #28
0
    def test_resample_upsample(self):
        # from daily
        dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D")

        s = Series(np.random.rand(len(dti)), dti)

        # to minutely, by padding
        result = s.resample("Min", fill_method="pad")
        # 9 days * 1440 minutes, plus the closing endpoint
        self.assertEqual(len(result), 12961)
        self.assertEqual(result[0], s[0])
        self.assertEqual(result[-1], s[-1])
Beispiel #29
0
    def test_downsample_non_unique(self):
        # Each daily timestamp appears five times; the monthly means must
        # still match a plain groupby on the month number.
        rng = date_range("1/1/2000", "2/29/2000")
        rng2 = rng.repeat(5).values
        ts = Series(np.random.randn(len(rng2)), index=rng2)

        result = ts.resample("M", how="mean")

        expected = ts.groupby(lambda x: x.month).mean()
        self.assertEqual(len(result), 2)
        # result is positional (0, 1); expected is keyed by month number (1, 2)
        assert_almost_equal(result[0], expected[1])
        assert_almost_equal(result[1], expected[2])
Beispiel #30
0
    def test_corner_cases(self):
        # miscellaneous test coverage

        rng = date_range("1/1/2000", periods=12, freq="t")
        ts = Series(np.random.randn(len(rng)), index=rng)

        # closed="right" with label="left": the first bin is labelled by its
        # left edge, which lies before the first data point.
        result = ts.resample("5t", closed="right", label="left")
        ex_index = date_range("1999-12-31 23:55", periods=4, freq="5t")
        self.assertTrue(result.index.equals(ex_index))

        # zero-length period series
        len0pts = _simple_pts("2007-01", "2010-05", freq="M")[:0]
        # it works
        result = len0pts.resample("A-DEC")
        self.assertEqual(len(result), 0)

        # resample to periods
        ts = _simple_ts("2000-04-28", "2000-04-30 11:00", freq="h")
        result = ts.resample("M", kind="period")
        self.assertEqual(len(result), 1)
        self.assertEqual(result.index[0], Period("2000-04", freq="M"))